Skip to content

Commit

Permalink
🚀 [RofuncRL] Add QbSofthand-based tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
Skylark0924 committed Jan 1, 2024
1 parent 935b54b commit 45f08dd
Show file tree
Hide file tree
Showing 16 changed files with 3,985 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,13 @@ def inference(custom_args):
# BiShadowHandGraspAndPlace, BiShadowHandLiftUnderarm, BiShadowHandPen, BiShadowHandPointCloud,
# BiShadowHandPushBlock, BiShadowHandReOrientation, BiShadowHandScissors, BiShadowHandSwingCup,
# BiShadowHandSwitch, BiShadowHandTwoCatchUnderarm
parser.add_argument("--task", type=str, default="BiShadowHandSwitch")
# QbSoftHandGrasp
parser.add_argument("--task", type=str, default="BiQbSoftHandGraspAndPlace")
parser.add_argument("--agent", type=str, default="ppo") # Available agents: ppo, sac, td3, a2c
parser.add_argument("--num_envs", type=int, default=256)
parser.add_argument("--sim_device", type=int, default=0)
parser.add_argument("--rl_device", type=int, default=gpu_id)
parser.add_argument("--headless", type=str, default="True")
parser.add_argument("--headless", type=str, default="False")
parser.add_argument("--inference", action="store_true", help="turn to inference mode while adding this argument")
parser.add_argument("--ckpt_path", type=str, default=None)
custom_args = parser.parse_args()
Expand Down
178 changes: 178 additions & 0 deletions rofunc/config/learning/rl/task/BiQbSoftHandGraspAndPlace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# Task config for the bimanual QbSoftHand grasp-and-place environment.
# NOTE: the name must match the task key registered in rofunc/learning/RofuncRL/tasks/__init__.py
# ("BiQbSoftHandGraspAndPlace"), not the ShadowHand config this file was derived from.
name: BiQbSoftHandGraspAndPlace

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  env_name: "bi_qbsofthand_grasp_and_place"
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 1.5
  episodeLength: 500
  enableDebugVis: False
  cameraDebug: True
  pointCloudDebug: True
  aggregateMode: 1

  stiffnessScale: 1.0
  forceLimitScale: 1.0
  useRelativeControl: False
  dofSpeedScale: 20.0
  actionsMovingAverage: 1.0
  controlFrequencyInv: 1 # 60 Hz

  startPositionNoise: 0.0
  startRotationNoise: 0.0

  resetPositionNoise: 0.0
  resetRotationNoise: 0.0
  resetDofPosRandomInterval: 0.0
  resetDofVelRandomInterval: 0.0

  distRewardScale: 20
  transition_scale: 0.5
  orientation_scale: 0.1
  rotRewardScale: 1.0
  rotEps: 0.1
  actionPenaltyScale: -0.0002
  reachGoalBonus: 250
  fallDistance: 0.4
  fallPenalty: 0.0

  objectType: "pot" # can be block, egg or pen
  observationType: "full_state" # point_cloud or full_state
  handAgentIndex: "[[0, 1, 2, 3, 4, 5]]"
  asymmetric_observations: False
  successTolerance: 0.1
  printNumSuccesses: False
  maxConsecutiveSuccesses: 0

  asset:
    assetFileNameBlock: "urdf/objects/cube_multicolor.urdf"
    assetFileNameEgg: "mjcf/open_ai_assets/hand/egg.xml"
    assetFileNamePen: "mjcf/open_ai_assets/hand/pen.xml"

task:
  randomize: False
  randomization_params:
    frequency: 600 # Define how many simulation steps between generating new randomizations
    observations:
      range: [0, .002] # range for the white noise
      range_correlated: [0, .001 ] # range for correlated noise, refreshed with freq `frequency`
      operation: "additive"
      distribution: "gaussian"
      schedule: "linear" # "constant" is to turn on noise after `schedule_steps` num steps
      schedule_steps: 40000
    actions:
      range: [0., .05]
      range_correlated: [0, .015] # range for correlated noise, refreshed with freq `frequency`
      operation: "additive"
      distribution: "gaussian"
      schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
      schedule_steps: 40000
    sim_params:
      gravity:
        range: [0, 0.4]
        operation: "additive"
        distribution: "gaussian"
        schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
        schedule_steps: 40000
    actor_params:
      hand:
        color: True
        tendon_properties:
          damping:
            range: [0.3, 3.0]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          stiffness:
            range: [0.75, 1.5]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        dof_properties:
          damping:
            range: [0.3, 3.0]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          stiffness:
            range: [0.75, 1.5]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          lower:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          upper:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_shape_properties:
          friction:
            num_buckets: 250
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
      object:
        scale:
          range: [0.95, 1.05]
          operation: "scaling"
          distribution: "uniform"
          schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
          schedule_steps: 30000
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_shape_properties:
          friction:
            num_buckets: 250
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000

sim:
  dt: 0.0166 # 1/60 s
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [ 0.0, 0.0, -9.81 ]
  physx:
    num_threads: 4
    solver_type: 1 # 0: pgs, 1: tgs
    num_position_iterations: 8
    num_velocity_iterations: 0
    contact_offset: 0.002
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 1000.0
    default_buffer_size_multiplier: 5.0
  flex:
    num_outer_iterations: 5
    num_inner_iterations: 20
    warm_start: 0.8
    relaxation: 0.75
179 changes: 179 additions & 0 deletions rofunc/config/learning/rl/task/QbSoftHandGrasp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# Task config for the single QbSoftHand grasp environment.
name: QbSoftHandGrasp

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  env_name: "qbsofthand_grasp"
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 1.5
  episodeLength: 500
  enableDebugVis: False
  cameraDebug: True
  pointCloudDebug: True
  aggregateMode: 1

  stiffnessScale: 1.0
  forceLimitScale: 1.0
  useRelativeControl: False
  dofSpeedScale: 20.0
  actionsMovingAverage: 1.0
  controlFrequencyInv: 1 # 60 Hz

  startPositionNoise: 0.0
  startRotationNoise: 0.0

  resetPositionNoise: 0.0
  resetRotationNoise: 0.0
  resetDofPosRandomInterval: 0.0
  resetDofVelRandomInterval: 0.0

  distRewardScale: 20
  transition_scale: 0.5
  orientation_scale: 0.1
  rotRewardScale: 1.0
  rotEps: 0.1
  actionPenaltyScale: -0.0002
  reachGoalBonus: 250
  fallDistance: 0.4
  fallPenalty: 0.0

  objectType: "pot" # can be block, egg or pen
  observationType: "full_state" # point_cloud or full_state
  handAgentIndex: "[[0, 1, 2, 3, 4, 5]]"
  asymmetric_observations: False
  successTolerance: 0.1
  printNumSuccesses: False
  maxConsecutiveSuccesses: 0

  asset:
    # assetFileName: "mjcf/open_ai_assets/hand/shadow_hand.xml"
    assetFileNameBlock: "urdf/objects/cube_multicolor.urdf"
    assetFileNameEgg: "mjcf/open_ai_assets/hand/egg.xml"
    assetFileNamePen: "mjcf/open_ai_assets/hand/pen.xml"

task:
  randomize: False
  randomization_params:
    frequency: 600 # Define how many simulation steps between generating new randomizations
    observations:
      range: [0, .002] # range for the white noise
      range_correlated: [0, .001 ] # range for correlated noise, refreshed with freq `frequency`
      operation: "additive"
      distribution: "gaussian"
      schedule: "linear" # "constant" is to turn on noise after `schedule_steps` num steps
      schedule_steps: 40000
    actions:
      range: [0., .05]
      range_correlated: [0, .015] # range for correlated noise, refreshed with freq `frequency`
      operation: "additive"
      distribution: "gaussian"
      schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
      schedule_steps: 40000
    sim_params:
      gravity:
        range: [0, 0.4]
        operation: "additive"
        distribution: "gaussian"
        schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
        schedule_steps: 40000
    actor_params:
      hand:
        color: True
        tendon_properties:
          damping:
            range: [0.3, 3.0]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          stiffness:
            range: [0.75, 1.5]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        dof_properties:
          damping:
            range: [0.3, 3.0]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          stiffness:
            range: [0.75, 1.5]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          lower:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          upper:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_shape_properties:
          friction:
            num_buckets: 250
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
      object:
        scale:
          range: [0.95, 1.05]
          operation: "scaling"
          distribution: "uniform"
          schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
          schedule_steps: 30000
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_shape_properties:
          friction:
            num_buckets: 250
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000

sim:
  dt: 0.0166 # 1/60 s
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [ 0.0, 0.0, -9.81 ]
  physx:
    num_threads: 4
    solver_type: 1 # 0: pgs, 1: tgs
    num_position_iterations: 8
    num_velocity_iterations: 0
    contact_offset: 0.002
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 1000.0
    default_buffer_size_multiplier: 5.0
  flex:
    num_outer_iterations: 5
    num_inner_iterations: 20
    warm_start: 0.8
    relaxation: 0.75
4 changes: 4 additions & 0 deletions rofunc/learning/RofuncRL/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def __init__(self, env_type="isaacgym"):
from .isaacgymenv.hands.shadow_hand_swing_cup import ShadowHandSwingCupTask
from .isaacgymenv.hands.shadow_hand_switch import ShadowHandSwitchTask
from .isaacgymenv.hands.shadow_hand_two_catch_underarm import ShadowHandTwoCatchUnderarmTask
from .isaacgymenv.hands.qbsofthand_grasp import QbSoftHandGraspTask
from .isaacgymenv.hands.bi_qbhand_grasp_and_place import BiQbSoftHandGraspAndPlaceTask

self.task_map = {
"Ant": AntTask,
Expand Down Expand Up @@ -80,6 +82,8 @@ def __init__(self, env_type="isaacgym"):
"BiShadowHandSwingCup": ShadowHandSwingCupTask,
"BiShadowHandSwitch": ShadowHandSwitchTask,
"BiShadowHandTwoCatchUnderarm": ShadowHandTwoCatchUnderarmTask,
"QbSoftHandGrasp": QbSoftHandGraspTask,
"BiQbSoftHandGraspAndPlace": BiQbSoftHandGraspAndPlaceTask,
}
elif env_type == "omniisaacgym":
# OmniIsaacGym tasks
Expand Down
Loading

0 comments on commit 45f08dd

Please sign in to comment.