🚀 [RofuncRL] Update examples
Skylark0924 committed Jan 14, 2024
1 parent bce3bb9 commit 0c2c560
Showing 21 changed files with 1,579 additions and 21 deletions.
@@ -98,8 +98,8 @@ def inference(custom_args):
     # BiShadowHandPushBlock, BiShadowHandReOrientation, BiShadowHandScissors, BiShadowHandSwingCup,
     # BiShadowHandSwitch, BiShadowHandTwoCatchUnderarm
     # QbSoftHandGrasp, BiQbSoftHandGraspAndPlace, BiQbSoftHandSynergyGrasp, QbSoftHandSynergyGrasp
-    # ShadowHandGrasp
-    parser.add_argument("--task", type=str, default="QbSoftHandSynergyGrasp")
+    # ShadowHandGrasp, CURIQbSoftHandSynergyGrasp
+    parser.add_argument("--task", type=str, default="CURIQbSoftHandSynergyGrasp")
     parser.add_argument("--agent", type=str, default="ppo")  # Available agents: ppo, sac, td3, a2c
     parser.add_argument("--num_envs", type=int, default=4096)
     parser.add_argument("--sim_device", type=int, default=0)
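For a quick way to see the new default in action, a minimal, hedged sketch of the argparse pattern shown above; it is not the example file itself, and the surrounding script (file name, inference entry point) is not visible in this diff view.

import argparse

# Hedged sketch: mirrors only the argument definitions visible in the hunk above,
# with the new CURIQbSoftHandSynergyGrasp default.
parser = argparse.ArgumentParser()
parser.add_argument("--task", type=str, default="CURIQbSoftHandSynergyGrasp")
parser.add_argument("--agent", type=str, default="ppo")  # Available agents: ppo, sac, td3, a2c
parser.add_argument("--num_envs", type=int, default=4096)
parser.add_argument("--sim_device", type=int, default=0)

custom_args = parser.parse_args([])  # an empty argument list picks up the defaults
print(custom_args.task)              # CURIQbSoftHandSynergyGrasp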
@@ -70,7 +70,8 @@ def inference(custom_args):
     trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg,
                                                         env=omni_env,
                                                         device=cfg.rl_device,
-                                                        env_name=custom_args.task)
+                                                        env_name=custom_args.task,
+                                                        inference=True)
     # load checkpoint
     if custom_args.ckpt_path is None:
         raise ValueError("Please specify the checkpoint path for inference.")
The same change, passing inference=True to the trainer constructor, is repeated in the inference functions of the other example scripts modified by this commit.
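As context for the repeated hunk above, a hedged sketch of the registry-dispatch pattern it relies on; the stubs below stand in for the real config, environment, and trainer objects, which are built elsewhere in the example scripts and are not part of this diff.

# Hedged sketch only: mirrors the trainer_map dispatch shown above, with stubs so
# the control flow is runnable without Isaac Gym or Omniverse installed.
class StubTrainer:
    def __init__(self, cfg, env, device, env_name, inference=False):
        self.inference = inference  # the keyword this commit starts passing explicitly

class StubTrainers:
    trainer_map = {"ppo": StubTrainer, "sac": StubTrainer, "td3": StubTrainer, "a2c": StubTrainer}

agent, task = "ppo", "CURIQbSoftHandSynergyGrasp"
trainer = StubTrainers().trainer_map[agent](cfg=None, env=None, device="cuda:0",
                                            env_name=task, inference=True)
print(trainer.inference)  # True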
@@ -17,9 +17,7 @@
 def train(custom_args):
     args_overrides = ["task={}".format(custom_args.task),
                       "train={}{}RofuncRL".format(custom_args.task, custom_args.agent.upper()),
                       "sim_device={}".format(custom_args.sim_device),
                       "rl_device={}".format(custom_args.rl_device),
                       "graphics_device_id={}".format(custom_args.graphics_device_id),
                       "headless={}".format(custom_args.headless)]
     cfg = get_config('./learning/rl', 'config', args=args_overrides)
     gym_task_name = custom_args.task.split('_')[1]
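For context on the override list trimmed in this hunk, a hedged sketch of how such key=value strings feed the Hydra-based get_config call; only the task and train overrides are shown, the import path is an assumption rather than something confirmed by this diff, and running it requires the rofunc package and its config tree.

# Hedged sketch of the config composition pattern in the hunk above; the remaining
# overrides follow the same "key=value" string format.
from rofunc.config.utils import get_config  # import path assumed

task, agent = "CURIQbSoftHandSynergyGrasp", "ppo"
args_overrides = ["task={}".format(task),
                  "train={}{}RofuncRL".format(task, agent.upper())]
cfg = get_config('./learning/rl', 'config', args=args_overrides)
print(cfg.task.name)  # expected to match the new task config added below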
178 changes: 178 additions & 0 deletions rofunc/config/learning/rl/task/CURIQbSoftHandSynergyGrasp.yaml
@@ -0,0 +1,178 @@
name: CURIQbSoftHandSynergyGrasp

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  env_name: "curi_qbsofthand_synergy_grasp"
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 1.5
  episodeLength: 500
  enableDebugVis: False
  cameraDebug: True
  pointCloudDebug: True
  aggregateMode: 1

  stiffnessScale: 1.0
  forceLimitScale: 1.0
  useRelativeControl: False
  dofSpeedScale: 20.0
  actionsMovingAverage: 1.0
  controlFrequencyInv: 1 # 60 Hz

  startPositionNoise: 0.0
  startRotationNoise: 0.0

  resetPositionNoise: 0.0
  resetRotationNoise: 0.0
  resetDofPosRandomInterval: 0.0
  resetDofVelRandomInterval: 0.0

  distRewardScale: 20
  transition_scale: 0.5
  orientation_scale: 0.1
  rotRewardScale: 1.0
  rotEps: 0.1
  actionPenaltyScale: -0.0002
  reachGoalBonus: 250
  fallDistance: 0.4
  fallPenalty: 0.0

  objectType: "power_drill" # can be block, egg or pen
  observationType: "full_state" # point_cloud or full_state
  handAgentIndex: "[[0, 1, 2, 3, 4, 5]]"
  asymmetric_observations: False
  successTolerance: 0.1
  printNumSuccesses: False
  maxConsecutiveSuccesses: 0

  asset:
    assetFileNameBlock: "urdf/objects/cube_multicolor.urdf"
    assetFileNameEgg: "mjcf/open_ai_assets/hand/egg.xml"
    assetFileNamePen: "mjcf/open_ai_assets/hand/pen.xml"

task:
  randomize: False
  randomization_params:
    frequency: 600 # Define how many simulation steps between generating new randomizations
    observations:
      range: [0, .002] # range for the white noise
      range_correlated: [0, .001] # range for correlated noise, refreshed with freq `frequency`
      operation: "additive"
      distribution: "gaussian"
      schedule: "linear" # "constant" is to turn on noise after `schedule_steps` num steps
      schedule_steps: 40000
    actions:
      range: [0., .05]
      range_correlated: [0, .015] # range for correlated noise, refreshed with freq `frequency`
      operation: "additive"
      distribution: "gaussian"
      schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
      schedule_steps: 40000
    sim_params:
      gravity:
        range: [0, 0.4]
        operation: "additive"
        distribution: "gaussian"
        schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
        schedule_steps: 40000
    actor_params:
      hand:
        color: True
        tendon_properties:
          damping:
            range: [0.3, 3.0]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          stiffness:
            range: [0.75, 1.5]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        dof_properties:
          damping:
            range: [0.3, 3.0]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          stiffness:
            range: [0.75, 1.5]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          lower:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          upper:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_shape_properties:
          friction:
            num_buckets: 250
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
      object:
        scale:
          range: [0.95, 1.05]
          operation: "scaling"
          distribution: "uniform"
          schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
          schedule_steps: 30000
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_shape_properties:
          friction:
            num_buckets: 250
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000

sim:
  dt: 0.0166 # 1/60 s
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  physx:
    num_threads: 4
    solver_type: 1 # 0: pgs, 1: tgs
    num_position_iterations: 8
    num_velocity_iterations: 0
    contact_offset: 0.002
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 1000.0
    default_buffer_size_multiplier: 5.0
  flex:
    num_outer_iterations: 5
    num_inner_iterations: 20
    warm_start: 0.8
    relaxation: 0.75
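The schedule: "linear" entries above all follow the rule spelled out in their comments; here is a small sketch of that scaling factor in plain Python, for illustration only (the function name is ours, not part of rofunc).

def linear_schedule_factor(current_steps: int, schedule_steps: int) -> float:
    """Fraction of the full randomization range applied at a given step,
    per the YAML comment: min(current num steps, schedule_steps) / schedule_steps."""
    return min(current_steps, schedule_steps) / schedule_steps

# Example: with schedule_steps = 30000 (as in the actor_params entries above),
# randomization ramps from 0 at step 0 to the full range at step 30000 and stays there.
print(linear_schedule_factor(15000, 30000))  # 0.5
print(linear_schedule_factor(60000, 30000))  # 1.0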
4 changes: 2 additions & 2 deletions rofunc/config/learning/rl/train/BaseTaskSACRofuncRL.yaml
@@ -11,8 +11,8 @@ Trainer:
   rofunc_logger_kwargs:  # Rofunc BeautyLogger kwargs.
     verbose: True  # If true, print to stdout.
   maximum_steps: 100000  # The maximum number of steps to run for.
-  random_steps: 0  # The number of random exploration steps to take.
-  start_learning_steps: 0  # The number of steps to take before starting network updating.
+  random_steps: 1000  # The number of random exploration steps to take.
+  start_learning_steps: 1000  # The number of steps to take before starting network updating.
   seed: 42  # The random seed.
   rollouts: 16  # The number of rollouts before updating.
   max_episode_steps: 200  # The maximum number of steps per episode.
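A hedged sketch of how these two settings usually gate an off-policy loop; it reflects the YAML comments above, not the actual RofuncRL trainer implementation, and the stub class below exists only to make the control flow runnable.

import random

random_steps = 1000          # random exploration steps to seed the replay buffer
start_learning_steps = 1000  # no network updates before this many steps
maximum_steps = 5000

class StubEnvAgent:
    def sample_action(self):
        return random.uniform(-1.0, 1.0)  # pure exploration
    def act(self):
        return 0.0                        # placeholder for a learned policy action
    def update(self):
        pass                              # placeholder for an SAC gradient step

stub = StubEnvAgent()
updates = 0
for step in range(maximum_steps):
    action = stub.sample_action() if step < random_steps else stub.act()
    if step >= start_learning_steps:
        stub.update()
        updates += 1
print(updates)  # 4000: learning starts only after the first 1000 steps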
9 changes: 8 additions & 1 deletion rofunc/learning/RofuncRL/agents/base_agent.py
@@ -212,7 +212,14 @@ def multi_gpu_transfer(self, *args):
             elif isinstance(arg, int):
                 pass
             elif isinstance(arg, np.ndarray):
-                arg = torch.tensor(arg).to(rl_device)
+                try:
+                    arg = torch.from_numpy(arg).to(rl_device)
+                except:
+                    for i in range(len(arg)):
+                        self.multi_gpu_transfer(*arg[i])
+            elif isinstance(arg, np.float32) or isinstance(arg, np.float64) or isinstance(arg, np.int32) or isinstance(
+                    arg, np.int64):
+                pass
             else:
                 raise ValueError("Unknown type: {}".format(type(arg)))
         return args
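A small illustration of the case the new try/except above guards against: torch.from_numpy handles numeric arrays directly, but raises on object-dtype (e.g. ragged) arrays, which is when the code falls back to recursing into the elements.

import numpy as np
import torch

dense = np.zeros((2, 3), dtype=np.float32)
print(torch.from_numpy(dense).shape)  # torch.Size([2, 3])

ragged = np.array([np.zeros(2), np.zeros(3)], dtype=object)
try:
    torch.from_numpy(ragged)
except TypeError as e:
    print("object arrays cannot be converted directly:", e)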
2 changes: 2 additions & 0 deletions rofunc/learning/RofuncRL/tasks/__init__.py
@@ -46,6 +46,7 @@ def __init__(self, env_type="isaacgym"):
             from .isaacgymenv.hands.qbhand_synergy_grasp import QbSoftHandSynergyGraspTask
             from .isaacgymenv.hands.shadow_hand_grasp import ShadowHandGraspTask
             from .isaacgymenv.grasp.lift_object import LiftObjectTask
+            from .isaacgymenv.hands.curi_qbhand_synergy_grasp import CURIQbSoftHandSynergyGraspTask

             self.task_map = {
                 "Ant": AntTask,
@@ -92,6 +93,7 @@ def __init__(self, env_type="isaacgym"):
                 "QbSoftHandSynergyGrasp": QbSoftHandSynergyGraspTask,
                 "ShadowHandGrasp": ShadowHandGraspTask,
                 "LiftObject": LiftObjectTask,
+                "CURIQbSoftHandSynergyGrasp": CURIQbSoftHandSynergyGraspTask,
             }
         elif env_type == "omniisaacgym":
             # OmniIsaacGym tasks
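A hedged note on how this entry ties the commit together: the string key matches both the example's --task default and the new YAML file name, so a lookup in task_map resolves the new task class. The registry class that owns task_map is not named in this hunk, so the sketch below uses a stub in its place.

# Hedged sketch: how a task class is looked up from a task_map like the one above.
class CURIQbSoftHandSynergyGraspTask:  # stub standing in for the imported task class
    pass

task_map = {"CURIQbSoftHandSynergyGrasp": CURIQbSoftHandSynergyGraspTask}

task_name = "CURIQbSoftHandSynergyGrasp"  # same string as the example's --task default
print(task_map[task_name].__name__)       # CURIQbSoftHandSynergyGraspTask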