🚀 [RofuncRL] Update examples
Skylark0924 committed Jan 14, 2024
1 parent bce3bb9 commit 0c2c560
Showing 21 changed files with 1,579 additions and 21 deletions.
@@ -98,8 +98,8 @@ def inference(custom_args):
     # BiShadowHandPushBlock, BiShadowHandReOrientation, BiShadowHandScissors, BiShadowHandSwingCup,
     # BiShadowHandSwitch, BiShadowHandTwoCatchUnderarm
     # QbSoftHandGrasp, BiQbSoftHandGraspAndPlace, BiQbSoftHandSynergyGrasp, QbSoftHandSynergyGrasp
-    # ShadowHandGrasp
-    parser.add_argument("--task", type=str, default="QbSoftHandSynergyGrasp")
+    # ShadowHandGrasp, CURIQbSoftHandSynergyGrasp
+    parser.add_argument("--task", type=str, default="CURIQbSoftHandSynergyGrasp")
     parser.add_argument("--agent", type=str, default="ppo")  # Available agents: ppo, sac, td3, a2c
     parser.add_argument("--num_envs", type=int, default=4096)
     parser.add_argument("--sim_device", type=int, default=0)
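For a quick way to see the new default in action, a minimal, hedged sketch of the argparse pattern shown above; it is not the example file itself, and the surrounding script (file name, inference entry point) is not visible in this diff view.

import argparse

# Hedged sketch: mirrors only the argument definitions visible in the hunk above,
# with the new CURIQbSoftHandSynergyGrasp default.
parser = argparse.ArgumentParser()
parser.add_argument("--task", type=str, default="CURIQbSoftHandSynergyGrasp")
parser.add_argument("--agent", type=str, default="ppo")  # Available agents: ppo, sac, td3, a2c
parser.add_argument("--num_envs", type=int, default=4096)
parser.add_argument("--sim_device", type=int, default=0)

custom_args = parser.parse_args([])  # an empty argument list picks up the defaults
print(custom_args.task)              # CURIQbSoftHandSynergyGrasp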
@@ -70,7 +70,8 @@ def inference(custom_args):
     trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg,
                                                         env=omni_env,
                                                         device=cfg.rl_device,
-                                                        env_name=custom_args.task)
+                                                        env_name=custom_args.task,
+                                                        inference=True)
     # load checkpoint
     if custom_args.ckpt_path is None:
         raise ValueError("Please specify the checkpoint path for inference.")
The same change, passing inference=True to the trainer constructor, is repeated in the inference functions of the other example scripts modified by this commit.
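As context for the repeated hunk above, a hedged sketch of the registry-dispatch pattern it relies on; the stubs below stand in for the real config, environment, and trainer objects, which are built elsewhere in the example scripts and are not part of this diff.

# Hedged sketch only: mirrors the trainer_map dispatch shown above, with stubs so
# the control flow is runnable without Isaac Gym or Omniverse installed.
class StubTrainer:
    def __init__(self, cfg, env, device, env_name, inference=False):
        self.inference = inference  # the keyword this commit starts passing explicitly

class StubTrainers:
    trainer_map = {"ppo": StubTrainer, "sac": StubTrainer, "td3": StubTrainer, "a2c": StubTrainer}

agent, task = "ppo", "CURIQbSoftHandSynergyGrasp"
trainer = StubTrainers().trainer_map[agent](cfg=None, env=None, device="cuda:0",
                                            env_name=task, inference=True)
print(trainer.inference)  # True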
@@ -17,9 +17,7 @@
 def train(custom_args):
     args_overrides = ["task={}".format(custom_args.task),
                       "train={}{}RofuncRL".format(custom_args.task, custom_args.agent.upper()),
                       "sim_device={}".format(custom_args.sim_device),
                       "rl_device={}".format(custom_args.rl_device),
                       "graphics_device_id={}".format(custom_args.graphics_device_id),
                       "headless={}".format(custom_args.headless)]
     cfg = get_config('./learning/rl', 'config', args=args_overrides)
     gym_task_name = custom_args.task.split('_')[1]
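For context on the override list trimmed in this hunk, a hedged sketch of how such key=value strings feed the Hydra-based get_config call; only the task and train overrides are shown, the import path is an assumption rather than something confirmed by this diff, and running it requires the rofunc package and its config tree.

# Hedged sketch of the config composition pattern in the hunk above; the remaining
# overrides follow the same "key=value" string format.
from rofunc.config.utils import get_config  # import path assumed

task, agent = "CURIQbSoftHandSynergyGrasp", "ppo"
args_overrides = ["task={}".format(task),
                  "train={}{}RofuncRL".format(task, agent.upper())]
cfg = get_config('./learning/rl', 'config', args=args_overrides)
print(cfg.task.name)  # expected to match the new task config added below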
178 changes: 178 additions & 0 deletions rofunc/config/learning/rl/task/CURIQbSoftHandSynergyGrasp.yaml
@@ -0,0 +1,178 @@
name: CURIQbSoftHandSynergyGrasp

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  env_name: "curi_qbsofthand_synergy_grasp"
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 1.5
  episodeLength: 500
  enableDebugVis: False
  cameraDebug: True
  pointCloudDebug: True
  aggregateMode: 1

  stiffnessScale: 1.0
  forceLimitScale: 1.0
  useRelativeControl: False
  dofSpeedScale: 20.0
  actionsMovingAverage: 1.0
  controlFrequencyInv: 1 # 60 Hz

  startPositionNoise: 0.0
  startRotationNoise: 0.0

  resetPositionNoise: 0.0
  resetRotationNoise: 0.0
  resetDofPosRandomInterval: 0.0
  resetDofVelRandomInterval: 0.0

  distRewardScale: 20
  transition_scale: 0.5
  orientation_scale: 0.1
  rotRewardScale: 1.0
  rotEps: 0.1
  actionPenaltyScale: -0.0002
  reachGoalBonus: 250
  fallDistance: 0.4
  fallPenalty: 0.0

  objectType: "power_drill" # can be block, egg or pen
  observationType: "full_state" # point_cloud or full_state
  handAgentIndex: "[[0, 1, 2, 3, 4, 5]]"
  asymmetric_observations: False
  successTolerance: 0.1
  printNumSuccesses: False
  maxConsecutiveSuccesses: 0

  asset:
    assetFileNameBlock: "urdf/objects/cube_multicolor.urdf"
    assetFileNameEgg: "mjcf/open_ai_assets/hand/egg.xml"
    assetFileNamePen: "mjcf/open_ai_assets/hand/pen.xml"

task:
  randomize: False
  randomization_params:
    frequency: 600 # Define how many simulation steps between generating new randomizations
    observations:
      range: [0, .002] # range for the white noise
      range_correlated: [0, .001] # range for correlated noise, refreshed with freq `frequency`
      operation: "additive"
      distribution: "gaussian"
      schedule: "linear" # "constant" is to turn on noise after `schedule_steps` num steps
      schedule_steps: 40000
    actions:
      range: [0., .05]
      range_correlated: [0, .015] # range for correlated noise, refreshed with freq `frequency`
      operation: "additive"
      distribution: "gaussian"
      schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
      schedule_steps: 40000
    sim_params:
      gravity:
        range: [0, 0.4]
        operation: "additive"
        distribution: "gaussian"
        schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
        schedule_steps: 40000
    actor_params:
      hand:
        color: True
        tendon_properties:
          damping:
            range: [0.3, 3.0]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          stiffness:
            range: [0.75, 1.5]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        dof_properties:
          damping:
            range: [0.3, 3.0]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          stiffness:
            range: [0.75, 1.5]
            operation: "scaling"
            distribution: "loguniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          lower:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
          upper:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_shape_properties:
          friction:
            num_buckets: 250
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
      object:
        scale:
          range: [0.95, 1.05]
          operation: "scaling"
          distribution: "uniform"
          schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
          schedule_steps: 30000
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000
        rigid_shape_properties:
          friction:
            num_buckets: 250
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 30000

sim:
  dt: 0.0166 # 1/60 s
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  physx:
    num_threads: 4
    solver_type: 1 # 0: pgs, 1: tgs
    num_position_iterations: 8
    num_velocity_iterations: 0
    contact_offset: 0.002
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 1000.0
    default_buffer_size_multiplier: 5.0
  flex:
    num_outer_iterations: 5
    num_inner_iterations: 20
    warm_start: 0.8
    relaxation: 0.75
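The schedule: "linear" entries above all follow the rule spelled out in their comments; here is a small sketch of that scaling factor in plain Python, for illustration only (the function name is ours, not part of rofunc).

def linear_schedule_factor(current_steps: int, schedule_steps: int) -> float:
    """Fraction of the full randomization range applied at a given step,
    per the YAML comment: min(current num steps, schedule_steps) / schedule_steps."""
    return min(current_steps, schedule_steps) / schedule_steps

# Example: with schedule_steps = 30000 (as in the actor_params entries above),
# randomization ramps from 0 at step 0 to the full range at step 30000 and stays there.
print(linear_schedule_factor(15000, 30000))  # 0.5
print(linear_schedule_factor(60000, 30000))  # 1.0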
4 changes: 2 additions & 2 deletions rofunc/config/learning/rl/train/BaseTaskSACRofuncRL.yaml
@@ -11,8 +11,8 @@ Trainer:
   rofunc_logger_kwargs:  # Rofunc BeautyLogger kwargs.
     verbose: True  # If true, print to stdout.
   maximum_steps: 100000  # The maximum number of steps to run for.
-  random_steps: 0  # The number of random exploration steps to take.
-  start_learning_steps: 0  # The number of steps to take before starting network updating.
+  random_steps: 1000  # The number of random exploration steps to take.
+  start_learning_steps: 1000  # The number of steps to take before starting network updating.
   seed: 42  # The random seed.
   rollouts: 16  # The number of rollouts before updating.
   max_episode_steps: 200  # The maximum number of steps per episode.
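A hedged sketch of how these two settings usually gate an off-policy loop; it reflects the YAML comments above, not the actual RofuncRL trainer implementation, and the stub class below exists only to make the control flow runnable.

import random

random_steps = 1000          # random exploration steps to seed the replay buffer
start_learning_steps = 1000  # no network updates before this many steps
maximum_steps = 5000

class StubEnvAgent:
    def sample_action(self):
        return random.uniform(-1.0, 1.0)  # pure exploration
    def act(self):
        return 0.0                        # placeholder for a learned policy action
    def update(self):
        pass                              # placeholder for an SAC gradient step

stub = StubEnvAgent()
updates = 0
for step in range(maximum_steps):
    action = stub.sample_action() if step < random_steps else stub.act()
    if step >= start_learning_steps:
        stub.update()
        updates += 1
print(updates)  # 4000: learning starts only after the first 1000 steps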
9 changes: 8 additions & 1 deletion rofunc/learning/RofuncRL/agents/base_agent.py
@@ -212,7 +212,14 @@ def multi_gpu_transfer(self, *args):
             elif isinstance(arg, int):
                 pass
             elif isinstance(arg, np.ndarray):
-                arg = torch.tensor(arg).to(rl_device)
+                try:
+                    arg = torch.from_numpy(arg).to(rl_device)
+                except:
+                    for i in range(len(arg)):
+                        self.multi_gpu_transfer(*arg[i])
+            elif isinstance(arg, np.float32) or isinstance(arg, np.float64) or isinstance(arg, np.int32) or isinstance(
+                    arg, np.int64):
+                pass
             else:
                 raise ValueError("Unknown type: {}".format(type(arg)))
         return args
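A small illustration of the case the new try/except above guards against: torch.from_numpy handles numeric arrays directly, but raises on object-dtype (e.g. ragged) arrays, which is when the code falls back to recursing into the elements.

import numpy as np
import torch

dense = np.zeros((2, 3), dtype=np.float32)
print(torch.from_numpy(dense).shape)  # torch.Size([2, 3])

ragged = np.array([np.zeros(2), np.zeros(3)], dtype=object)
try:
    torch.from_numpy(ragged)
except TypeError as e:
    print("object arrays cannot be converted directly:", e)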
2 changes: 2 additions & 0 deletions rofunc/learning/RofuncRL/tasks/__init__.py
@@ -46,6 +46,7 @@ def __init__(self, env_type="isaacgym"):
             from .isaacgymenv.hands.qbhand_synergy_grasp import QbSoftHandSynergyGraspTask
             from .isaacgymenv.hands.shadow_hand_grasp import ShadowHandGraspTask
             from .isaacgymenv.grasp.lift_object import LiftObjectTask
+            from .isaacgymenv.hands.curi_qbhand_synergy_grasp import CURIQbSoftHandSynergyGraspTask

             self.task_map = {
                 "Ant": AntTask,
@@ -92,6 +93,7 @@ def __init__(self, env_type="isaacgym"):
                 "QbSoftHandSynergyGrasp": QbSoftHandSynergyGraspTask,
                 "ShadowHandGrasp": ShadowHandGraspTask,
                 "LiftObject": LiftObjectTask,
+                "CURIQbSoftHandSynergyGrasp": CURIQbSoftHandSynergyGraspTask,
             }
         elif env_type == "omniisaacgym":
             # OmniIsaacGym tasks
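A hedged note on how this entry ties the commit together: the string key matches both the example's --task default and the new YAML file name, so a lookup in task_map resolves the new task class. The registry class that owns task_map is not named in this hunk, so the sketch below uses a stub in its place.

# Hedged sketch: how a task class is looked up from a task_map like the one above.
class CURIQbSoftHandSynergyGraspTask:  # stub standing in for the imported task class
    pass

task_map = {"CURIQbSoftHandSynergyGrasp": CURIQbSoftHandSynergyGraspTask}

task_name = "CURIQbSoftHandSynergyGrasp"  # same string as the example's --task default
print(task_map[task_name].__name__)       # CURIQbSoftHandSynergyGraspTask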