🚀 [RofuncRL] Finish Synergy-based QbSofthand functional grasping
Skylark0924 committed Jan 13, 2024
1 parent 1ff3fb2 commit aadd088
Showing 78 changed files with 220,880 additions and 68 deletions.
@@ -99,7 +99,7 @@ def inference(custom_args):
    # BiShadowHandSwitch, BiShadowHandTwoCatchUnderarm
    # QbSoftHandGrasp, BiQbSoftHandGraspAndPlace, BiQbSoftHandSynergyGrasp, QbSoftHandSynergyGrasp
    # ShadowHandGrasp
-    parser.add_argument("--task", type=str, default="ShadowHandGrasp")
+    parser.add_argument("--task", type=str, default="QbSoftHandSynergyGrasp")
    parser.add_argument("--agent", type=str, default="ppo")  # Available agents: ppo, sac, td3, a2c
    parser.add_argument("--num_envs", type=int, default=4096)
    parser.add_argument("--sim_device", type=int, default=0)
105 changes: 105 additions & 0 deletions examples/learning_rl/IsaacGym_RofuncRL/example_Grasp_RofuncRL.py
@@ -0,0 +1,105 @@
"""
Dexterous Hands (RofuncRL)
===========================
Examples of learning hand (Shadow Hand, Allegro Hand, qbSofthand) tasks by RofuncRL
"""

import isaacgym
import argparse

from rofunc.config.utils import omegaconf_to_dict, get_config
from rofunc.learning.RofuncRL.tasks import Tasks
from rofunc.learning.RofuncRL.trainers import Trainers
from rofunc.learning.pre_trained_models.download import model_zoo
from rofunc.learning.utils.utils import set_seed


def train(custom_args):
# Config task and trainer parameters for Isaac Gym environments
custom_args.num_envs = 64 if custom_args.agent.upper() in ["SAC", "TD3"] else custom_args.num_envs

args_overrides = ["task={}".format(custom_args.task),
"train={}{}RofuncRL".format(custom_args.task, custom_args.agent.upper()),
"device_id={}".format(custom_args.sim_device),
"rl_device=cuda:{}".format(custom_args.rl_device),
"headless={}".format(custom_args.headless),
"num_envs={}".format(custom_args.num_envs)]
cfg = get_config('./learning/rl', 'config', args=args_overrides)
cfg.train.Trainer.maximum_steps = 1000000
cfg_dict = omegaconf_to_dict(cfg.task)

set_seed(cfg.train.Trainer.seed)

# Instantiate the Isaac Gym environment
env = Tasks().task_map[custom_args.task](cfg=cfg_dict,
sim_device=f'cuda:{cfg.device_id}',
graphics_device_id=cfg.device_id,
headless=cfg.headless)

# Instantiate the RL trainer
trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg.train,
env=env,
device=cfg.rl_device,
env_name=custom_args.task)

# Start training
trainer.train()


def inference(custom_args):
# Config task and trainer parameters for Isaac Gym environments
args_overrides = ["task={}".format(custom_args.task),
"train={}{}RofuncRL".format(custom_args.task, custom_args.agent.upper()),
"device_id={}".format(custom_args.sim_device),
"rl_device=cuda:{}".format(custom_args.rl_device),
"headless={}".format(False),
"num_envs={}".format(16)]
cfg = get_config('./learning/rl', 'config', args=args_overrides)
cfg_dict = omegaconf_to_dict(cfg.task)

set_seed(cfg.train.Trainer.seed)

# Instantiate the Isaac Gym environment
infer_env = Tasks().task_map[custom_args.task](cfg=cfg_dict,
rl_device=cfg.rl_device,
sim_device=f'cuda:{cfg.device_id}',
graphics_device_id=cfg.device_id,
headless=cfg.headless,
virtual_screen_capture=cfg.capture_video, # TODO: check
force_render=cfg.force_render)

# Instantiate the RL trainer
trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg.train,
env=infer_env,
device=cfg.rl_device,
env_name=custom_args.task,
inference=True)
# load checkpoint
if custom_args.ckpt_path is None:
custom_args.ckpt_path = model_zoo(name=f"{custom_args.task}RofuncRLPPO.pth")
trainer.agent.load_ckpt(custom_args.ckpt_path)

# Start inference
trainer.inference()


if __name__ == '__main__':
gpu_id = 1

parser = argparse.ArgumentParser()
# Available tasks: LiftObject
parser.add_argument("--task", type=str, default="LiftObject")
parser.add_argument("--agent", type=str, default="ppo") # Available agents: ppo, sac, td3, a2c
parser.add_argument("--num_envs", type=int, default=256)
parser.add_argument("--sim_device", type=int, default=1)
parser.add_argument("--rl_device", type=int, default=gpu_id)
parser.add_argument("--headless", type=str, default="True")
parser.add_argument("--inference", action="store_true", help="turn to inference mode while adding this argument")
parser.add_argument("--ckpt_path", type=str, default=None)
custom_args = parser.parse_args()

if not custom_args.inference:
train(custom_args)
else:
inference(custom_args)
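
A hypothetical usage sketch (not part of this commit): the same entry points can be driven programmatically with a Namespace that mirrors the CLI options above, assuming train/inference are imported from example_Grasp_RofuncRL.py; the field values below are illustrative.

from argparse import Namespace

# Fields mirror the argparse options defined in the example script.
custom_args = Namespace(task="LiftObject", agent="ppo", num_envs=256,
                        sim_device=0, rl_device=0, headless="True",
                        inference=False, ckpt_path=None)
train(custom_args)  # or set inference=True and call inference(custom_args)
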
109 changes: 109 additions & 0 deletions rofunc/config/learning/rl/task/LiftObject.yaml
@@ -0,0 +1,109 @@
name: LiftObject
experiment_name: ${resolve_default:LiftObject,${..experiment}}
physics_engine: ${..physics_engine}
rl_device: ${..rl_device}

task:
  # List of objects to be included in the task.
  ycbObjects: [ "014_lemon" ]
  egadObjects: [ ]
  # List of measures/states to be returned as the observation.
  observationType: [ "handPose", "handDofPos", "objectPos", "objectRot" ]
  # Dimensionality of said measures
  num_observations:
    fingertipContactForces: 15
    fingertipPos: 15
    handDofPos: 5
    handPose: 7
    jointPos: 17
    jointVel: 17
    objectBboxBounds: 6
    objectBboxCorners: 24
    objectBboxPose: 7
    objectSurfaceSamples: 60
    objectPos: 3
    objectLinVel: 3
    objectRot: 4
    objectAngVel: 3
    previousAction: 11
  returnObsDict: False

debug:
  verbose: True
  visualization: False
  drawEefPose: False
  drawTrackerPose: False
  drawFingertipPose: False
  drawObjectPose: False
  drawObjectBbox: False
  drawObjectSurfaceSamples: False
  drawHandObjectDistance: False
  drawObjectTargetDistance: False
  drawFingertipContactForces: False
  colorObjectsOnSuccess: False
  colorFingertipContactForce: False

#cameras:
# save_recordings: ${...save_recordings}
# convert_to_pointcloud: True
# convert_to_voxelgrid: True
# camera0:
# type: rgbd
# pos: [ 0.0, -0.5, 1.3 ]
# lookat: [ 0, 0, 0.8 ]
# horizontal_fov: 70
# width: 128
# height: 128
# camera1:
# type: rgbd
# pos: [ 0.0, 0.5, 1.3 ]
# lookat: [ 0, 0, 0.8 ]
# horizontal_fov: 70
# width: 128
# height: 128

initState:
  noise:
    objectPos: 0.05
    robotDofPos: 0.01

asset:
  robotAssetFile: "urdf/sih/ur5e_schunk_sih_right.urdf"
  tableAssetFile: "urdf/table/table.urdf"
  ycbObjectAssetRoot: "urdf/ycb"
  egadObjectAssetRoot: "urdf/egad/train"
  robotProps:
    armStiffness: 1e4
    armDamping: 300
    handStiffness: 10
    handDamping: 0.5

reward:
  scale:
    actionPenalty: 0.0 # -0.002
    fingertipsToObjectDistanceReward: 1.0
    objectHeightReward: 0.5
    objectVelocityReward: 0.0
    targetHeightReward: 5000.0
    objectFallsOffTablePenalty: -500.0
  liftOffHeight: 0.1
  maxXyDrift: 0.2
  targetHeight: 0.2
  epsHeight: 0.02
  epsFingertips: 1.0
  xyCenter: [0., 0.]
  sparse: False
  returnRewardsDict: False

reset:
  maxEpisodeLength: 300
  objectFallsOffTable: True
  objectLifted: True

defaults:
- control: default
- domainRandomization: default
- env: parallel
- haptics: default
- sim: default
- viewer: first_person
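
A minimal sketch (not the task's actual implementation) of how the observation size follows from this config: the dimensions of the entries selected in observationType are summed, here 7 + 5 + 3 + 4 = 19.

num_observations = {
    "fingertipContactForces": 15, "fingertipPos": 15, "handDofPos": 5,
    "handPose": 7, "jointPos": 17, "jointVel": 17, "objectBboxBounds": 6,
    "objectBboxCorners": 24, "objectBboxPose": 7, "objectSurfaceSamples": 60,
    "objectPos": 3, "objectLinVel": 3, "objectRot": 4, "objectAngVel": 3,
    "previousAction": 11,
}
observation_type = ["handPose", "handDofPos", "objectPos", "objectRot"]
obs_dim = sum(num_observations[key] for key in observation_type)
print(obs_dim)  # 19
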
9 changes: 9 additions & 0 deletions rofunc/config/learning/rl/task/control/default.yaml
@@ -0,0 +1,9 @@
useRelativeControl: True
actionMovingAverage: 1.0
controlFreqInv: 3 # 1 / (controlFreqInv * dt) Hz
clampActionDeviation: True
teleoperated: False # Whether the simulation is controlled via teleoperation
scale:
  relativePosSpeed: 2.5
  relativeRotSpeed: 10
  handJointSpeed: 10.0
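
Quick sanity check (a sketch, assuming the simulation step from task/sim/default.yaml below): with dt = 0.011 s and controlFreqInv = 3, a new action is applied every third physics step, i.e. at roughly 1 / (3 * 0.011) ≈ 30 Hz.

dt = 0.011                 # simulation step from task/sim/default.yaml
control_freq_inv = 3       # controlFreqInv above
control_rate_hz = 1.0 / (control_freq_inv * dt)
print(f"{control_rate_hz:.1f} Hz")  # ~30.3 Hz
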
@@ -0,0 +1,3 @@
enable: False
randomizationParams:
  none: 0
10 changes: 10 additions & 0 deletions rofunc/config/learning/rl/task/haptics/default.yaml
@@ -0,0 +1,10 @@
enable: True
buzz:
  proportional_to: 'high_freq' # Which property is mapped to vibration of the fingers ['force', 'force_increase', 'collision']
  full_at: 3. # Which value is equivalent to maximum vibration
  collision_vibration_time: 0.09 # Time vibration is active after the impact of a finger
  low_pass_horizon: 0.3 # [s]
force_feedback:
  proportional_to: 'force'
  full_at: 1.0 # Which effective fingertip force is equivalent to maximum feedback force
  binary: True # Whether to apply feedback force at 100% once full_at is reached and 0% otherwise
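
A hypothetical helper sketch for the haptic settings above (the function names are illustrative, not from the codebase): feedback grows with the measured quantity and saturates at full_at; with binary: True the force-feedback channel becomes all-or-nothing once full_at is reached.

def buzz_intensity(value, full_at=3.0):
    """Map a measured quantity to a vibration strength in [0, 1]."""
    return min(max(value / full_at, 0.0), 1.0)

def force_feedback_level(fingertip_force, full_at=1.0, binary=True):
    """Map an effective fingertip force to a feedback level in [0, 1]."""
    if binary:
        return 1.0 if fingertip_force >= full_at else 0.0
    return min(max(fingertip_force / full_at, 0.0), 1.0)
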
22 changes: 22 additions & 0 deletions rofunc/config/learning/rl/task/sim/default.yaml
@@ -0,0 +1,22 @@
dt: 0.011 # 1/90 [s]
substeps: 2
up_axis: "z"
enableCameraSensors: False
useContactForces: True
useForceSensors: False
use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
gravity: [0.0, 0.0, -9.81]
physx:
  num_threads: ${....num_threads}
  solver_type: ${....solver_type}
  use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
  num_position_iterations: 8
  num_velocity_iterations: 0
  max_gpu_contact_pairs: 8388608 # 8*1024*1024
  num_subscenes: ${....num_subscenes}
  contact_offset: 0.002
  rest_offset: 0.0
  bounce_threshold_velocity: 0.2
  max_depenetration_velocity: 1000.0
  default_buffer_size_multiplier: 5.0
  contact_collection: 2 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (default - all contacts)
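
The ${eq:...} and ${contains:...} expressions above are custom OmegaConf resolvers. A simplified sketch of how such resolvers are assumed to behave (the actual lambdas registered by the framework may differ, and the relative ${....} paths are flattened here):

from omegaconf import OmegaConf

# Assumed resolver behaviour: case-insensitive equality / substring test.
OmegaConf.register_new_resolver("eq", lambda a, b: str(a).lower() == str(b).lower())
OmegaConf.register_new_resolver("contains", lambda a, b: str(a).lower() in str(b).lower())

cfg = OmegaConf.create({
    "pipeline": "gpu",
    "sim_device": "cuda:0",
    "use_gpu_pipeline": '${eq:${pipeline},"gpu"}',
    "use_gpu": '${contains:"cuda",${sim_device}}',
})
print(cfg.use_gpu_pipeline, cfg.use_gpu)  # True True
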
2 changes: 2 additions & 0 deletions rofunc/config/learning/rl/task/viewer/first_person.yaml
@@ -0,0 +1,2 @@
pos: [-0.45, -0.55, 1.35]
lookat: [0, 0, 0.9]
2 changes: 2 additions & 0 deletions rofunc/config/learning/rl/task/viewer/front_view.yaml
@@ -0,0 +1,2 @@
pos: [0, 0.8, 1.35]
lookat: [0, 0, 0.9]
2 changes: 1 addition & 1 deletion rofunc/config/learning/rl/train/BaseTaskPPORofuncRL.yaml
@@ -60,7 +60,7 @@ Agent:
# ========= Model parameters ==========
Model:
  use_init: True
-  use_action_clip: False # If true, clip actions to the action space range.
+  use_action_clip: True # If true, clip actions to the action space range.
  use_same_model: False # If true, use the same model for the actor and critic.
  use_action_out_tanh: True # If true, use tanh to scale the action outputs to the action space range.
  action_clip: 1.0 # clipping coefficient for the norm of the actions
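
This commit flips use_action_clip to True. A minimal sketch (an assumption, not the trainer's actual code) of what the two flags can amount to: tanh squashes raw policy outputs into [-1, 1], and clipping then bounds the result before it reaches the simulator, either element-wise or by re-scaling the action norm (the config comment mentions the norm).

import torch

raw_action = torch.tensor([2.3, -0.4, 5.0])
action = torch.tanh(raw_action)                 # use_action_out_tanh: True

# Element-wise reading of use_action_clip with action_clip = 1.0
clipped = torch.clamp(action, -1.0, 1.0)

# Norm-based reading: shrink the action so its norm does not exceed action_clip
norm = action.norm()
clipped_by_norm = action / norm if norm > 1.0 else action
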
2 changes: 2 additions & 0 deletions rofunc/learning/RofuncRL/tasks/__init__.py
@@ -45,6 +45,7 @@ def __init__(self, env_type="isaacgym"):
            from .isaacgymenv.hands.bi_qbhand_synergy_grasp import BiQbSoftHandSynergyGraspTask
            from .isaacgymenv.hands.qbhand_synergy_grasp import QbSoftHandSynergyGraspTask
            from .isaacgymenv.hands.shadow_hand_grasp import ShadowHandGraspTask
+            from .isaacgymenv.grasp.lift_object import LiftObjectTask

            self.task_map = {
                "Ant": AntTask,
@@ -90,6 +91,7 @@ def __init__(self, env_type="isaacgym"):
"BiQbSoftHandSynergyGrasp": BiQbSoftHandSynergyGraspTask,
"QbSoftHandSynergyGrasp": QbSoftHandSynergyGraspTask,
"ShadowHandGrasp": ShadowHandGraspTask,
"LiftObject": LiftObjectTask,
}
        elif env_type == "omniisaacgym":
            # OmniIsaacGym tasks
