🚀 [RofuncRL] Finish Synergy-based QbSofthand functional grasping
Skylark0924 committed Jan 13, 2024
1 parent 1ff3fb2 commit aadd088
Showing 78 changed files with 220,880 additions and 68 deletions.
@@ -99,7 +99,7 @@ def inference(custom_args):
    # BiShadowHandSwitch, BiShadowHandTwoCatchUnderarm
    # QbSoftHandGrasp, BiQbSoftHandGraspAndPlace, BiQbSoftHandSynergyGrasp, QbSoftHandSynergyGrasp
    # ShadowHandGrasp
-    parser.add_argument("--task", type=str, default="ShadowHandGrasp")
+    parser.add_argument("--task", type=str, default="QbSoftHandSynergyGrasp")
    parser.add_argument("--agent", type=str, default="ppo")  # Available agents: ppo, sac, td3, a2c
    parser.add_argument("--num_envs", type=int, default=4096)
    parser.add_argument("--sim_device", type=int, default=0)
105 changes: 105 additions & 0 deletions examples/learning_rl/IsaacGym_RofuncRL/example_Grasp_RofuncRL.py
@@ -0,0 +1,105 @@
"""
Dexterous Hands (RofuncRL)
===========================
Examples of learning hand (Shadow Hand, Allegro Hand, qbSofthand) tasks by RofuncRL
"""

import isaacgym
import argparse

from rofunc.config.utils import omegaconf_to_dict, get_config
from rofunc.learning.RofuncRL.tasks import Tasks
from rofunc.learning.RofuncRL.trainers import Trainers
from rofunc.learning.pre_trained_models.download import model_zoo
from rofunc.learning.utils.utils import set_seed


def train(custom_args):
# Config task and trainer parameters for Isaac Gym environments
custom_args.num_envs = 64 if custom_args.agent.upper() in ["SAC", "TD3"] else custom_args.num_envs

args_overrides = ["task={}".format(custom_args.task),
"train={}{}RofuncRL".format(custom_args.task, custom_args.agent.upper()),
"device_id={}".format(custom_args.sim_device),
"rl_device=cuda:{}".format(custom_args.rl_device),
"headless={}".format(custom_args.headless),
"num_envs={}".format(custom_args.num_envs)]
cfg = get_config('./learning/rl', 'config', args=args_overrides)
cfg.train.Trainer.maximum_steps = 1000000
cfg_dict = omegaconf_to_dict(cfg.task)

set_seed(cfg.train.Trainer.seed)

# Instantiate the Isaac Gym environment
env = Tasks().task_map[custom_args.task](cfg=cfg_dict,
sim_device=f'cuda:{cfg.device_id}',
graphics_device_id=cfg.device_id,
headless=cfg.headless)

# Instantiate the RL trainer
trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg.train,
env=env,
device=cfg.rl_device,
env_name=custom_args.task)

# Start training
trainer.train()


def inference(custom_args):
# Config task and trainer parameters for Isaac Gym environments
args_overrides = ["task={}".format(custom_args.task),
"train={}{}RofuncRL".format(custom_args.task, custom_args.agent.upper()),
"device_id={}".format(custom_args.sim_device),
"rl_device=cuda:{}".format(custom_args.rl_device),
"headless={}".format(False),
"num_envs={}".format(16)]
cfg = get_config('./learning/rl', 'config', args=args_overrides)
cfg_dict = omegaconf_to_dict(cfg.task)

set_seed(cfg.train.Trainer.seed)

# Instantiate the Isaac Gym environment
infer_env = Tasks().task_map[custom_args.task](cfg=cfg_dict,
rl_device=cfg.rl_device,
sim_device=f'cuda:{cfg.device_id}',
graphics_device_id=cfg.device_id,
headless=cfg.headless,
virtual_screen_capture=cfg.capture_video, # TODO: check
force_render=cfg.force_render)

# Instantiate the RL trainer
trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg.train,
env=infer_env,
device=cfg.rl_device,
env_name=custom_args.task,
inference=True)
# load checkpoint
if custom_args.ckpt_path is None:
custom_args.ckpt_path = model_zoo(name=f"{custom_args.task}RofuncRLPPO.pth")
trainer.agent.load_ckpt(custom_args.ckpt_path)

# Start inference
trainer.inference()


if __name__ == '__main__':
gpu_id = 1

parser = argparse.ArgumentParser()
# Available tasks: LiftObject
parser.add_argument("--task", type=str, default="LiftObject")
parser.add_argument("--agent", type=str, default="ppo") # Available agents: ppo, sac, td3, a2c
parser.add_argument("--num_envs", type=int, default=256)
parser.add_argument("--sim_device", type=int, default=1)
parser.add_argument("--rl_device", type=int, default=gpu_id)
parser.add_argument("--headless", type=str, default="True")
parser.add_argument("--inference", action="store_true", help="turn to inference mode while adding this argument")
parser.add_argument("--ckpt_path", type=str, default=None)
custom_args = parser.parse_args()

if not custom_args.inference:
train(custom_args)
else:
inference(custom_args)
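
A hypothetical usage sketch (not part of this commit): the same entry points can be driven programmatically with a Namespace that mirrors the CLI options above, assuming train/inference are imported from example_Grasp_RofuncRL.py; the field values below are illustrative.

from argparse import Namespace

# Fields mirror the argparse options defined in the example script.
custom_args = Namespace(task="LiftObject", agent="ppo", num_envs=256,
                        sim_device=0, rl_device=0, headless="True",
                        inference=False, ckpt_path=None)
train(custom_args)  # or set inference=True and call inference(custom_args)
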
109 changes: 109 additions & 0 deletions rofunc/config/learning/rl/task/LiftObject.yaml
@@ -0,0 +1,109 @@
name: LiftObject
experiment_name: ${resolve_default:LiftObject,${..experiment}}
physics_engine: ${..physics_engine}
rl_device: ${..rl_device}

task:
  # List of objects to be included in the task.
  ycbObjects: [ "014_lemon" ]
  egadObjects: [ ]
  # List of measures/states to be returned as the observation.
  observationType: [ "handPose", "handDofPos", "objectPos", "objectRot" ]
  # Dimensionality of said measures
  num_observations:
    fingertipContactForces: 15
    fingertipPos: 15
    handDofPos: 5
    handPose: 7
    jointPos: 17
    jointVel: 17
    objectBboxBounds: 6
    objectBboxCorners: 24
    objectBboxPose: 7
    objectSurfaceSamples: 60
    objectPos: 3
    objectLinVel: 3
    objectRot: 4
    objectAngVel: 3
    previousAction: 11
  returnObsDict: False

debug:
  verbose: True
  visualization: False
  drawEefPose: False
  drawTrackerPose: False
  drawFingertipPose: False
  drawObjectPose: False
  drawObjectBbox: False
  drawObjectSurfaceSamples: False
  drawHandObjectDistance: False
  drawObjectTargetDistance: False
  drawFingertipContactForces: False
  colorObjectsOnSuccess: False
  colorFingertipContactForce: False

#cameras:
# save_recordings: ${...save_recordings}
# convert_to_pointcloud: True
# convert_to_voxelgrid: True
# camera0:
# type: rgbd
# pos: [ 0.0, -0.5, 1.3 ]
# lookat: [ 0, 0, 0.8 ]
# horizontal_fov: 70
# width: 128
# height: 128
# camera1:
# type: rgbd
# pos: [ 0.0, 0.5, 1.3 ]
# lookat: [ 0, 0, 0.8 ]
# horizontal_fov: 70
# width: 128
# height: 128

initState:
  noise:
    objectPos: 0.05
    robotDofPos: 0.01

asset:
  robotAssetFile: "urdf/sih/ur5e_schunk_sih_right.urdf"
  tableAssetFile: "urdf/table/table.urdf"
  ycbObjectAssetRoot: "urdf/ycb"
  egadObjectAssetRoot: "urdf/egad/train"
  robotProps:
    armStiffness: 1e4
    armDamping: 300
    handStiffness: 10
    handDamping: 0.5

reward:
  scale:
    actionPenalty: 0.0 # -0.002
    fingertipsToObjectDistanceReward: 1.0
    objectHeightReward: 0.5
    objectVelocityReward: 0.0
    targetHeightReward: 5000.0
    objectFallsOffTablePenalty: -500.0
  liftOffHeight: 0.1
  maxXyDrift: 0.2
  targetHeight: 0.2
  epsHeight: 0.02
  epsFingertips: 1.0
  xyCenter: [0., 0.]
  sparse: False
  returnRewardsDict: False

reset:
  maxEpisodeLength: 300
  objectFallsOffTable: True
  objectLifted: True

defaults:
- control: default
- domainRandomization: default
- env: parallel
- haptics: default
- sim: default
- viewer: first_person
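
A minimal sketch (not the task's actual implementation) of how the observation size follows from this config: the dimensions of the entries selected in observationType are summed, here 7 + 5 + 3 + 4 = 19.

num_observations = {
    "fingertipContactForces": 15, "fingertipPos": 15, "handDofPos": 5,
    "handPose": 7, "jointPos": 17, "jointVel": 17, "objectBboxBounds": 6,
    "objectBboxCorners": 24, "objectBboxPose": 7, "objectSurfaceSamples": 60,
    "objectPos": 3, "objectLinVel": 3, "objectRot": 4, "objectAngVel": 3,
    "previousAction": 11,
}
observation_type = ["handPose", "handDofPos", "objectPos", "objectRot"]
obs_dim = sum(num_observations[key] for key in observation_type)
print(obs_dim)  # 19
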
9 changes: 9 additions & 0 deletions rofunc/config/learning/rl/task/control/default.yaml
@@ -0,0 +1,9 @@
useRelativeControl: True
actionMovingAverage: 1.0
controlFreqInv: 3 # 1 / (controlFreqInv * dt) Hz
clampActionDeviation: True
teleoperated: False # Whether the simulation is controlled via teleoperation
scale:
  relativePosSpeed: 2.5
  relativeRotSpeed: 10
  handJointSpeed: 10.0
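
Quick sanity check (a sketch, assuming the simulation step from task/sim/default.yaml below): with dt = 0.011 s and controlFreqInv = 3, a new action is applied every third physics step, i.e. at roughly 1 / (3 * 0.011) ≈ 30 Hz.

dt = 0.011                 # simulation step from task/sim/default.yaml
control_freq_inv = 3       # controlFreqInv above
control_rate_hz = 1.0 / (control_freq_inv * dt)
print(f"{control_rate_hz:.1f} Hz")  # ~30.3 Hz
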
@@ -0,0 +1,3 @@
enable: False
randomizationParams:
  none: 0
10 changes: 10 additions & 0 deletions rofunc/config/learning/rl/task/haptics/default.yaml
@@ -0,0 +1,10 @@
enable: True
buzz:
  proportional_to: 'high_freq' # Which property is mapped to vibration of the fingers ['force', 'force_increase', 'collision']
  full_at: 3. # Which value is equivalent to maximum vibration
  collision_vibration_time: 0.09 # Time vibration is active after the impact of a finger
  low_pass_horizon: 0.3 # [s]
force_feedback:
  proportional_to: 'force'
  full_at: 1.0 # Which effective fingertip force is equivalent to maximum feedback force
  binary: True # Whether to apply feedback force at 100% once full_at is reached and 0% otherwise
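
A hypothetical helper sketch for the haptic settings above (the function names are illustrative, not from the codebase): feedback grows with the measured quantity and saturates at full_at; with binary: True the force-feedback channel becomes all-or-nothing once full_at is reached.

def buzz_intensity(value, full_at=3.0):
    """Map a measured quantity to a vibration strength in [0, 1]."""
    return min(max(value / full_at, 0.0), 1.0)

def force_feedback_level(fingertip_force, full_at=1.0, binary=True):
    """Map an effective fingertip force to a feedback level in [0, 1]."""
    if binary:
        return 1.0 if fingertip_force >= full_at else 0.0
    return min(max(fingertip_force / full_at, 0.0), 1.0)
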
22 changes: 22 additions & 0 deletions rofunc/config/learning/rl/task/sim/default.yaml
@@ -0,0 +1,22 @@
dt: 0.011 # 1/90 [s]
substeps: 2
up_axis: "z"
enableCameraSensors: False
useContactForces: True
useForceSensors: False
use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
gravity: [0.0, 0.0, -9.81]
physx:
  num_threads: ${....num_threads}
  solver_type: ${....solver_type}
  use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
  num_position_iterations: 8
  num_velocity_iterations: 0
  max_gpu_contact_pairs: 8388608 # 8*1024*1024
  num_subscenes: ${....num_subscenes}
  contact_offset: 0.002
  rest_offset: 0.0
  bounce_threshold_velocity: 0.2
  max_depenetration_velocity: 1000.0
  default_buffer_size_multiplier: 5.0
  contact_collection: 2 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (default - all contacts)
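
The ${eq:...} and ${contains:...} expressions above are custom OmegaConf resolvers. A simplified sketch of how such resolvers are assumed to behave (the actual lambdas registered by the framework may differ, and the relative ${....} paths are flattened here):

from omegaconf import OmegaConf

# Assumed resolver behaviour: case-insensitive equality / substring test.
OmegaConf.register_new_resolver("eq", lambda a, b: str(a).lower() == str(b).lower())
OmegaConf.register_new_resolver("contains", lambda a, b: str(a).lower() in str(b).lower())

cfg = OmegaConf.create({
    "pipeline": "gpu",
    "sim_device": "cuda:0",
    "use_gpu_pipeline": '${eq:${pipeline},"gpu"}',
    "use_gpu": '${contains:"cuda",${sim_device}}',
})
print(cfg.use_gpu_pipeline, cfg.use_gpu)  # True True
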
2 changes: 2 additions & 0 deletions rofunc/config/learning/rl/task/viewer/first_person.yaml
@@ -0,0 +1,2 @@
pos: [-0.45, -0.55, 1.35]
lookat: [0, 0, 0.9]
2 changes: 2 additions & 0 deletions rofunc/config/learning/rl/task/viewer/front_view.yaml
@@ -0,0 +1,2 @@
pos: [0, 0.8, 1.35]
lookat: [0, 0, 0.9]
2 changes: 1 addition & 1 deletion rofunc/config/learning/rl/train/BaseTaskPPORofuncRL.yaml
@@ -60,7 +60,7 @@ Agent:
# ========= Model parameters ==========
Model:
  use_init: True
-  use_action_clip: False # If true, clip actions to the action space range.
+  use_action_clip: True # If true, clip actions to the action space range.
  use_same_model: False # If true, use the same model for the actor and critic.
  use_action_out_tanh: True # If true, use tanh to scale the action outputs to the action space range.
  action_clip: 1.0 # clipping coefficient for the norm of the actions
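
This commit flips use_action_clip to True. A minimal sketch (an assumption, not the trainer's actual code) of what the two flags can amount to: tanh squashes raw policy outputs into [-1, 1], and clipping then bounds the result before it reaches the simulator, either element-wise or by re-scaling the action norm (the config comment mentions the norm).

import torch

raw_action = torch.tensor([2.3, -0.4, 5.0])
action = torch.tanh(raw_action)                 # use_action_out_tanh: True

# Element-wise reading of use_action_clip with action_clip = 1.0
clipped = torch.clamp(action, -1.0, 1.0)

# Norm-based reading: shrink the action so its norm does not exceed action_clip
norm = action.norm()
clipped_by_norm = action / norm if norm > 1.0 else action
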
2 changes: 2 additions & 0 deletions rofunc/learning/RofuncRL/tasks/__init__.py
@@ -45,6 +45,7 @@ def __init__(self, env_type="isaacgym"):
            from .isaacgymenv.hands.bi_qbhand_synergy_grasp import BiQbSoftHandSynergyGraspTask
            from .isaacgymenv.hands.qbhand_synergy_grasp import QbSoftHandSynergyGraspTask
            from .isaacgymenv.hands.shadow_hand_grasp import ShadowHandGraspTask
+            from .isaacgymenv.grasp.lift_object import LiftObjectTask

            self.task_map = {
                "Ant": AntTask,
@@ -90,6 +91,7 @@ def __init__(self, env_type="isaacgym"):
"BiQbSoftHandSynergyGrasp": BiQbSoftHandSynergyGraspTask,
"QbSoftHandSynergyGrasp": QbSoftHandSynergyGraspTask,
"ShadowHandGrasp": ShadowHandGraspTask,
"LiftObject": LiftObjectTask,
}
        elif env_type == "omniisaacgym":
            # OmniIsaacGym tasks
