🚀 [RofuncRL] BiQbSoftHand Tasks almost ok

Skylark0924 · Jan 2, 2024 · 74553d7 · 74553d7
1 parent 4606c7e
commit 74553d7
Show file tree

Hide file tree

Showing 8 changed files with 617 additions and 290 deletions.
diff --git a/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py b/examples/learning_rl/IsaacGym_RofuncRL/example_DexterousHands_RofuncRL.py
@@ -103,7 +103,7 @@ def inference(custom_args):
     parser.add_argument("--num_envs", type=int, default=256)
     parser.add_argument("--sim_device", type=int, default=0)
     parser.add_argument("--rl_device", type=int, default=gpu_id)
-    parser.add_argument("--headless", type=str, default="False")
+    parser.add_argument("--headless", type=str, default="True")
     parser.add_argument("--inference", action="store_true", help="turn to inference mode while adding this argument")
     parser.add_argument("--ckpt_path", type=str, default=None)
     custom_args = parser.parse_args()

diff --git a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/bi_qbhand_grasp_and_place.py b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/bi_qbhand_grasp_and_place.py
@@ -7,6 +7,7 @@
 
 from rofunc.learning.RofuncRL.tasks.isaacgymenv.base.vec_task import VecTask
 from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *
+from rofunc.utils.logger.beauty_logger import beauty_print
 from rofunc.utils.oslab import get_rofunc_path
 
 
@@ -174,7 +175,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
         self.right_hand_dof_pos = self.right_hand_dof_state[..., 0]
         self.right_hand_dof_vel = self.right_hand_dof_state[..., 1]
 
-        self.left_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:, self.num_hand_dofs:self.num_hand_dofs * 2]
+        self.left_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:,
+                                   self.num_hand_dofs:self.num_hand_dofs * 2]
         self.left_hand_dof_pos = self.left_hand_dof_state[..., 0]
         self.left_hand_dof_vel = self.left_hand_dof_state[..., 1]
 
@@ -366,11 +368,11 @@ def _create_envs(self, num_envs, spacing, num_per_row):
 
         # <editor-fold desc="set initial poses">
         right_hand_start_pose = gymapi.Transform()
-        right_hand_start_pose.p = gymapi.Vec3(0.55, 0.2, 0.8)
+        right_hand_start_pose.p = gymapi.Vec3(0.25, 0.2, 0.8)
         right_hand_start_pose.r = gymapi.Quat().from_euler_zyx(-1.57, 0, -1.57)
 
         left_hand_start_pose = gymapi.Transform()
-        left_hand_start_pose.p = gymapi.Vec3(0.55, -0.2, 0.8)
+        left_hand_start_pose.p = gymapi.Vec3(0.25, -0.2, 0.8)
         left_hand_start_pose.r = gymapi.Quat().from_euler_zyx(-1.57, 0, 1.57)
 
         object_start_pose = gymapi.Transform()
@@ -420,7 +422,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
         self.block_indices = []
 
         self.right_fingertip_handles = [self.gym.find_asset_rigid_body_index(right_hand_asset, name) for name in
-                                  self.right_fingertips]
+                                        self.right_fingertips]
         self.left_fingertip_handles = [self.gym.find_asset_rigid_body_index(left_hand_asset, name) for name
                                        in self.left_fingertips]
 
@@ -884,7 +886,7 @@ def compute_full_state(self, asymm_obs=False):
                                                                                                                        :,
                                                                                                                        30:]
 
-        left_hand_pose_start = left_fingertip_obs_start + 95 # 307 = 212 + 95
+        left_hand_pose_start = left_fingertip_obs_start + 95  # 307 = 212 + 95
         # 307 - 309 left hand base position
         self.obs_buf[:, left_hand_pose_start:left_hand_pose_start + 3] = self.left_hand_pos
         # 310 - 312 left hand base rotation
@@ -1271,7 +1273,6 @@ def pre_physics_step(self, actions):
             #                                 self.object_dof_lower_limits[1], self.object_dof_upper_limits[1])
             # angle_offsets = self.actions[:, 26:32] * self.dt * self.orientation_scale
 
-
             self.apply_forces[:, 0, :] = actions[:, 0:3] * self.dt * self.transition_scale * 100000
             self.apply_forces[:, 0 + 16, :] = actions[:, 21:24] * self.dt * self.transition_scale * 100000
             self.apply_torque[:, 0, :] = self.actions[:, 3:6] * self.dt * self.orientation_scale * 1000

diff --git a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/qbsofthand_grasp.py b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/qbsofthand_grasp.py
@@ -7,8 +7,8 @@
 
 from rofunc.learning.RofuncRL.tasks.isaacgymenv.base.vec_task import VecTask
 from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *
-from rofunc.utils.oslab import get_rofunc_path
 from rofunc.utils.logger.beauty_logger import beauty_print
+from rofunc.utils.oslab import get_rofunc_path
 
 
 class QbSoftHandGraspTask(VecTask):
@@ -103,7 +103,7 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
             "point_cloud_for_distill": 157 + self.num_point_cloud_feature_dim * 3,
             "full_state": 157
         }
-        self.num_hand_obs = 45 + 65 + 21 + 6  # TODO
+        self.num_hand_obs = 45 + 95 + 21 + 6  # TODO
         self.up_axis = 'z'
 
         self.fingertips = ["right_qbhand_thumb_distal_link", "right_qbhand_index_distal_link",
@@ -218,12 +218,10 @@ def _create_envs(self, num_envs, spacing, num_per_row):
         """
         Create multiple parallel isaacgym environments
 
-        Args:
-            num_envs (int): The total number of environment 
-
-            spacing (float): Specifies half the side length of the square area occupied by each environment
-
-            num_per_row (int): Specify how many environments in a row
+        :param num_envs: The total number of environment
+        :param spacing: Specifies half the side length of the square area occupied by each environment
+        :param num_per_row: Specify how many environments in a row
+        :return:
         """
         lower = gymapi.Vec3(-spacing, -spacing, 0.0)
         upper = gymapi.Vec3(spacing, spacing, spacing)
@@ -237,7 +235,6 @@ def _create_envs(self, num_envs, spacing, num_per_row):
 
         if "asset" in self.cfg["env"]:
             asset_root = self.cfg["env"]["asset"].get("assetRoot", asset_root)
-            qbsofthand_asset_file = self.cfg["env"]["asset"].get("assetFileName", qbsofthand_asset_file)
 
         object_asset_file = self.asset_files_dict[self.object_type]
 
@@ -346,7 +343,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
 
         qbsofthand_start_pose = gymapi.Transform()
         qbsofthand_start_pose.p = gymapi.Vec3(0.55, 0.2, 0.8)
-        qbsofthand_start_pose.r = gymapi.Quat().from_euler_zyx(1.57, 0, 1.57)
+        qbsofthand_start_pose.r = gymapi.Quat().from_euler_zyx(-1.57, 0, -1.57)
 
         object_start_pose = gymapi.Transform()
         object_start_pose.p = gymapi.Vec3(0.0, 0.2, 0.6)
@@ -617,7 +614,6 @@ def compute_observations(self):
                                                                                         device=self.device).repeat(
                                                                                    self.num_envs, 1) * 0.0)
 
-
         self.right_hand_pos = self.rigid_body_states[:, 0, 0:3]
         self.right_hand_rot = self.rigid_body_states[:, 0, 3:7]
         self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot,
@@ -693,13 +689,13 @@ def compute_full_state(self, asymm_obs=False):
         num_ft_states = 13 * int(self.num_fingertips)  # 65
 
         self.obs_buf[:, 0:self.num_qbsofthand_dofs] = unscale(self.qbsofthand_dof_pos,
-                                                               self.qbsofthand_dof_lower_limits,
-                                                               self.qbsofthand_dof_upper_limits)
+                                                              self.qbsofthand_dof_lower_limits,
+                                                              self.qbsofthand_dof_upper_limits)
         self.obs_buf[:,
         self.num_qbsofthand_dofs:2 * self.num_qbsofthand_dofs] = self.vel_obs_scale * self.qbsofthand_dof_vel
         self.obs_buf[:,
         2 * self.num_qbsofthand_dofs:3 * self.num_qbsofthand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[
-                                                                                                     :, :15]
+                                                                                                   :, :15]
 
         fingertip_obs_start = 45
         self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(
@@ -723,7 +719,7 @@ def compute_full_state(self, asymm_obs=False):
         self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel
         self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos
         self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot
-        
+
     def compute_point_cloud_observation(self, collect_demonstration=False):
         """
         Compute the observations of all environment. The observation is composed of three parts: 
@@ -761,13 +757,13 @@ def compute_point_cloud_observation(self, collect_demonstration=False):
         num_ft_force_torques = 6 * int(self.num_fingertips / 2)  # 30
 
         self.obs_buf[:, 0:self.num_qbsofthand_dofs] = unscale(self.qbsofthand_dof_pos,
-                                                               self.qbsofthand_dof_lower_limits,
-                                                               self.qbsofthand_dof_upper_limits)
+                                                              self.qbsofthand_dof_lower_limits,
+                                                              self.qbsofthand_dof_upper_limits)
         self.obs_buf[:,
         self.num_qbsofthand_dofs:2 * self.num_qbsofthand_dofs] = self.vel_obs_scale * self.qbsofthand_dof_vel
         self.obs_buf[:,
         2 * self.num_qbsofthand_dofs:3 * self.num_qbsofthand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[
-                                                                                                     :, :24]
+                                                                                                   :, :24]
 
         fingertip_obs_start = 72  # 168 = 157 + 11
         self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(
@@ -917,8 +913,8 @@ def reset_idx(self, env_ids, goal_env_ids):
         self.qbsofthand_dof_pos[env_ids, :] = pos
 
         self.qbsofthand_dof_vel[env_ids, :] = self.qbsofthand_dof_default_vel + \
-                                               self.reset_dof_vel_noise * rand_floats[:,
-                                                                          5 + self.num_qbsofthand_dofs:5 + self.num_qbsofthand_dofs * 2]
+                                              self.reset_dof_vel_noise * rand_floats[:,
+                                                                         5 + self.num_qbsofthand_dofs:5 + self.num_qbsofthand_dofs * 2]
 
         self.prev_targets[env_ids, :self.num_qbsofthand_dofs] = pos
         self.cur_targets[env_ids, :self.num_qbsofthand_dofs] = pos

diff --git a/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py b/rofunc/learning/RofuncRL/tasks/isaacgymenv/hands/shadow_hand_grasp_and_place.py
@@ -1118,6 +1118,7 @@ def reset_idx(self, env_ids, goal_env_ids):
             goal_env_ids (tensor): The index of the environment that only goals need reset
 
         """
+        print("reset_idx")
         # randomization can happen only at reset time, since it can reset actor positions on GPU
         if self.randomize:
             self.apply_randomizations(self.randomization_params)