Skip to content

Commit

Permalink
🚀 [RofuncRL] BiQbSoftHand Tasks almost ok
Browse files Browse the repository at this point in the history
  • Loading branch information
Skylark0924 committed Jan 2, 2024
1 parent 4606c7e commit 74553d7
Show file tree
Hide file tree
Showing 8 changed files with 617 additions and 290 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def inference(custom_args):
parser.add_argument("--num_envs", type=int, default=256)
parser.add_argument("--sim_device", type=int, default=0)
parser.add_argument("--rl_device", type=int, default=gpu_id)
parser.add_argument("--headless", type=str, default="False")
parser.add_argument("--headless", type=str, default="True")
parser.add_argument("--inference", action="store_true", help="turn to inference mode while adding this argument")
parser.add_argument("--ckpt_path", type=str, default=None)
custom_args = parser.parse_args()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from rofunc.learning.RofuncRL.tasks.isaacgymenv.base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *
from rofunc.utils.logger.beauty_logger import beauty_print
from rofunc.utils.oslab import get_rofunc_path


Expand Down Expand Up @@ -174,7 +175,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
self.right_hand_dof_pos = self.right_hand_dof_state[..., 0]
self.right_hand_dof_vel = self.right_hand_dof_state[..., 1]

self.left_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:, self.num_hand_dofs:self.num_hand_dofs * 2]
self.left_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:,
self.num_hand_dofs:self.num_hand_dofs * 2]
self.left_hand_dof_pos = self.left_hand_dof_state[..., 0]
self.left_hand_dof_vel = self.left_hand_dof_state[..., 1]

Expand Down Expand Up @@ -366,11 +368,11 @@ def _create_envs(self, num_envs, spacing, num_per_row):

# <editor-fold desc="set initial poses">
right_hand_start_pose = gymapi.Transform()
right_hand_start_pose.p = gymapi.Vec3(0.55, 0.2, 0.8)
right_hand_start_pose.p = gymapi.Vec3(0.25, 0.2, 0.8)
right_hand_start_pose.r = gymapi.Quat().from_euler_zyx(-1.57, 0, -1.57)

left_hand_start_pose = gymapi.Transform()
left_hand_start_pose.p = gymapi.Vec3(0.55, -0.2, 0.8)
left_hand_start_pose.p = gymapi.Vec3(0.25, -0.2, 0.8)
left_hand_start_pose.r = gymapi.Quat().from_euler_zyx(-1.57, 0, 1.57)

object_start_pose = gymapi.Transform()
Expand Down Expand Up @@ -420,7 +422,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
self.block_indices = []

self.right_fingertip_handles = [self.gym.find_asset_rigid_body_index(right_hand_asset, name) for name in
self.right_fingertips]
self.right_fingertips]
self.left_fingertip_handles = [self.gym.find_asset_rigid_body_index(left_hand_asset, name) for name
in self.left_fingertips]

Expand Down Expand Up @@ -884,7 +886,7 @@ def compute_full_state(self, asymm_obs=False):
:,
30:]

left_hand_pose_start = left_fingertip_obs_start + 95 # 307 = 212 + 95
left_hand_pose_start = left_fingertip_obs_start + 95 # 307 = 212 + 95
# 307 - 309 left hand base position
self.obs_buf[:, left_hand_pose_start:left_hand_pose_start + 3] = self.left_hand_pos
# 310 - 312 left hand base rotation
Expand Down Expand Up @@ -1271,7 +1273,6 @@ def pre_physics_step(self, actions):
# self.object_dof_lower_limits[1], self.object_dof_upper_limits[1])
# angle_offsets = self.actions[:, 26:32] * self.dt * self.orientation_scale


self.apply_forces[:, 0, :] = actions[:, 0:3] * self.dt * self.transition_scale * 100000
self.apply_forces[:, 0 + 16, :] = actions[:, 21:24] * self.dt * self.transition_scale * 100000
self.apply_torque[:, 0, :] = self.actions[:, 3:6] * self.dt * self.orientation_scale * 1000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

from rofunc.learning.RofuncRL.tasks.isaacgymenv.base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *
from rofunc.utils.oslab import get_rofunc_path
from rofunc.utils.logger.beauty_logger import beauty_print
from rofunc.utils.oslab import get_rofunc_path


class QbSoftHandGraspTask(VecTask):
Expand Down Expand Up @@ -103,7 +103,7 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
"point_cloud_for_distill": 157 + self.num_point_cloud_feature_dim * 3,
"full_state": 157
}
self.num_hand_obs = 45 + 65 + 21 + 6 # TODO
self.num_hand_obs = 45 + 95 + 21 + 6 # TODO
self.up_axis = 'z'

self.fingertips = ["right_qbhand_thumb_distal_link", "right_qbhand_index_distal_link",
Expand Down Expand Up @@ -218,12 +218,10 @@ def _create_envs(self, num_envs, spacing, num_per_row):
"""
Create multiple parallel isaacgym environments
Args:
num_envs (int): The total number of environment
spacing (float): Specifies half the side length of the square area occupied by each environment
num_per_row (int): Specify how many environments in a row
:param num_envs: The total number of environment
:param spacing: Specifies half the side length of the square area occupied by each environment
:param num_per_row: Specify how many environments in a row
:return:
"""
lower = gymapi.Vec3(-spacing, -spacing, 0.0)
upper = gymapi.Vec3(spacing, spacing, spacing)
Expand All @@ -237,7 +235,6 @@ def _create_envs(self, num_envs, spacing, num_per_row):

if "asset" in self.cfg["env"]:
asset_root = self.cfg["env"]["asset"].get("assetRoot", asset_root)
qbsofthand_asset_file = self.cfg["env"]["asset"].get("assetFileName", qbsofthand_asset_file)

object_asset_file = self.asset_files_dict[self.object_type]

Expand Down Expand Up @@ -346,7 +343,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):

qbsofthand_start_pose = gymapi.Transform()
qbsofthand_start_pose.p = gymapi.Vec3(0.55, 0.2, 0.8)
qbsofthand_start_pose.r = gymapi.Quat().from_euler_zyx(1.57, 0, 1.57)
qbsofthand_start_pose.r = gymapi.Quat().from_euler_zyx(-1.57, 0, -1.57)

object_start_pose = gymapi.Transform()
object_start_pose.p = gymapi.Vec3(0.0, 0.2, 0.6)
Expand Down Expand Up @@ -617,7 +614,6 @@ def compute_observations(self):
device=self.device).repeat(
self.num_envs, 1) * 0.0)


self.right_hand_pos = self.rigid_body_states[:, 0, 0:3]
self.right_hand_rot = self.rigid_body_states[:, 0, 3:7]
self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot,
Expand Down Expand Up @@ -693,13 +689,13 @@ def compute_full_state(self, asymm_obs=False):
num_ft_states = 13 * int(self.num_fingertips) # 65

self.obs_buf[:, 0:self.num_qbsofthand_dofs] = unscale(self.qbsofthand_dof_pos,
self.qbsofthand_dof_lower_limits,
self.qbsofthand_dof_upper_limits)
self.qbsofthand_dof_lower_limits,
self.qbsofthand_dof_upper_limits)
self.obs_buf[:,
self.num_qbsofthand_dofs:2 * self.num_qbsofthand_dofs] = self.vel_obs_scale * self.qbsofthand_dof_vel
self.obs_buf[:,
2 * self.num_qbsofthand_dofs:3 * self.num_qbsofthand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[
:, :15]
:, :15]

fingertip_obs_start = 45
self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(
Expand All @@ -723,7 +719,7 @@ def compute_full_state(self, asymm_obs=False):
self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel
self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos
self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot

def compute_point_cloud_observation(self, collect_demonstration=False):
"""
Compute the observations of all environment. The observation is composed of three parts:
Expand Down Expand Up @@ -761,13 +757,13 @@ def compute_point_cloud_observation(self, collect_demonstration=False):
num_ft_force_torques = 6 * int(self.num_fingertips / 2) # 30

self.obs_buf[:, 0:self.num_qbsofthand_dofs] = unscale(self.qbsofthand_dof_pos,
self.qbsofthand_dof_lower_limits,
self.qbsofthand_dof_upper_limits)
self.qbsofthand_dof_lower_limits,
self.qbsofthand_dof_upper_limits)
self.obs_buf[:,
self.num_qbsofthand_dofs:2 * self.num_qbsofthand_dofs] = self.vel_obs_scale * self.qbsofthand_dof_vel
self.obs_buf[:,
2 * self.num_qbsofthand_dofs:3 * self.num_qbsofthand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[
:, :24]
:, :24]

fingertip_obs_start = 72 # 168 = 157 + 11
self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(
Expand Down Expand Up @@ -917,8 +913,8 @@ def reset_idx(self, env_ids, goal_env_ids):
self.qbsofthand_dof_pos[env_ids, :] = pos

self.qbsofthand_dof_vel[env_ids, :] = self.qbsofthand_dof_default_vel + \
self.reset_dof_vel_noise * rand_floats[:,
5 + self.num_qbsofthand_dofs:5 + self.num_qbsofthand_dofs * 2]
self.reset_dof_vel_noise * rand_floats[:,
5 + self.num_qbsofthand_dofs:5 + self.num_qbsofthand_dofs * 2]

self.prev_targets[env_ids, :self.num_qbsofthand_dofs] = pos
self.cur_targets[env_ids, :self.num_qbsofthand_dofs] = pos
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1118,6 +1118,7 @@ def reset_idx(self, env_ids, goal_env_ids):
goal_env_ids (tensor): The index of the environment that only goals need reset
"""
print("reset_idx")
# randomization can happen only at reset time, since it can reset actor positions on GPU
if self.randomize:
self.apply_randomizations(self.randomization_params)
Expand Down
Loading

0 comments on commit 74553d7

Please sign in to comment.