docs: polish documents

PKU-Alignment · Aug 21, 2023 · 5610135 · 5610135
2 parents 7f80875 + cde2df1
commit 5610135
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -50,6 +50,9 @@
 
 Here we provide a table of Safe RL algorithms that the benchmark includes.
 
+
+**Note: Four more classic RL algorithms are also included in the benchmark, namely PG, NaturalPG, TRPO, and PPO.**
+
 |                                 Algorithm                                  |    Proceedings&Cites    |                                 Official Code Repo                                  |                                                         Official Code Last Update                                                          |                                                                      Official Github Stars                                                                      |
 | :------------------------------------------------------------------------: | :---------------------: | :---------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------: |
 |             [PPO-Lag](https://cdn.openai.com/safexp-short.pdf)             |           :x:           |          [Tensorflow 1 ](https://github.com/openai/safety-starter-agents)           |             ![GitHub last commit](https://img.shields.io/github/last-commit/openai/safety-starter-agents?label=last%20update)              |         [![GitHub stars](https://img.shields.io/github/stars/openai/safety-starter-agents)](https://github.com/openai/safety-starter-agents/stargazers)         |

diff --git a/docs/source/usage/train.rst b/docs/source/usage/train.rst
@@ -48,9 +48,9 @@ We provide the detailed description of the command line arguments in the followi
         |                    | environments                     |                                               |
         +--------------------+----------------------------------+-----------------------------------------------+
         | --total-steps      | Total timesteps of the           | 10000000                                      |
-        |                    | experiments                      |                                               |
+        |                    | experiments                      |                                               |
         +--------------------+----------------------------------+-----------------------------------------------+
-        | --task           | ID of the environment            | "SafetyPointGoal1-v0"                         |
+        | --task             | ID of the environment            | "SafetyPointGoal1-v0"                         |
         +--------------------+----------------------------------+-----------------------------------------------+
         | --use-eval         | Toggles evaluation               | False                                         |
         +--------------------+----------------------------------+-----------------------------------------------+

diff --git a/safepo/evaluate.py b/safepo/evaluate.py
@@ -30,7 +30,7 @@ def eval_single_agent(eval_dir, eval_episodes):
     config_path = eval_dir + '/config.json'
     config = json.load(open(config_path, 'r'))
 
-    env_id = config['task']
+    env_id = config['task'] if 'task' in config.keys() else config['env_name']
     env_norms = os.listdir(eval_dir)
     env_norms = [env_norm for env_norm in env_norms if env_norm.endswith('.pkl')]
     final_norm_name = sorted(env_norms)[-1]

diff --git a/safepo/utils/config.py b/safepo/utils/config.py
@@ -62,11 +62,10 @@
 }
 
 isaac_gym_map = {
-    "ShadowHandOver_Safe_joint": "shadow_hand_over_safe_finger",
+    "ShadowHandOver_Safe_finger": "shadow_hand_over_safe_finger",
     "ShadowHandOver_Safe_joint": "shadow_hand_over_safe_joint",
     "ShadowHandCatchOver2Underarm_Safe_finger": "shadow_hand_catch_over_2_underarm_safe_finger",
     "ShadowHandCatchOver2Underarm_Safe_joint": "shadow_hand_catch_over_2_underarm_safe_joint",
-    "ShadowHandCatchUnderarm": "shadow_hand_catch_underarm",
     "FreightFrankaCloseDrawer": "freight_franka_close_drawer",
     "FreightFrankaPickAndPlace": "freight_franka_pick_and_place",
 }