Let reward modeling use ai2 entity (#360)

* let reward modeling use ai2 entity
* use the latest reward modeling image
* update changes

vwxyzjn authored Sep 19, 2024
1 parent 1fdc9ca commit 6a9f66c
Showing 4 changed files with 17 additions and 14 deletions.
6 changes: 3 additions & 3 deletions docs/algorithms/online_dpo.md

@@ -99,7 +99,7 @@ Here we are using --vllm_device cuda:7 to say we want to launch the vllm generat
 ```bash
 # for running TL;DR you can likely use GPUs with less memory
 python mason.py \
-    --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --cluster ai2/pluto-cirrascale ai2/prior-cirrascale ai2/s2-cirrascale ai2/general-cirrascale \
     --priority normal \
     --resumable \
@@ -145,7 +145,7 @@ python mason.py \
 # use ai2/jupiter-cirrascale-2 or ai2/pluto-cirrascale
 python mason.py \
     --cluster ai2/jupiter-cirrascale-2 \
-    --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --workspace ai2/tulu-3-dev \
     --priority high \
     --preemptible \
@@ -197,7 +197,7 @@ python mason.py \
 # use ai2/jupiter-cirrascale-2 or ai2/pluto-cirrascale
 python mason.py \
     --cluster ai2/jupiter-cirrascale-2 \
-    --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --workspace ai2/tulu-3-dev \
     --priority high \
     --preemptible \
6 changes: 3 additions & 3 deletions docs/algorithms/ppo.md

@@ -99,7 +99,7 @@ Here we are using --vllm_device cuda:7 to say we want to launch the vllm generat
 ```bash
 # for running TL;DR you can likely use GPUs with less memory
 python mason.py \
-    --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --cluster ai2/pluto-cirrascale ai2/prior-cirrascale ai2/s2-cirrascale ai2/general-cirrascale \
     --priority normal \
     --resumable \
@@ -145,7 +145,7 @@ python mason.py \
 # use ai2/jupiter-cirrascale-2 or ai2/pluto-cirrascale
 python mason.py \
     --cluster ai2/jupiter-cirrascale-2 \
-    --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --workspace ai2/tulu-3-dev \
     --priority high \
     --preemptible \
@@ -197,7 +197,7 @@ python mason.py \
 # use ai2/jupiter-cirrascale-2 or ai2/pluto-cirrascale
 python mason.py \
     --cluster ai2/pluto-cirrascale \
-    --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --workspace ai2/tulu-3-dev \
     --priority high \
     --preemptible \
10 changes: 5 additions & 5 deletions docs/algorithms/reward_modeling.md

@@ -56,7 +56,7 @@ Here is a command to train a simple reward model on the sentiment dataset taken
 ```bash
 python mason.py \
     --cluster ai2/pluto-cirrascale ai2/prior-cirrascale ai2/s2-cirrascale \
-    --image costah/open_instruct_dev --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --priority normal \
     --budget ai2/allennlp \
     --gpus 1 -- python open_instruct/reward_modeling.py \
@@ -88,7 +88,7 @@ You can run the following commands to launch experiments. Note that you can mix
 ```bash
 python mason.py \
     --cluster ai2/allennlp-cirrascale ai2/pluto-cirrascale ai2/prior-cirrascale ai2/s2-cirrascale \
-    --image costah/open_instruct_dev --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --priority normal \
     --budget ai2/allennlp \
     --gpus 1 -- python open_instruct/reward_modeling.py \
@@ -118,7 +118,7 @@ python mason.py \
 ```bash
 python mason.py \
     --cluster ai2/allennlp-cirrascale ai2/pluto-cirrascale ai2/prior-cirrascale ai2/s2-cirrascale \
-    --image costah/open_instruct_dev --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --priority normal \
     --budget ai2/allennlp \
     --gpus 8 -- accelerate launch --config_file configs/ds_configs/deepspeed_zero2.yaml \
@@ -149,7 +149,7 @@ python mason.py \
 ```bash
 python mason.py \
     --cluster ai2/allennlp-cirrascale ai2/pluto-cirrascale \
-    --image costah/open_instruct_dev --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --priority normal \
     --budget ai2/allennlp \
     --gpus 8 -- accelerate launch --config_file configs/ds_configs/deepspeed_zero3.yaml \
@@ -182,7 +182,7 @@ python mason.py \
 ```bash
 python mason.py \
     --cluster ai2/allennlp-cirrascale ai2/pluto-cirrascale \
-    --image costah/open_instruct_dev --pure_docker_mode \
+    --image nathanl/open_instruct_auto --pure_docker_mode \
     --priority normal \
     --budget ai2/allennlp \
     --gpus 8 -- accelerate launch --config_file configs/ds_configs/deepspeed_zero3.yaml \
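For reference, the hunks above only show the changed line plus a few lines of context. Assembled from the first reward_modeling.md hunk, the single-GPU launch command after this commit looks roughly like the following (the reward_modeling.py arguments that follow the trailing `\` fall outside the diff context and are omitted here rather than guessed):

```shell
# Sketch assembled from the diff context above; training-script arguments elided.
python mason.py \
    --cluster ai2/pluto-cirrascale ai2/prior-cirrascale ai2/s2-cirrascale \
    --image nathanl/open_instruct_auto --pure_docker_mode \
    --priority normal \
    --budget ai2/allennlp \
    --gpus 1 -- python open_instruct/reward_modeling.py
```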
9 changes: 6 additions & 3 deletions open_instruct/reward_modeling.py

@@ -51,6 +51,7 @@
     get_wandb_tags,
     is_beaker_job,
     maybe_get_beaker_config,
+    maybe_use_ai2_hf_entity,
     maybe_use_ai2_wandb_entity,
 )

@@ -172,9 +173,11 @@ def calculate_runtime_args_and_accelerator(args: Args, model_config: ModelConfig
     args.run_name = f"{args.exp_name}__{args.seed}__{time_int}"
     if args.push_to_hub:
         if args.hf_repo_id is None:  # auto-generate one
-            args.hf_repo_id = f"{args.exp_name}__{model_config.model_name_or_path.replace('/', '_')}"
-        if args.hf_entity is None:
-            args.hf_entity = api.whoami()["name"]
+            args.hf_repo_id = "open_instruct_dev"
+        if args.hf_entity is None:  # first try to use AI2 entity
+            args.hf_entity = maybe_use_ai2_hf_entity()
+        if args.hf_entity is None:  # then try to use the user's entity
+            args.hf_entity = HfApi().whoami()["name"]
         args.hf_repo_id = f"{args.hf_entity}/{args.hf_repo_id}"
         if args.hf_repo_revision is None:  # auto-generate one
             args.hf_repo_revision = args.run_name
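The net effect of this hunk is a two-step fallback: prefer the AI2 Hub entity when one is available (e.g. when running as a Beaker job), otherwise fall back to the logged-in user's own entity. A minimal, self-contained sketch of that resolution order, with `maybe_use_ai2_hf_entity()` and the `HfApi().whoami()` lookup replaced by injected stand-in callables so the logic can be shown in isolation (the function and parameter names below are hypothetical, not part of the diff):

```python
def resolve_hf_repo_id(hf_repo_id, hf_entity, ai2_entity_lookup, user_entity_lookup):
    """Resolve the final Hub repo id the way the updated reward_modeling.py does.

    ai2_entity_lookup stands in for maybe_use_ai2_hf_entity (returns None when
    no AI2 entity applies); user_entity_lookup stands in for HfApi().whoami().
    """
    if hf_repo_id is None:  # auto-generate one
        hf_repo_id = "open_instruct_dev"
    if hf_entity is None:  # first try to use AI2 entity
        hf_entity = ai2_entity_lookup()
    if hf_entity is None:  # then try to use the user's entity
        hf_entity = user_entity_lookup()
    return f"{hf_entity}/{hf_repo_id}"

# When the AI2 entity is available, it wins:
print(resolve_hf_repo_id(None, None, lambda: "allenai", lambda: "someuser"))
# → allenai/open_instruct_dev
# Outside that environment, the user's own entity is used:
print(resolve_hf_repo_id(None, None, lambda: None, lambda: "someuser"))
# → someuser/open_instruct_dev
```

An explicitly passed `--hf_entity` or `--hf_repo_id` short-circuits both lookups, since each `if ... is None` guard is skipped.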
