allenai · vwxyzjn · Aug 27, 2024 · Aug 27, 2024 · Aug 27, 2024 · Aug 27, 2024
diff --git a/.github/workflows/push-image-olmo.yml b/.github/workflows/push-image-olmo.yml
@@ -23,14 +23,15 @@ on:
       - '.github/workflows/push-image-olmo.yml'
       # Note, add .olmo dockerfile + requirements if adding auto build to those
     branches: [main]
-  # pull_request: # note, comment this out for running on every push
-  #   # Also run on PRs that update the files in the image's directory (other than README).
-  #   branches: [main]
-  #   paths:
-  #     - 'open_instruct/**'
-  #     - '!open_instruct/README.md'
-  #     - 'requirements-olmo.txt'
-  #     - 'Dockerfile.olmo'
+  pull_request: # note, comment this out for running on every push
+    # Build on PRs into the listed base branches that touch the OLMo image inputs below (README excluded).
+    branches: [main, oe-eval-ci-test5]  # NOTE(review): oe-eval-ci-test5 looks like a temporary test branch; confirm
+    paths:
+      - 'open_instruct/**'
+      - '!open_instruct/README.md'
+      - 'requirements-olmo.txt'
+      - 'Dockerfile.olmo'
+      - '.github/workflows/push-image-olmo.yml'
   workflow_dispatch:  # This allows us to manually trigger a build through the GitHub UI.
 
 env:
@@ -47,7 +48,7 @@ jobs:
 
       - uses: actions/checkout@v3
         with:
-          repository: vwxyzjn/learn-fsdp2 # dummy private repo to test out deploy keys
+          repository: allenai/oe-eval-internal
           path: './oe-eval-internal'
           ssh-key: ${{ secrets.OE_EVAL_GIT_CLONE_ACCESS_PRIVATE_SSH_DEPLOY_KEY }}
 

diff --git a/.github/workflows/push-image.yml b/.github/workflows/push-image.yml
@@ -23,14 +23,15 @@ on:
       - '.github/workflows/push-image.yml'
       # Note, add .olmo dockerfile + requirements if adding auto build to those
     branches: [main]
-  # pull_request: # note, comment this out for running on every push
-  #   # Also run on PRs that update the files in the image's directory (other than README).
-  #   branches: [main]
-  #   paths:
-  #     - 'open_instruct/**'
-  #     - '!open_instruct/README.md'
-  #     - 'requirements.txt'
-  #     - 'Dockerfile'
+  pull_request: # note, comment this out for running on every push
+    # Build on PRs into the listed base branches that touch the default image inputs below (README excluded).
+    branches: [main, oe-eval-ci-test5]  # NOTE(review): oe-eval-ci-test5 looks like a temporary test branch; confirm
+    paths:
+      - 'open_instruct/**'
+      - '!open_instruct/README.md'
+      - 'requirements.txt'
+      - 'Dockerfile'
+      - '.github/workflows/push-image.yml'
   workflow_dispatch:  # This allows us to manually trigger a build through the GitHub UI.
 
 env:
@@ -47,7 +48,7 @@ jobs:
 
       - uses: actions/checkout@v3
         with:
-          repository: vwxyzjn/learn-fsdp2 # dummy private repo to test out deploy keys
+          repository: allenai/oe-eval-internal
           path: './oe-eval-internal'
           ssh-key: ${{ secrets.OE_EVAL_GIT_CLONE_ACCESS_PRIVATE_SSH_DEPLOY_KEY }}
 

diff --git a/scripts/eval/oe-eval.sh b/scripts/eval/oe-eval.sh
@@ -32,7 +32,7 @@ set -ex
 
 # Function to print usage
 usage() {
-    echo "Usage: $0 --model-name MODEL_NAME --model-location MODEL_LOCATION [--hf-upload]"
+    echo "Usage: $0 --model-name MODEL_NAME --model-location MODEL_LOCATION [--revision REVISION] [--hf-upload]"
     exit 1
 }
 
@@ -41,6 +41,7 @@ while [[ "$#" -gt 0 ]]; do
     case $1 in
         --model-name) MODEL_NAME="$2"; shift ;;
         --model-location) MODEL_LOCATION="$2"; shift ;;
+        --revision) REVISION="$2"; shift ;;
         --hf-upload) HF_UPLOAD="true" ;;
         *) echo "Unknown parameter passed: $1"; usage ;;
     esac
@@ -58,6 +59,7 @@ MODEL_NAME_SAFE=${MODEL_NAME//\//_}
 
 # Set defaults for optional arguments
 HF_UPLOAD="${HF_UPLOAD:-false}"
+REVISION="${REVISION:-main}"
 
 # Set HF_UPLOAD_ARG if HF_UPLOAD is true
 if [ "$HF_UPLOAD" == "true" ]; then
@@ -83,5 +85,13 @@ for TASK in "${TASKS[@]}"; do
     else
         BATCH_SIZE=$BATCH_SIZE_VLLM
     fi
-    python oe-eval-internal/oe_eval/launch.py --model "$MODEL_NAME" --beaker-workspace "ai2/tulu-3-results" --beaker-budget ai2/oe-adapt --task "$TASK" $MODEL_TYPE --batch-size "$BATCH_SIZE" --model-args {\"model_path\":\"${MODEL_LOCATION}\"} ${HF_UPLOAD_ARG} --gpus "$GPU_COUNT"
+    python oe-eval-internal/oe_eval/launch.py \
+        --revision "$REVISION" \
+        --model "$MODEL_NAME" \
+        --beaker-workspace "ai2/tulu-3-results" \
+        --beaker-budget ai2/oe-adapt \
+        --task "$TASK" $MODEL_TYPE \
+        --batch-size "$BATCH_SIZE" \
+        --model-args "{\"model_path\":\"${MODEL_LOCATION}\",\"revision\":\"${REVISION}\",\"tokenizer_revision\":\"${REVISION}\"}" ${HF_UPLOAD_ARG} \
+        --gpus "$GPU_COUNT"
 done
diff --git a/scripts/submit_eval_jobs.py b/scripts/submit_eval_jobs.py
@@ -579,6 +579,8 @@ def adjust_gpus(task_spec, experiment_group, model_name, gpu_multiplier):
         oe_eval_cmd += f" --model-location {model_info[1]}"
     else:
         oe_eval_cmd += f" --model-location beaker://{model_info[1]}"
+    if args.hf_revision:
+        oe_eval_cmd += f" --revision {args.hf_revision}"
     subprocess.Popen(oe_eval_cmd, shell=True)
 
 # create an experiment that runs the safety eval tasks