Skip to content

Commit

Permalink
add oe-eval script
Browse files Browse the repository at this point in the history
  • Loading branch information
Hamish Ivison committed Aug 2, 2024
1 parent 42c1fa3 commit 70aaa57
Showing 1 changed file with 82 additions and 0 deletions.
82 changes: 82 additions & 0 deletions scripts/eval/oe-eval.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/bin/bash

# -e: abort the script as soon as a command exits non-zero.
# -x: trace every command to stderr before it runs, so the exact oe-eval
#     invocations are visible in the log.
set -ex

# A script for using oe-eval for our development!
# to install oe-eval, check out https://github.com/allenai/oe-eval-internal
# you'll need the current main for this to work.
# sadly, this is internal at Ai2 for now, but we are working on making it public!

# Example usages:
# ./scripts/eval/oe-eval.sh --model-name <model_name> --model-location <model_path> [--hf-upload]
# model_name should be a human-readable name for the model/run. This will be used in experiment tracking.
# model_path (the value passed to --model-location) should be one of:
# (a) a huggingface name (e.g. allenai/llama-3-tulu-2-8b),
# (b) a beaker dataset name (e.g. beaker://hamishivi/olmo_17_7b_turbo_dpo) - note the beaker://
# (c) a beaker dataset hash (e.g., beaker://01J28FDK3GDNA2C5E9JXBW1TP4) - note the beaker://
# (d) (untested) an absolute path to a model on cirrascale nfs.
# hf-upload is an optional flag to upload the results to huggingface for result tracking.
# e.g.:
# ./scripts/eval/oe-eval.sh --model-name olmo_17_7b_turbo_sft --model-location beaker://01J28FDK3GDNA2C5E9JXBW1TP4 --hf-upload
# ./scripts/eval/oe-eval.sh --model-name llama-3-tulu-2-dpo-8b --model-location allenai/llama-3-tulu-2-8b --hf-upload

# Tulu eval dev suite is:
# gsm8k::olmo1
# drop::llama3
# minerva_math::llama3
# codex_humaneval
# codex_humanevalplus
# ifeval::tulu
# popqa
# mmlu:mc::olmes

# Print the usage line and terminate the script.
#
# Arguments: none.
# Outputs:   the usage text on stderr -- this function is only reached on
#            argument errors, so the text is a diagnostic and must not end up
#            in captured stdout.
# Returns:   never; exits the shell with status 1.
usage() {
  echo "Usage: $0 --model-name MODEL_NAME --model-location MODEL_LOCATION [--hf-upload]" >&2
  exit 1
}

# Parse named arguments.
#
# Globals written: MODEL_NAME, MODEL_LOCATION, HF_UPLOAD -- each only when its
#                  flag is present; otherwise the variable is left untouched.
# Arguments:       the script's command-line arguments.
# Outputs:         error messages on stderr.
# Exits (through usage) on an unknown flag, or when --model-name /
# --model-location is the last argument. Without the explicit check the
# trailing-flag case consumed a non-existent value and then died on a failing
# `shift` under `set -e`, without printing any explanation.
parse_args() {
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      --model-name)
        if [[ "$#" -lt 2 ]]; then
          echo "Error: --model-name requires a value." >&2
          usage
        fi
        MODEL_NAME="$2"
        shift # drop the flag; the value is dropped by the shift below
        ;;
      --model-location)
        if [[ "$#" -lt 2 ]]; then
          echo "Error: --model-location requires a value." >&2
          usage
        fi
        MODEL_LOCATION="$2"
        shift
        ;;
      --hf-upload)
        HF_UPLOAD="true"
        ;;
      *)
        echo "Unknown parameter passed: $1" >&2
        usage
        ;;
    esac
    shift
  done
}

parse_args "$@"

# Check required arguments: both MODEL_NAME and MODEL_LOCATION must be
# non-empty (an unset variable and an explicit empty value are treated alike).
# The error is a diagnostic, so it is written to stderr rather than stdout;
# usage then prints the usage line and exits with status 1.
if [[ -z "$MODEL_NAME" || -z "$MODEL_LOCATION" ]]; then
  echo "Error: --model-name and --model-location are required." >&2
  usage
fi

# Path-safe variant of the run name: every '/' in MODEL_NAME becomes '_'.
MODEL_NAME_SAFE="${MODEL_NAME//\//_}"

# HF_UPLOAD falls back to "false" when it is unset or empty; a value that is
# already present (set by the --hf-upload flag or inherited from the
# environment) is kept as is.
: "${HF_UPLOAD:=false}"

# Build the extra oe-eval argument string. It is deliberately a single plain
# string ("<flag> <value>", or empty) because the oe-eval call expands it
# unquoted.
# NOTE(review): the '//' in the save dir is reproduced exactly as written --
# presumably it separates the HF repo from the path inside it; confirm against
# oe-eval before "fixing" it.
case "$HF_UPLOAD" in
  true)
    HF_UPLOAD_ARG="--hf-save-dir allenai/tulu-3-evals//results/${MODEL_NAME_SAFE}"
    ;;
  *)
    HF_UPLOAD_ARG=""
    ;;
esac

# Run oe-eval with different tasks
TASKS=("gsm8k::olmo1" "drop::llama3" "minerva_math::llama3" "codex_humaneval" "codex_humanevalplus" "ifeval::tulu" "popqa" "mmlu:mc::olmes")
MODEL_TYPE="vllm"
BATCH_SIZE_VLLM=10000
BATCH_SIZE_OTHER=1

# Launch one oe-eval invocation per entry in TASKS, in order.
#
# Globals read: TASKS, MODEL_TYPE, BATCH_SIZE_VLLM, BATCH_SIZE_OTHER,
#               MODEL_NAME, MODEL_LOCATION, HF_UPLOAD_ARG.
# Arguments:    none.
# Returns:      status of the last oe-eval call (under the script's `set -e`
#               the first failing call aborts the run).
run_eval_suite() {
  local task batch_size
  for task in "${TASKS[@]}"; do
    # mmlu:mc::olmes is the one task that gets BATCH_SIZE_OTHER; every other
    # task uses BATCH_SIZE_VLLM.
    if [[ "$task" == "mmlu:mc::olmes" ]]; then
      batch_size=$BATCH_SIZE_OTHER
    else
      batch_size=$BATCH_SIZE_VLLM
    fi

    # The JSON for --model-args is quoted so that it always reaches oe-eval as
    # ONE argument, even when MODEL_LOCATION contains whitespace or glob
    # characters. MODEL_LOCATION is interpolated verbatim: a value containing
    # '"' or '\' would still yield invalid JSON.
    #
    # HF_UPLOAD_ARG is intentionally left unquoted: it is either empty (adds
    # no argument) or a "<flag> <value>" string that must split into two words.
    # shellcheck disable=SC2086
    oe-eval \
      --model "$MODEL_NAME" \
      --beaker-workspace "ai2/tulu-3-results" \
      --beaker-budget ai2/oe-adapt \
      --task "$task" \
      --model-type "$MODEL_TYPE" \
      --batch-size "$batch_size" \
      --model-args "{\"model_path\":\"${MODEL_LOCATION}\"}" \
      ${HF_UPLOAD_ARG}
  done
}

run_eval_suite

0 comments on commit 70aaa57

Please sign in to comment.