Commit: nits
natolambert committed Oct 25, 2024
1 parent 5b4bf2e commit 456aa32
Showing 2 changed files with 39 additions and 13 deletions.
48 changes: 37 additions & 11 deletions scripts/eval/tulu3_baselines.sh
@@ -1,11 +1,37 @@
-python scripts/submit_eval_jobs.py --model_name hf-NousResearch-Hermes-3-Llama-3.1-8B --location NousResearch/Hermes-3-Llama-3.1-8B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-NousResearch-Hermes-3-Llama-3.1-70B --location NousResearch/Hermes-3-Llama-3.1-70B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-llama_3_1_nemotron_70B_instruct_hf --location nvidia/Llama-3.1-Nemotron-70B-Instruct-HF --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-athene_70b --location Nexusflow/Athene-70B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-llama_3_1_supernova_lite --location arcee-ai/Llama-3.1-SuperNova-Lite --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-gemma_2_9b_it_simpo --location princeton-nlp/gemma-2-9b-it-SimPO --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-magpielm_8b_chat_v0_1 --location Magpie-Align/MagpieLM-8B-Chat-v0.1 --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-qwen2_5_72b_instruct --location Qwen/Qwen2.5-72B-Instruct --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-qwen2_5_7b_instruct --location Qwen/Qwen2.5-7B-Instruct --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-ministral_8b_instruct_2410 --location mistralai/Ministral-8B-Instruct-2410 --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
-python scripts/submit_eval_jobs.py --model_name hf-llama_3_tulu_2_dpo_70b --location allenai/llama-3-tulu-2-dpo-70b --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals --oe_eval_max_length 2048
+python scripts/submit_eval_jobs.py --model_name hf-NousResearch-Hermes-3-Llama-3.1-8B --location NousResearch/Hermes-3-Llama-3.1-8B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-NousResearch-Hermes-3-Llama-3.1-70B --location NousResearch/Hermes-3-Llama-3.1-70B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-llama_3_1_nemotron_70B_instruct_hf --location nvidia/Llama-3.1-Nemotron-70B-Instruct-HF --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-athene_70b --location Nexusflow/Athene-70B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-llama_3_1_supernova_lite --location arcee-ai/Llama-3.1-SuperNova-Lite --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-gemma_2_9b_it_simpo --location princeton-nlp/gemma-2-9b-it-SimPO --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-magpielm_8b_chat_v0_1 --location Magpie-Align/MagpieLM-8B-Chat-v0.1 --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-qwen2_5_72b_instruct --location Qwen/Qwen2.5-72B-Instruct --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-qwen2_5_7b_instruct --location Qwen/Qwen2.5-7B-Instruct --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-ministral_8b_instruct_2410 --location mistralai/Ministral-8B-Instruct-2410 --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals
+python scripts/submit_eval_jobs.py --model_name hf-llama_3_tulu_2_dpo_70b --location allenai/llama-3-tulu-2-dpo-70b --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --skip_oi_evals --oe_eval_max_length 2048
+
+# WITH SAFETY
+# python scripts/submit_eval_jobs.py --model_name hf-NousResearch-Hermes-3-Llama-3.1-8B --location NousResearch/Hermes-3-Llama-3.1-8B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-NousResearch-Hermes-3-Llama-3.1-70B --location NousResearch/Hermes-3-Llama-3.1-70B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-llama_3_1_nemotron_70B_instruct_hf --location nvidia/Llama-3.1-Nemotron-70B-Instruct-HF --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-athene_70b --location Nexusflow/Athene-70B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-llama_3_1_supernova_lite --location arcee-ai/Llama-3.1-SuperNova-Lite --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-gemma_2_9b_it_simpo --location princeton-nlp/gemma-2-9b-it-SimPO --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-magpielm_8b_chat_v0_1 --location Magpie-Align/MagpieLM-8B-Chat-v0.1 --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-qwen2_5_72b_instruct --location Qwen/Qwen2.5-72B-Instruct --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-qwen2_5_7b_instruct --location Qwen/Qwen2.5-7B-Instruct --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-ministral_8b_instruct_2410 --location mistralai/Ministral-8B-Instruct-2410 --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-llama_3_tulu_2_dpo_70b --location allenai/llama-3-tulu-2-dpo-70b --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_oe_eval_experiments --run_safety_evaluations --skip_oi_evals --oe_eval_max_length 2048
+
+# SAFETY ONLY
+# python scripts/submit_eval_jobs.py --model_name hf-NousResearch-Hermes-3-Llama-3.1-8B --location NousResearch/Hermes-3-Llama-3.1-8B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-NousResearch-Hermes-3-Llama-3.1-70B --location NousResearch/Hermes-3-Llama-3.1-70B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-llama_3_1_nemotron_70B_instruct_hf --location nvidia/Llama-3.1-Nemotron-70B-Instruct-HF --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-athene_70b --location Nexusflow/Athene-70B --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-llama_3_1_supernova_lite --location arcee-ai/Llama-3.1-SuperNova-Lite --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-gemma_2_9b_it_simpo --location princeton-nlp/gemma-2-9b-it-SimPO --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-magpielm_8b_chat_v0_1 --location Magpie-Align/MagpieLM-8B-Chat-v0.1 --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-qwen2_5_72b_instruct --location Qwen/Qwen2.5-72B-Instruct --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-qwen2_5_7b_instruct --location Qwen/Qwen2.5-7B-Instruct --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-ministral_8b_instruct_2410 --location mistralai/Ministral-8B-Instruct-2410 --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals
+# python scripts/submit_eval_jobs.py --model_name hf-llama_3_tulu_2_dpo_70b --location allenai/llama-3-tulu-2-dpo-70b --is_tuned --workspace tulu-3-results --preemptible --use_hf_tokenizer_template --beaker_image nathanl/open_instruct_auto --upload_to_hf allenai/tulu-3-evals --run_safety_evaluations --skip_oi_evals --oe_eval_max_length 2048
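
Note: every command in the updated script shares the same submission flags and differs only in the model name, its Hugging Face location, and an occasional extra flag. The sketch below shows one way such commands could be generated from a model list instead of maintained by hand; the trimmed BASELINES table, the SHARED_FLAGS grouping, and the choice to print or optionally run via subprocess are illustrative assumptions, not part of this commit.

```python
# Illustrative only: rebuild baseline eval commands from a model list.
# Running them still requires the repo's scripts/submit_eval_jobs.py and cluster access.
import shlex
import subprocess

# (model_name, HF location, extra flags) -- assumed structure, list trimmed for brevity.
BASELINES = [
    ("hf-qwen2_5_72b_instruct", "Qwen/Qwen2.5-72B-Instruct", ["--is_tuned"]),
    ("hf-qwen2_5_7b_instruct", "Qwen/Qwen2.5-7B-Instruct", ["--is_tuned"]),
    ("hf-llama_3_tulu_2_dpo_70b", "allenai/llama-3-tulu-2-dpo-70b",
     ["--is_tuned", "--oe_eval_max_length", "2048"]),
]

# Flags shared by every command in the script above.
SHARED_FLAGS = [
    "--workspace", "tulu-3-results",
    "--preemptible",
    "--use_hf_tokenizer_template",
    "--beaker_image", "nathanl/open_instruct_auto",
    "--upload_to_hf", "allenai/tulu-3-evals",
    "--run_oe_eval_experiments",
    "--skip_oi_evals",
]

for name, location, extra in BASELINES:
    cmd = [
        "python", "scripts/submit_eval_jobs.py",
        "--model_name", name,
        "--location", location,
        *extra,
        *SHARED_FLAGS,
    ]
    print(shlex.join(cmd))  # inspect the generated command
    # subprocess.run(cmd, check=True)  # uncomment to actually submit
```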
4 changes: 2 additions & 2 deletions scripts/submit_eval_jobs.py
@@ -15,7 +15,7 @@
 def adjust_batch_size(task_spec, model_name, batch_size_reduction):
     "Adjust batch size using heuristics that are good for A100-size GPUs."
     reduce_by_2 = ["13B"]
-    reduce_by_4 = ["30B", "34B", "40B", "65B", "70B", "70b", "72B"]
+    reduce_by_4 = ["30B", "34B", "40B", "65B", "70B", "70b", "72B", "72b"]
     # If not given, choose a value based on the model name.
     if batch_size_reduction is None:
         if any([pattern in model_name for pattern in reduce_by_2]):
@@ -37,7 +37,7 @@ def adjust_batch_size(task_spec, model_name, batch_size_reduction):
 def adjust_gpus(task_spec, experiment_group, model_name, gpu_multiplier):
     "Adjust GPU count using heuristics that are good for A100-size GPUs."
     medium = ["30B", "34B"]
-    large = ["40B", "65B", "70B", "70b", "72B"]
+    large = ["40B", "65B", "70B", "70b", "72B", "72b"]
     # If not given, choose a value based on model name.
     if gpu_multiplier is None:
         if any([pattern in model_name for pattern in medium]):
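
Context for the second change: adjust_batch_size and adjust_gpus do plain substring matching on the model name, so the uppercase "72B" pattern catches Qwen/Qwen2.5-72B-Instruct but not a lowercased name such as hf-qwen2_5_72b_instruct, which is presumably why "72b" is added here, mirroring the existing "70b" entry. A self-contained sketch of that heuristic follows; the divisor return values and the fallback of 1 are assumptions for illustration, whereas the repository's functions operate on a task spec.

```python
# Minimal sketch of the substring heuristic behind adjust_batch_size / adjust_gpus.
def batch_size_reduction_for(model_name: str) -> int:
    """Return an assumed divisor for the default batch size based on model-name hints."""
    reduce_by_2 = ["13B"]
    reduce_by_4 = ["30B", "34B", "40B", "65B", "70B", "70b", "72B", "72b"]
    if any(pattern in model_name for pattern in reduce_by_2):
        return 2
    if any(pattern in model_name for pattern in reduce_by_4):
        return 4
    return 1  # no hint matched: leave the batch size unchanged

assert batch_size_reduction_for("Qwen/Qwen2.5-72B-Instruct") == 4   # matches "72B"
assert batch_size_reduction_for("hf-qwen2_5_72b_instruct") == 4     # needs the new "72b"
assert batch_size_reduction_for("Qwen/Qwen2.5-7B-Instruct") == 1    # no pattern matches
```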
