Return to original model

Due to the spread of results, I can't set the score to be within a 6% margin for the chat version of the model.
HabanaAI · Oct 31, 2024 · 3635263 · 3635263
1 parent 3355ea2
commit 3635263
Showing 1 changed file with 4 additions and 4 deletions.
diff --git a/.jenkins/lm-eval-harness/configs/Llama-2-7B-hf.yaml b/.jenkins/lm-eval-harness/configs/Llama-2-7B-hf.yaml
@@ -1,14 +1,14 @@
 # These scores were chosen to place within 6% range of values achieved using  vLLM on HPU:
-# 0.192 - 0.220
+# 0.148 - 0.164
 # where on https://www.llama.com/llama2/: 0.146 is given
-model_name: "/mnt/weka/data/pytorch/llama2/Llama-2-7b-chat-hf"
+model_name: "/mnt/weka/data/pytorch/llama2/Llama-2-7b-hf"
 tasks:
 - name: "gsm8k"
   metrics:
   - name: "exact_match,strict-match"
-    value: 0.206
+    value: 0.155
   - name: "exact_match,flexible-extract"
-    value: 0.206
+    value: 0.155
 limit: 250
 num_fewshot: 5
 dtype: "bfloat16"