From 55607cd2485f96a7539a53d32ed050090ebd4047 Mon Sep 17 00:00:00 2001
From: Andreea Popescu
Date: Tue, 17 Sep 2024 10:05:19 +0100
Subject: [PATCH 1/2] quantized model

---
 src/compute_horde_prompt_gen/Dockerfile        |  2 +-
 src/compute_horde_prompt_gen/download_model.py | 18 ++++++++++++++++++
 src/compute_horde_prompt_gen/model.py          |  4 ++--
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/compute_horde_prompt_gen/Dockerfile b/src/compute_horde_prompt_gen/Dockerfile
index ca7b698..a504b03 100644
--- a/src/compute_horde_prompt_gen/Dockerfile
+++ b/src/compute_horde_prompt_gen/Dockerfile
@@ -18,7 +18,7 @@ RUN mkdir /output
 # Copy your Python script into the container
 COPY saved_models/ /app/saved_models/
 
-COPY *.py .
+COPY *.py ./
 
 # Set the entrypoint to run your script
 ENTRYPOINT ["python3", "run.py"]
diff --git a/src/compute_horde_prompt_gen/download_model.py b/src/compute_horde_prompt_gen/download_model.py
index 7516d76..0975e23 100644
--- a/src/compute_horde_prompt_gen/download_model.py
+++ b/src/compute_horde_prompt_gen/download_model.py
@@ -31,16 +31,34 @@
         default="./saved_models/",
         help="Path to save the model and tokenizer to",
     )
+    parser.add_argument(
+        "--quantize",
+        action="store_true",
+        help="Quantize the model",
+        default=False,
+    )
     args = parser.parse_args()
 
     save_path = os.path.join(args.save_path, args.model_name)
     model_name = MODEL_PATHS[args.model_name]
     print(f"Saving {model_name} model to {save_path}")
 
+    if args.quantize:
+        import torch
+        from transformers import BitsAndBytesConfig
+
+        quantization_config = BitsAndBytesConfig(
+            llm_int8_enable_fp32_cpu_offload=False,
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+        )
+        print("using quantized model")
+
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         # either give token directly or assume logged in with huggingface-cli
         token=args.huggingface_token or True,
+        quantization_config=quantization_config,
     )
     model.save_pretrained(save_path)
 
diff --git a/src/compute_horde_prompt_gen/model.py b/src/compute_horde_prompt_gen/model.py
index d68b513..43218f3 100644
--- a/src/compute_horde_prompt_gen/model.py
+++ b/src/compute_horde_prompt_gen/model.py
@@ -15,7 +15,7 @@ def __init__(self):
         pass
 
     def generate(self, prompts: list[str], num_return_sequences: int, **_kwargs):
-        content = f"Here is the list of prompts:\nHow are you?\nDescribe something\nCount to ten\n"
+        content = "Here is the list of prompts:\nHow are you?\nDescribe something\nCount to ten\n"
         return [content for _ in range(len(prompts) * num_return_sequences)]
 
 
@@ -23,7 +23,6 @@ class GenerativeModel:
     def __init__(self, model_path: str, quantize: bool = False):
         self.input_prompt_ending = None
 
-        import torch
         from transformers import (
             AutoTokenizer,
             AutoModelForCausalLM,
@@ -31,6 +30,7 @@ def __init__(self, model_path: str, quantize: bool = False):
 
         quantization_config = None
         if quantize:
+            import torch
             from transformers import BitsAndBytesConfig
 
             quantization_config = BitsAndBytesConfig(

From 69f258cac3bf0cbf40fa94482b30fba08cf60079 Mon Sep 17 00:00:00 2001
From: Andreea Popescu
Date: Tue, 17 Sep 2024 10:53:49 +0100
Subject: [PATCH 2/2] better build push

---
 .github/workflows/build_push_image.yml | 21 ++++++++++++++++-----
 .../download_model.py                   |  1 +
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build_push_image.yml b/.github/workflows/build_push_image.yml
index c38630c..5192dca 100644
--- a/.github/workflows/build_push_image.yml
+++ b/.github/workflows/build_push_image.yml
@@ -2,7 +2,9 @@ name: "CD: build & push image"
push image" on: push: - branches: [build-image] + branches: + - build-push-llama3-image + - build-push-phi3-image workflow_dispatch: env: @@ -12,7 +14,7 @@ env: jobs: deploy: - timeout-minutes: 15 + timeout-minutes: 30 runs-on: group: bulkier steps: @@ -31,13 +33,22 @@ jobs: run: | python -m pip install transformers torch + - name: Set environment variables based on branch + run: | + if [[ "${{ github.ref }}" == "refs/heads/build-push-llama3-image" ]]; then + echo "MODEL_NAME=llama3" >> $GITHUB_ENV + elif [[ "${{ github.ref }}" == "refs/heads/build-push-phi3-image" ]]; then + echo "MODEL_NAME=phi3" >> $GITHUB_ENV + fi + - name: Docker build and push run: | - df -h - IMAGE_NAME="${DOCKER_REPO_NAME}:${TAG_VERSION}" + IMAGE_NAME="${DOCKER_REPO_NAME}-{MODEL_NAME}:${TAG_VERSION}" cd src/compute_horde_prompt_gen - python download_model.py --model_name phi3 --huggingface_token "${{ secrets.HUGGINGFACE_API_KEY }}" + python download_model.py --model_name ${{ env.MODEL_NAME }} --huggingface_token "${{ secrets.HUGGINGFACE_API_KEY }}" docker build -t $IMAGE_NAME . + + docker push $IMAGE_NAME diff --git a/src/compute_horde_prompt_gen/download_model.py b/src/compute_horde_prompt_gen/download_model.py index 0975e23..8e062b5 100644 --- a/src/compute_horde_prompt_gen/download_model.py +++ b/src/compute_horde_prompt_gen/download_model.py @@ -43,6 +43,7 @@ model_name = MODEL_PATHS[args.model_name] print(f"Saving {model_name} model to {save_path}") + quantization_config = None if args.quantize: import torch from transformers import BitsAndBytesConfig