Merge pull request #4 from backend-developers-ltd/push

push image

andreea-popescu-reef authored Sep 18, 2024
2 parents ddcc0a9 + 69f258c, commit 3a1cb0c

Showing 4 changed files with 38 additions and 8 deletions.
21 changes: 16 additions & 5 deletions .github/workflows/build_push_image.yml
@@ -2,7 +2,9 @@ name: "CD: build & push image"

 on:
   push:
-    branches: [build-image]
+    branches:
+      - build-push-llama3-image
+      - build-push-phi3-image
   workflow_dispatch:

 env:
@@ -12,7 +14,7 @@ env:

 jobs:
   deploy:
-    timeout-minutes: 15
+    timeout-minutes: 30
     runs-on:
       group: bulkier
     steps:
@@ -31,13 +33,22 @@ jobs:
       run: |
         python -m pip install transformers torch
+    - name: Set environment variables based on branch
+      run: |
+        if [[ "${{ github.ref }}" == "refs/heads/build-push-llama3-image" ]]; then
+          echo "MODEL_NAME=llama3" >> $GITHUB_ENV
+        elif [[ "${{ github.ref }}" == "refs/heads/build-push-phi3-image" ]]; then
+          echo "MODEL_NAME=phi3" >> $GITHUB_ENV
+        fi
     - name: Docker build and push
       run: |
         df -h
-        IMAGE_NAME="${DOCKER_REPO_NAME}:${TAG_VERSION}"
IMAGE_NAME="${DOCKER_REPO_NAME}-{MODEL_NAME}:${TAG_VERSION}"
         cd src/compute_horde_prompt_gen
-        python download_model.py --model_name phi3 --huggingface_token "${{ secrets.HUGGINGFACE_API_KEY }}"
+        python download_model.py --model_name ${{ env.MODEL_NAME }} --huggingface_token "${{ secrets.HUGGINGFACE_API_KEY }}"
         docker build -t $IMAGE_NAME .
         docker push $IMAGE_NAME
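
For reference, a rough local equivalent of the build-and-push step for the llama3 branch, assuming DOCKER_REPO_NAME, TAG_VERSION, and HUGGINGFACE_API_KEY are exported with the values the workflow takes from its env block and secrets (neither is shown in this diff):

    # Hypothetical local dry run of the CI step; in CI, MODEL_NAME is derived from the branch name.
    MODEL_NAME=llama3
    IMAGE_NAME="${DOCKER_REPO_NAME}-${MODEL_NAME}:${TAG_VERSION}"
    cd src/compute_horde_prompt_gen
    python download_model.py --model_name "$MODEL_NAME" --huggingface_token "$HUGGINGFACE_API_KEY"
    docker build -t "$IMAGE_NAME" .
    docker push "$IMAGE_NAME"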
2 changes: 1 addition & 1 deletion src/compute_horde_prompt_gen/Dockerfile
@@ -18,7 +18,7 @@ RUN mkdir /output

 # Copy your Python script into the container
 COPY saved_models/ /app/saved_models/
-COPY *.py .
+COPY *.py ./

 # Set the entrypoint to run your script
 ENTRYPOINT ["python3", "run.py"]
19 changes: 19 additions & 0 deletions src/compute_horde_prompt_gen/download_model.py
@@ -31,16 +31,35 @@
     default="./saved_models/",
     help="Path to save the model and tokenizer to",
 )
+parser.add_argument(
+    "--quantize",
+    action="store_true",
+    help="Quantize the model",
+    default=False,
+)

 args = parser.parse_args()
 save_path = os.path.join(args.save_path, args.model_name)
 model_name = MODEL_PATHS[args.model_name]
 print(f"Saving {model_name} model to {save_path}")

+quantization_config = None
+if args.quantize:
+    import torch
+    from transformers import BitsAndBytesConfig
+
+    quantization_config = BitsAndBytesConfig(
+        llm_int8_enable_fp32_cpu_offload=False,
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.float16,
+    )
+    print("using quantized model")
+
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     # either give token directly or assume logged in with huggingface-cli
     token=args.huggingface_token or True,
+    quantization_config=quantization_config,
 )
 model.save_pretrained(save_path)

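A usage sketch for the new flag (not part of the commit); the model names match those the workflow passes (llama3, phi3), and --huggingface_token can be omitted when already logged in via huggingface-cli:

    # Hypothetical invocation: download phi3 and save a 4-bit quantized copy.
    python download_model.py --model_name phi3 --quantize --save_path ./saved_models/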
4 changes: 2 additions & 2 deletions src/compute_horde_prompt_gen/model.py
@@ -15,22 +15,22 @@ def __init__(self):
         pass

     def generate(self, prompts: list[str], num_return_sequences: int, **_kwargs):
-        content = f"Here is the list of prompts:\nHow are you?\nDescribe something\nCount to ten\n"
+        content = "Here is the list of prompts:\nHow are you?\nDescribe something\nCount to ten\n"
         return [content for _ in range(len(prompts) * num_return_sequences)]


 class GenerativeModel:
     def __init__(self, model_path: str, quantize: bool = False):
         self.input_prompt_ending = None

-        import torch
         from transformers import (
             AutoTokenizer,
             AutoModelForCausalLM,
         )

         quantization_config = None
         if quantize:
+            import torch
             from transformers import BitsAndBytesConfig

             quantization_config = BitsAndBytesConfig(
