compose: utilize .env
so we can store ports, hosts, and paths in one place and just reference them

also simplifies the Containerfile

there are no models being downloaded anymore; you need to supply them
using $MODELS_PATH - this directory is bind-mounted inside the llama-cpp
container

Signed-off-by: Tomas Tomecek <[email protected]>
Co-authored-by: Jiri Podivin <[email protected]>
Co-authored-by: Jiri Konecny <[email protected]>
3 people committed Aug 7, 2024
1 parent a794311 commit c9c4425
Showing 4 changed files with 35 additions and 18 deletions.
7 changes: 7 additions & 0 deletions .env
@@ -0,0 +1,7 @@
LLAMA_CPP_SERVER_PORT=8090
LLAMA_CPP_HOST=llama-cpp-server
LLAMA_CPP_SERVER=http://$LLAMA_CPP_HOST
LOGDETECTIVE_SERVER_PORT=8080
MODEL_FILEPATH=/models/mistral-7b-instruct-v0.2.Q4_K_S.gguf
# for some reason, fastapi cripples sys.path and some deps cannot be found
PYTHONPATH=/src:/usr/local/lib64/python3.12/site-packages:/usr/lib64/python312.zip:/usr/lib64/python3.12/:/usr/lib64/python3.12/lib-dynload:/usr/local/lib/python3.12/site-packages:/usr/lib64/python3.12/site-packages:/usr/lib/python3.12/site-packages
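
For context: docker-compose loads a `.env` file from the project directory automatically and uses it both for `${VAR}` interpolation in `docker-compose.yaml` and, via the `env_file: .env` entries below, as environment variables inside the containers (which is how `PYTHONPATH` reaches the server). A quick way to inspect the resolved values is, roughly:
```
# print docker-compose.yaml with the ${...} references from .env already substituted
$ docker-compose config
```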
21 changes: 10 additions & 11 deletions Containerfile
@@ -1,17 +1,16 @@
FROM fedora:40
# Fedora's llama-cpp-python is segfaulting on the mistral model we use :/
RUN dnf install -y fastapi-cli python3-fastapi python3-requests python3-drain3 python3-pip python3-pydantic-settings python3-starlette+full \
&& pip3 install sse-starlette starlette-context huggingface_hub[cli] \
gcc gcc-c++ python3-scikit-build git-core \
&& dnf clean all
# the newest 0.2.86 fails to build, it seems vendored llama-cpp is missing in the archive
RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context \
&& mkdir /src

# we need to bind mount models: this takes a lot of time to download and makes the image huge
RUN mkdir /models \
&& huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir /models --local-dir-use-symlinks False

# Fedora's llama-cpp-python is segfaulting on the mistral model above :/
RUN dnf install -y gcc gcc-c++ python3-scikit-build \
&& pip3 install -U llama_cpp_python
# uncomment below if you need to download the model, otherwise just bindmount your local
# models inside the container
# RUN pip3 install huggingface_hub[cli] \
# && mkdir /models \
# && huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir /models --local-dir-use-symlinks False

COPY ./logdetective/ /src/logdetective/

# --no-reload: doesn't work in a container - `PermissionError: Permission denied (os error 13) about ["/proc"]`
CMD ["fastapi", "dev", "/src/logdetective/server.py", "--host", "0.0.0.0", "--port", "8080", "--no-reload"]
8 changes: 8 additions & 0 deletions README.md
@@ -163,6 +163,14 @@ Requests can then be made with post requests, for example:

We also have a Containerfile and composefile to run the logdetective server and llama server in containers.

Before doing `docker-compose up`, make sure to set the `MODELS_PATH` environment variable and point it to a directory with your local model files:
```
$ ll $MODELS_PATH
-rw-r--r--. 1 tt tt 3.9G apr 10 17:18 mistral-7b-instruct-v0.2.Q4_K_S.gguf
```

If the variable is not set, `./models` is mounted inside by default.
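
If you don't have a model file yet, one way to get one is the same `huggingface-cli` download the Containerfile used previously (kept there as a comment); for example, with `~/models` as an illustrative location and the filename matching `MODEL_FILEPATH` in `.env`:
```
$ export MODELS_PATH=~/models
$ mkdir -p $MODELS_PATH
$ pip3 install 'huggingface_hub[cli]'
$ huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_S.gguf \
    --local-dir $MODELS_PATH --local-dir-use-symlinks False
```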


License
-------
17 changes: 10 additions & 7 deletions docker-compose.yaml
@@ -3,12 +3,15 @@ services:
  llama-cpp:
    build:
      context: .
    hostname: llama-cpp-server
    command: "python3 -m llama_cpp.server --model /models/mistral-7b-instruct-v0.2.Q4_K_M.gguf --host 0.0.0.0 --port 8090"
    hostname: "${LLAMA_CPP_HOST}"
    command: "python3 -m llama_cpp.server --model ${MODEL_FILEPATH} --host 0.0.0.0 --port ${LLAMA_CPP_SERVER_PORT}"
    stdin_open: true
    tty: true
    env_file: .env
    ports:
      - 8090:8090
      - "${LLAMA_CPP_SERVER_PORT-8090}:${LLAMA_CPP_SERVER_PORT-8090}"
    volumes:
      - ${MODELS_PATH-./models}:/models:Z
  server:
    build:
      context: .
@@ -18,7 +21,7 @@
    volumes:
      - .:/src/:z
    ports:
      - 8080:8080
    environment:
      LLAMA_CPP_SERVER: "http://llama-cpp-server"
      LLAMA_CPP_SERVER_PORT: "8090"
      - ${LOGDETECTIVE_SERVER_PORT-8080}:${LOGDETECTIVE_SERVER_PORT-8080}
    env_file: .env
    # --no-reload: doesn't work in a container - `PermissionError: Permission denied (os error 13) about ["/proc"]`
    command: fastapi dev /src/logdetective/server.py --host 0.0.0.0 --port $LOGDETECTIVE_SERVER_PORT --no-reload
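
During compose interpolation, values exported in the shell take precedence over `.env`, and the `${VAR-default}` form falls back to the default when neither is set, so individual settings can be overridden per invocation; a rough usage sketch (the path is illustrative):
```
# the shell value wins over the ./models default; /srv/gguf-models is just an example path
$ MODELS_PATH=/srv/gguf-models docker-compose up --build
```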
