-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
so we can store ports, hosts, and paths in one place and just reference them; this also simplifies the Containerfile. No models are downloaded anymore: you need to supply them using $MODELS_PATH - this directory is bind-mounted inside the llama-cpp container. Signed-off-by: Tomas Tomecek <[email protected]> Co-authored-by: Jiri Podivin <[email protected]> Co-authored-by: Jiri Konecny <[email protected]>
- Loading branch information
1 parent
a794311
commit c9c4425
Showing
4 changed files
with
35 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
LLAMA_CPP_SERVER_PORT=8090 | ||
LLAMA_CPP_HOST=llama-cpp-server | ||
LLAMA_CPP_SERVER=http://$LLAMA_CPP_HOST | ||
LOGDETECTIVE_SERVER_PORT=8080 | ||
MODEL_FILEPATH=/models/mistral-7b-instruct-v0.2.Q4_K_S.gguf | ||
# for some reason, fastapi cripples sys.path and some deps cannot be found | ||
PYTHONPATH=/src:/usr/local/lib64/python3.12/site-packages:/usr/lib64/python312.zip:/usr/lib64/python3.12/:/usr/lib64/python3.12/lib-dynload:/usr/local/lib/python3.12/site-packages:/usr/lib64/python3.12/site-packages:/usr/lib/python3.12/site-packages |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,16 @@ | ||
FROM fedora:40 | ||
# Fedora's llama-cpp-python is segfaulting on the mistral model we use :/ | ||
RUN dnf install -y fastapi-cli python3-fastapi python3-requests python3-drain3 python3-pip python3-pydantic-settings python3-starlette+full \ | ||
&& pip3 install sse-starlette starlette-context huggingface_hub[cli] \ | ||
gcc gcc-c++ python3-scikit-build git-core \ | ||
&& dnf clean all | ||
# the newest 0.2.86 fails to build, it seems vendored llama-cpp is missing in the archive | ||
RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context \ | ||
&& mkdir /src | ||
|
||
# we need to bind mount models: this takes a lot of time to download and makes the image huge | ||
RUN mkdir /models \ | ||
&& huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir /models --local-dir-use-symlinks False | ||
|
||
# Fedora's llama-cpp-python is segfaulting on the mistral model above :/ | ||
RUN dnf install -y gcc gcc-c++ python3-scikit-build \ | ||
&& pip3 install -U llama_cpp_python | ||
# uncomment below if you need to download the model, otherwise just bindmount your local | ||
# models inside the container | ||
# RUN pip3 install huggingface_hub[cli] \ | ||
# && mkdir /models \ | ||
# && huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir /models --local-dir-use-symlinks False | ||
|
||
COPY ./logdetective/ /src/logdetective/ | ||
|
||
# --no-reload: auto-reload doesn't work in a container - `PermissionError: Permission denied (os error 13) about ["/proc"]` | ||
CMD ["fastapi", "dev", "/src/logdetective/server.py", "--host", "0.0.0.0", "--port", "8080", "--no-reload"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters