Merge branch 'main' into prompt-test
prashantgupta24 authored Aug 23, 2024
2 parents 3438634 + b0e81ce commit c8ec205
Showing 1 changed file with 9 additions and 3 deletions.
Dockerfile.ubi: 12 changes (9 additions & 3 deletions)
@@ -185,7 +185,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     uv pip install peft==0.12.0
 
 ENV HF_HUB_OFFLINE=1 \
-    PORT=8000 \
     HOME=/home/vllm \
     # Allow requested max length to exceed what is extracted from the
     # config.json
@@ -210,8 +209,15 @@ FROM vllm-openai as vllm-grpc-adapter
 USER root
 
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install vllm-tgis-adapter==0.3.0
+    pip install vllm-tgis-adapter==0.4.0
 
-ENV GRPC_PORT=8033
+ENV GRPC_PORT=8033 \
+    PORT=8000 \
+    # As an optimization, vLLM disables logprobs when using spec decoding by
+    # default, but this would be unexpected to users of a hosted model that
+    # happens to have spec decoding
+    # see: https://github.com/vllm-project/vllm/pull/6485
+    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
+
 USER 2000
 ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
