diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index c03fea4c093c9..8858969c0312a 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -185,7 +185,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     uv pip install peft==0.12.0
 
 ENV HF_HUB_OFFLINE=1 \
-    PORT=8000 \
     HOME=/home/vllm \
     # Allow requested max length to exceed what is extracted from the
     # config.json
@@ -210,8 +209,15 @@ FROM vllm-openai as vllm-grpc-adapter
 USER root
 
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install vllm-tgis-adapter==0.3.0
+    pip install vllm-tgis-adapter==0.4.0
+
+ENV GRPC_PORT=8033 \
+    PORT=8000 \
+    # As an optimization, vLLM disables logprobs when using spec decoding by
+    # default, but this would be unexpected to users of a hosted model that
+    # happens to have spec decoding
+    # see: https://github.com/vllm-project/vllm/pull/6485
+    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
 
-ENV GRPC_PORT=8033
 USER 2000
 ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]