From b361484b24e6586aea4d97eeac0fa158d0170c80 Mon Sep 17 00:00:00 2001 From: Travis Johnson Date: Tue, 20 Aug 2024 12:25:56 -0600 Subject: [PATCH 1/2] fix: enable logprobs during spec decoding by default Signed-off-by: Travis Johnson --- Dockerfile.ubi | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index e185ac549f513..5308d690015c0 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -181,7 +181,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \ uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp311-cp311-linux_x86_64.whl ENV HF_HUB_OFFLINE=1 \ - PORT=8000 \ HOME=/home/vllm \ # Allow requested max length to exceed what is extracted from the # config.json @@ -208,6 +207,13 @@ USER root RUN --mount=type=cache,target=/root/.cache/pip \ pip install vllm-tgis-adapter==0.3.0 -ENV GRPC_PORT=8033 +ENV GRPC_PORT=8033 \ + PORT=8000 \ + # As an optimization, vLLM disables logprobs when using spec decoding by + # default, but this would be unexpected to users of a hosted model that + # happens to have spec decoding + # see: https://github.com/vllm-project/vllm/pull/6485 + DISABLE_LOGPROBS_DURING_SPEC_DECODING=false + USER 2000 ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"] From b0e81ce31581cb6ecdd8b1230ad78b6274dee35b Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Wed, 21 Aug 2024 21:42:24 +0530 Subject: [PATCH 2/2] deps: bump vllm-tgis-adapter to 0.4.0 (#132) [changelog for 0.4.0](https://github.com/opendatahub-io/vllm-tgis-adapter/releases/tag/0.4.0) https://issues.redhat.com/browse/RHOAIENG-11591 --- Dockerfile.ubi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 5308d690015c0..3019951df1177 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -205,7 +205,7 @@ FROM vllm-openai as vllm-grpc-adapter USER root RUN --mount=type=cache,target=/root/.cache/pip \ - pip install vllm-tgis-adapter==0.3.0 + pip install vllm-tgis-adapter==0.4.0 ENV GRPC_PORT=8033 \ PORT=8000 \