From 936e400ad71e802ae5e69e7bd062377f1d1a6619 Mon Sep 17 00:00:00 2001 From: Travis Johnson Date: Thu, 8 Aug 2024 15:43:20 -0600 Subject: [PATCH] feat: allow long max seq length Signed-off-by: Travis Johnson --- Dockerfile.ubi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 53a6c90ecfbf9..39ee4a63c84a4 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -183,6 +183,10 @@ RUN --mount=type=cache,target=/root/.cache/pip \ ENV HF_HUB_OFFLINE=1 \ PORT=8000 \ HOME=/home/vllm \ + # Allow requested max length to exceed what is extracted from the + # config.json + # see: https://github.com/vllm-project/vllm/pull/7080 + VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \ VLLM_USAGE_SOURCE=production-docker-image \ VLLM_WORKER_MULTIPROC_METHOD=fork