-
Notifications
You must be signed in to change notification settings - Fork 495
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Serve] Support headers in Readiness Probe (#3552)
* init * probe_str remove headers and delete env vars * remove header values in replica manager logging
- Loading branch information
Showing
4 changed files
with
81 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# service.yaml
# The newly-added `service` section to the `serve-openai-api.yaml` file.
service:
  readiness_probe:
    # Path to the endpoint used to check the readiness of the service.
    path: /v1/models
    # Set authorization headers here if needed.
    # NOTE: $AUTH_TOKEN is substituted by SkyPilot from `envs` below, not by YAML.
    headers:
      Authorization: Bearer $AUTH_TOKEN
  # How many replicas to manage.
  replicas: 1

# Fields below are the same as `serve-openai-api.yaml`.
envs:
  MODEL_NAME: meta-llama/Llama-2-7b-chat-hf
  HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass.
  AUTH_TOKEN: # TODO: Fill with your own auth token (a random string), or use --env to pass.

resources:
  # Any one of these accelerators satisfies the request (SkyPilot picks one).
  accelerators: {L4:1, A10G:1, A10:1, A100:1, A100-80GB:1}
  # Port the vLLM OpenAI-compatible server listens on (matches --port below).
  ports: 8000

# One-time environment setup: create/activate the conda env, install pinned
# deps, and log in to Hugging Face so the gated Llama-2 weights can be pulled.
setup: |
  conda activate vllm
  if [ $? -ne 0 ]; then
    conda create -n vllm python=3.10 -y
    conda activate vllm
  fi
  pip install transformers==4.38.0
  pip install vllm==0.3.2
  python -c "import huggingface_hub; huggingface_hub.login('${HF_TOKEN}')"

# Launch the OpenAI-compatible API server; --api-key must match the
# Authorization header used by the readiness probe above.
run: |
  conda activate vllm
  echo 'Starting vllm openai api server...'
  python -m vllm.entrypoints.openai.api_server \
    --model $MODEL_NAME --tokenizer hf-internal-testing/llama-tokenizer \
    --host 0.0.0.0 --port 8000 --api-key $AUTH_TOKEN
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters