Bump to v0.1.0 and r24.09
rmccorm4 committed Oct 25, 2024
1 parent c73d78d commit 6b4597e
Showing 4 changed files with 19 additions and 18 deletions.
README.md: 27 changes (14 additions & 13 deletions)
@@ -22,8 +22,8 @@ and running the CLI from within the latest corresponding `tritonserver`
container image, which should have all necessary system dependencies installed.

For vLLM and TRT-LLM, you can use their respective images:
-- `nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3`
-- `nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3`

If you decide to run the CLI on the host or in a custom image, please
see this list of [additional dependencies](#additional-dependencies-for-custom-environments)
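
As a quick sanity check of the updated image tags, the new 24.09 containers can be pulled directly (a minimal sketch; assumes nvcr.io access and a local Docker install):

```bash
# Pull the 24.09 images referenced in this change
docker pull nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3
docker pull nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3
```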
@@ -38,13 +38,14 @@ matrix below:

| Triton CLI Version | TRT-LLM Version | Triton Container Tag |
|:------------------:|:---------------:|:--------------------:|
+| 0.1.0              | v0.13.0         | 24.09                |
| 0.0.11             | v0.12.0         | 24.08                |
| 0.0.10             | v0.11.0         | 24.07                |
-| 0.0.9              | v0.10.0         | 24.06                |
-| 0.0.8              | v0.9.0          | 24.05                |
-| 0.0.7              | v0.9.0          | 24.04                |
-| 0.0.6              | v0.8.0          | 24.02, 24.03         |
-| 0.0.5              | v0.7.1          | 24.01                |
+| 0.0.9              | v0.10.0         | 24.06                |
+| 0.0.8              | v0.9.0          | 24.05                |
+| 0.0.7              | v0.9.0          | 24.04                |
+| 0.0.6              | v0.8.0          | 24.02, 24.03         |
+| 0.0.5              | v0.7.1          | 24.01                |
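
Per the updated matrix, CLI 0.1.0 pairs with TRT-LLM v0.13.0 and the 24.09 containers. A minimal matched-setup sketch (GPU flags abbreviated; adjust for your hardware):

```bash
# Launch the matching 24.09 container, then install the matching CLI tag inside it
docker run -it --gpus all nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.0
```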

### Install from GitHub

@@ -58,7 +59,7 @@ It is also possible to install from a specific branch name, a commit hash
or a tag name. For example, to install `triton_cli` with a specific tag:

```bash
GIT_REF="0.0.11"
GIT_REF="0.1.0"
pip install git+https://github.com/triton-inference-server/triton_cli.git@${GIT_REF}
```
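
The same pattern works for a branch name or commit hash; for example (the ref below is illustrative):

```bash
# Install from a branch instead of a tag
GIT_REF="main"
pip install git+https://github.com/triton-inference-server/triton_cli.git@${GIT_REF}
```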

@@ -93,7 +94,7 @@ triton -h
triton import -m gpt2

# Start server pointing at the default model repository
-triton start --image nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3
+triton start --image nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3

# Infer with CLI
triton infer -m gpt2 --prompt "machine learning is"
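
Once `triton start` is up, the server can be probed over Triton's standard HTTP health endpoint (a sketch assuming the default port 8000):

```bash
# Returns HTTP 200 once the server and its models are ready
curl -sf localhost:8000/v2/health/ready && echo "server ready"
```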
@@ -173,10 +174,10 @@ docker run -ti \
--shm-size=1g --ulimit memlock=-1 \
-v ${HOME}/models:/root/models \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3
+nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3

# Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.11
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.0

# Authenticate with huggingface for restricted models like Llama-2 and Llama-3
huggingface-cli login
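
For non-interactive environments, login can also be scripted with a token (a sketch; exporting `HF_TOKEN` beforehand is assumed):

```bash
# Equivalent non-interactive authentication; $HF_TOKEN is a placeholder you provide
huggingface-cli login --token "${HF_TOKEN}"
```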
@@ -238,10 +239,10 @@ docker run -ti \
-v /tmp:/tmp \
-v ${HOME}/models:/root/models \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3
+nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3

# Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.11
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.0

# Authenticate with huggingface for restricted models like Llama-2 and Llama-3
huggingface-cli login
pyproject.toml: 4 changes (2 additions & 2 deletions)
@@ -50,7 +50,7 @@ dependencies = [
"grpcio>=1.65.5",
"directory-tree == 0.0.4", # may remove in future
"docker == 6.1.3",
"genai-perf @ git+https://github.com/triton-inference-server/perf_analyzer.git@r24.08#subdirectory=genai-perf",
"genai-perf @ git+https://github.com/triton-inference-server/perf_analyzer.git@r24.09#subdirectory=genai-perf",
# TODO: rely on tritonclient to pull in protobuf and numpy dependencies?
"numpy >=1.21,<2",
"protobuf>=3.7.0",
@@ -59,7 +59,7 @@ dependencies = [
"rich == 13.5.2",
# TODO: Test on cpu-only machine if [cuda] dependency is an issue,
# Use explicit client version matching genai-perf version for tagged release
"tritonclient[all] == 2.49",
"tritonclient[all] == 2.50",
"huggingface-hub >= 0.19.4",
# Testing
"pytest >= 8.1.1", # may remove later
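
After installation, the bumped pins can be spot-checked with standard pip tooling (a sketch; package names taken from pyproject.toml above):

```bash
# Confirm the r24.09-matched tritonclient/genai-perf versions landed
pip show tritonclient genai-perf | grep -E "^(Name|Version)"
```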
src/triton_cli/__init__.py: 2 changes (1 addition & 1 deletion)
@@ -24,4 +24,4 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

__version__ = "0.1.0dev"
__version__ = "0.1.0"
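
With the dev suffix dropped, the release version is visible at runtime (a sketch; the `triton_cli` import name matches the package path above):

```bash
# Print the installed CLI version
python3 -c "import triton_cli; print(triton_cli.__version__)"
```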
src/triton_cli/docker/Dockerfile: 4 changes (2 additions & 2 deletions)
@@ -1,9 +1,9 @@
# TRT-LLM image contains engine building and runtime dependencies
-FROM nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3
+FROM nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3

# Setup vLLM Triton backend
RUN mkdir -p /opt/tritonserver/backends/vllm && \
-git clone -b r24.08 https://github.com/triton-inference-server/vllm_backend.git /tmp/vllm_backend && \
+git clone -b r24.09 https://github.com/triton-inference-server/vllm_backend.git /tmp/vllm_backend && \
cp -r /tmp/vllm_backend/src/* /opt/tritonserver/backends/vllm && \
rm -r /tmp/vllm_backend
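
The updated Dockerfile builds as usual (a sketch; the image tag and repo-root build context are assumptions):

```bash
# Build the TRT-LLM + vLLM development image from the repository root
docker build -t triton-cli:r24.09 -f src/triton_cli/docker/Dockerfile .
```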

