diff --git a/README.md b/README.md
index 0233f59..0514105 100644
--- a/README.md
+++ b/README.md
@@ -22,8 +22,8 @@ and running the CLI from within the latest corresponding `tritonserver`
 container image, which should have all necessary system dependencies installed.
 
 For vLLM and TRT-LLM, you can use their respective images:
-- `nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3`
-- `nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3`
 
 If you decide to run the CLI on the host or in a custom image, please
 see this list of [additional dependencies](#additional-dependencies-for-custom-environments)
@@ -38,13 +38,14 @@ matrix below:
 
 | Triton CLI Version | TRT-LLM Version | Triton Container Tag |
 |:------------------:|:---------------:|:--------------------:|
+| 0.1.0              | v0.13.0         | 24.09                |
 | 0.0.11             | v0.12.0         | 24.08                |
 | 0.0.10             | v0.11.0         | 24.07                |
-| 0.0.9 | v0.10.0 | 24.06 |
-| 0.0.8 | v0.9.0 | 24.05 |
-| 0.0.7 | v0.9.0 | 24.04 |
-| 0.0.6 | v0.8.0 | 24.02, 24.03 |
-| 0.0.5 | v0.7.1 | 24.01 |
+| 0.0.9              | v0.10.0         | 24.06                |
+| 0.0.8              | v0.9.0          | 24.05                |
+| 0.0.7              | v0.9.0          | 24.04                |
+| 0.0.6              | v0.8.0          | 24.02, 24.03         |
+| 0.0.5              | v0.7.1          | 24.01                |
 
 ### Install from GitHub
 
@@ -58,7 +58,7 @@ It is also possible to install from a specific branch name, a commit hash
 or a tag name. For example to install `triton_cli` with a specific tag:
 
 ```bash
-GIT_REF="0.0.11"
+GIT_REF="0.1.0"
 pip install git+https://github.com/triton-inference-server/triton_cli.git@${GIT_REF}
 ```
 
@@ -93,7 +94,7 @@ triton -h
 triton import -m gpt2
 
 # Start server pointing at the default model repository
-triton start --image nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3
+triton start --image nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3
 
 # Infer with CLI
 triton infer -m gpt2 --prompt "machine learning is"
@@ -173,10 +174,10 @@ docker run -ti \
   --shm-size=1g --ulimit memlock=-1 \
   -v ${HOME}/models:/root/models \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-  nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3
+  nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3
 
 # Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.11
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.0
 
 # Authenticate with huggingface for restricted models like Llama-2 and Llama-3
 huggingface-cli login
@@ -238,10 +239,10 @@ docker run -ti \
   -v /tmp:/tmp \
   -v ${HOME}/models:/root/models \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-  nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3
+  nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3
 
 # Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.11
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.0
 
 # Authenticate with huggingface for restricted models like Llama-2 and Llama-3
 huggingface-cli login
diff --git a/pyproject.toml b/pyproject.toml
index 080aa17..c68606c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,7 @@ dependencies = [
     "grpcio>=1.65.5",
     "directory-tree == 0.0.4", # may remove in future
     "docker == 6.1.3",
-    "genai-perf @ git+https://github.com/triton-inference-server/perf_analyzer.git@r24.08#subdirectory=genai-perf",
+    "genai-perf @ git+https://github.com/triton-inference-server/perf_analyzer.git@r24.09#subdirectory=genai-perf",
     # TODO: rely on tritonclient to pull in protobuf and numpy dependencies?
     "numpy >=1.21,<2",
     "protobuf>=3.7.0",
@@ -59,7 +59,7 @@ dependencies = [
     "rich == 13.5.2",
     # TODO: Test on cpu-only machine if [cuda] dependency is an issue,
     # Use explicit client version matching genai-perf version for tagged release
-    "tritonclient[all] == 2.49",
+    "tritonclient[all] == 2.50",
     "huggingface-hub >= 0.19.4",
     # Testing
     "pytest >= 8.1.1", # may remove later
diff --git a/src/triton_cli/__init__.py b/src/triton_cli/__init__.py
index 4758a14..dae273d 100644
--- a/src/triton_cli/__init__.py
+++ b/src/triton_cli/__init__.py
@@ -24,4 +24,4 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-__version__ = "0.1.0dev"
+__version__ = "0.1.0"
diff --git a/src/triton_cli/docker/Dockerfile b/src/triton_cli/docker/Dockerfile
index 7bc4c87..4701f21 100644
--- a/src/triton_cli/docker/Dockerfile
+++ b/src/triton_cli/docker/Dockerfile
@@ -1,9 +1,9 @@
 # TRT-LLM image contains engine building and runtime dependencies
-FROM nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3
+FROM nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3
 
 # Setup vLLM Triton backend
 RUN mkdir -p /opt/tritonserver/backends/vllm && \
-    git clone -b r24.08 https://github.com/triton-inference-server/vllm_backend.git /tmp/vllm_backend && \
+    git clone -b r24.09 https://github.com/triton-inference-server/vllm_backend.git /tmp/vllm_backend && \
     cp -r /tmp/vllm_backend/src/* /opt/tritonserver/backends/vllm && \
     rm -r /tmp/vllm_backend
 