Bump to v0.1.0 and r24.09
rmccorm4 committed Oct 25, 2024
1 parent c73d78d commit 6b4597e
Showing 4 changed files with 19 additions and 18 deletions.
README.md: 27 changes (14 additions & 13 deletions)
@@ -22,8 +22,8 @@ and running the CLI from within the latest corresponding `tritonserver`
container image, which should have all necessary system dependencies installed.

For vLLM and TRT-LLM, you can use their respective images:
-- `nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3`
-- `nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3`

If you decide to run the CLI on the host or in a custom image, please
see this list of [additional dependencies](#additional-dependencies-for-custom-environments)
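
As a quick sanity check of the updated image tags, the new 24.09 containers can be pulled directly (a minimal sketch; assumes nvcr.io access and a local Docker install):

```bash
# Pull the 24.09 images referenced in this change
docker pull nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3
docker pull nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3
```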
@@ -38,13 +38,14 @@ matrix below:

| Triton CLI Version | TRT-LLM Version | Triton Container Tag |
|:------------------:|:---------------:|:--------------------:|
+| 0.1.0              | v0.13.0         | 24.09                |
| 0.0.11             | v0.12.0         | 24.08                |
| 0.0.10             | v0.11.0         | 24.07                |
-| 0.0.9              | v0.10.0         | 24.06                |
-| 0.0.8              | v0.9.0          | 24.05                |
-| 0.0.7              | v0.9.0          | 24.04                |
-| 0.0.6              | v0.8.0          | 24.02, 24.03         |
-| 0.0.5              | v0.7.1          | 24.01                |
+| 0.0.9              | v0.10.0         | 24.06                |
+| 0.0.8              | v0.9.0          | 24.05                |
+| 0.0.7              | v0.9.0          | 24.04                |
+| 0.0.6              | v0.8.0          | 24.02, 24.03         |
+| 0.0.5              | v0.7.1          | 24.01                |
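
Per the updated matrix, CLI 0.1.0 pairs with TRT-LLM v0.13.0 and the 24.09 containers. A minimal matched-setup sketch (GPU flags abbreviated; adjust for your hardware):

```bash
# Launch the matching 24.09 container, then install the matching CLI tag inside it
docker run -it --gpus all nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.0
```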

### Install from GitHub

@@ -58,7 +59,7 @@ It is also possible to install from a specific branch name, a commit hash
or a tag name. For example, to install `triton_cli` with a specific tag:

```bash
GIT_REF="0.0.11"
GIT_REF="0.1.0"
pip install git+https://github.com/triton-inference-server/triton_cli.git@${GIT_REF}
```
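
The same pattern works for a branch name or commit hash; for example (the ref below is illustrative):

```bash
# Install from a branch instead of a tag
GIT_REF="main"
pip install git+https://github.com/triton-inference-server/triton_cli.git@${GIT_REF}
```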

@@ -93,7 +94,7 @@ triton -h
triton import -m gpt2

# Start server pointing at the default model repository
-triton start --image nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3
+triton start --image nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3

# Infer with CLI
triton infer -m gpt2 --prompt "machine learning is"
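
Once `triton start` is up, the server can be probed over Triton's standard HTTP health endpoint (a sketch assuming the default port 8000):

```bash
# Returns HTTP 200 once the server and its models are ready
curl -sf localhost:8000/v2/health/ready && echo "server ready"
```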
@@ -173,10 +174,10 @@ docker run -ti \
--shm-size=1g --ulimit memlock=-1 \
-v ${HOME}/models:/root/models \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-nvcr.io/nvidia/tritonserver:24.08-vllm-python-py3
+nvcr.io/nvidia/tritonserver:24.09-vllm-python-py3

# Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.11
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.0

# Authenticate with huggingface for restricted models like Llama-2 and Llama-3
huggingface-cli login
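
For non-interactive environments, login can also be scripted with a token (a sketch; exporting `HF_TOKEN` beforehand is assumed):

```bash
# Equivalent non-interactive authentication; $HF_TOKEN is a placeholder you provide
huggingface-cli login --token "${HF_TOKEN}"
```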
@@ -238,10 +239,10 @@ docker run -ti \
-v /tmp:/tmp \
-v ${HOME}/models:/root/models \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3
+nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3

# Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.11
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.0

# Authenticate with huggingface for restricted models like Llama-2 and Llama-3
huggingface-cli login
pyproject.toml: 4 changes (2 additions & 2 deletions)
@@ -50,7 +50,7 @@ dependencies = [
"grpcio>=1.65.5",
"directory-tree == 0.0.4", # may remove in future
"docker == 6.1.3",
"genai-perf @ git+https://github.com/triton-inference-server/perf_analyzer.git@r24.08#subdirectory=genai-perf",
"genai-perf @ git+https://github.com/triton-inference-server/perf_analyzer.git@r24.09#subdirectory=genai-perf",
# TODO: rely on tritonclient to pull in protobuf and numpy dependencies?
"numpy >=1.21,<2",
"protobuf>=3.7.0",
@@ -59,7 +59,7 @@ dependencies = [
"rich == 13.5.2",
# TODO: Test on cpu-only machine if [cuda] dependency is an issue,
# Use explicit client version matching genai-perf version for tagged release
"tritonclient[all] == 2.49",
"tritonclient[all] == 2.50",
"huggingface-hub >= 0.19.4",
# Testing
"pytest >= 8.1.1", # may remove later
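
After installation, the bumped pins can be spot-checked with standard pip tooling (a sketch; package names taken from pyproject.toml above):

```bash
# Confirm the r24.09-matched tritonclient/genai-perf versions landed
pip show tritonclient genai-perf | grep -E "^(Name|Version)"
```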
src/triton_cli/__init__.py: 2 changes (1 addition & 1 deletion)
@@ -24,4 +24,4 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

__version__ = "0.1.0dev"
__version__ = "0.1.0"
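
With the dev suffix dropped, the release version is visible at runtime (a sketch; the `triton_cli` import name matches the package path above):

```bash
# Print the installed CLI version
python3 -c "import triton_cli; print(triton_cli.__version__)"
```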
src/triton_cli/docker/Dockerfile: 4 changes (2 additions & 2 deletions)
@@ -1,9 +1,9 @@
# TRT-LLM image contains engine building and runtime dependencies
-FROM nvcr.io/nvidia/tritonserver:24.08-trtllm-python-py3
+FROM nvcr.io/nvidia/tritonserver:24.09-trtllm-python-py3

# Setup vLLM Triton backend
RUN mkdir -p /opt/tritonserver/backends/vllm && \
-git clone -b r24.08 https://github.com/triton-inference-server/vllm_backend.git /tmp/vllm_backend && \
+git clone -b r24.09 https://github.com/triton-inference-server/vllm_backend.git /tmp/vllm_backend && \
cp -r /tmp/vllm_backend/src/* /opt/tritonserver/backends/vllm && \
rm -r /tmp/vllm_backend
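
The updated Dockerfile builds as usual (a sketch; the image tag and repo-root build context are assumptions):

```bash
# Build the TRT-LLM + vLLM development image from the repository root
docker build -t triton-cli:r24.09 -f src/triton_cli/docker/Dockerfile .
```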

