Commit dc7bdf4

Merge branch 'main' of github.com:triton-inference-server/server into rmccormick-openai

rmccorm4 committed Oct 10, 2024
2 parents 44b2282 + fde6e58
Showing 137 changed files with 5,490 additions and 663 deletions.
12 changes: 11 additions & 1 deletion .pre-commit-config.yaml
@@ -1,4 +1,4 @@
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -72,3 +72,13 @@ repos:
   - id: mixed-line-ending
   - id: requirements-txt-fixer
   - id: trailing-whitespace
+
+- repo: local
+  hooks:
+  - id: add-license
+    name: Add License
+    entry: python tools/add_copyright.py
+    language: python
+    stages: [pre-commit]
+    verbose: true
+    require_serial: true
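
For a local check of the new hook, it can be invoked on its own; a minimal sketch, assuming `pre-commit` is installed and the checkout contains `tools/add_copyright.py` at the path the entry specifies:

```bash
# Run only the new add-license hook against every file in the repository
pre-commit run add-license --all-files
```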
16 changes: 6 additions & 10 deletions CMakeLists.txt
@@ -125,17 +125,13 @@ FetchContent_Declare(

 # Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead
 # of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on Centos
-set (LIB_DIR "lib")
-# /etc/os-release does not exist on Windows
-if(EXISTS "/etc/os-release")
-  file(STRINGS /etc/os-release DISTRO REGEX "^NAME=")
-  string(REGEX REPLACE "NAME=\"(.*)\"" "\\1" DISTRO "${DISTRO}")
-  message(STATUS "Distro Name: ${DISTRO}")
-  if(DISTRO MATCHES "CentOS.*")
+set(LIB_DIR "lib")
+if(LINUX)
+  file(STRINGS "/etc/os-release" DISTRO_ID_LIKE REGEX "ID_LIKE")
+  if(${DISTRO_ID_LIKE} MATCHES "rhel|centos")
     set (LIB_DIR "lib64")
-  endif()
-endif()
-
+  endif(${DISTRO_ID_LIKE} MATCHES "rhel|centos")
+endif(LINUX)
 set(TRITON_CORE_HEADERS_ONLY OFF)

 FetchContent_MakeAvailable(repo-third-party repo-core)
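
The rewritten check keys off `ID_LIKE` instead of parsing `NAME`, so RHEL derivatives (CentOS, Rocky, Alma) all resolve to `lib64`; note that CMake only defines the bare `LINUX` variable from 3.25 onward. A quick way to see what the new branch reads, assuming a Linux host with `/etc/os-release`:

```bash
# On a RHEL-family distro this typically prints something like
# ID_LIKE="rhel centos fedora"; Debian/Ubuntu hosts print "debian"
# or omit the field entirely, leaving LIB_DIR as "lib"
grep "^ID_LIKE=" /etc/os-release
```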
10 changes: 10 additions & 0 deletions Dockerfile.QA
@@ -267,6 +267,12 @@ RUN cp -r qa/L0_decoupled/models qa/L0_decoupled/python_models/ && \
     cp /workspace/tritonbuild/python/examples/decoupled/square_config.pbtxt \
     qa/L0_decoupled/python_models/square_int32/.

+RUN mkdir -p qa/L0_decoupled_grpc_error && \
+    cp -r qa/L0_decoupled/. qa/L0_decoupled_grpc_error
+
+RUN mkdir -p qa/L0_grpc_error_state_cleanup && \
+    cp -r qa/L0_grpc_state_cleanup/. qa/L0_grpc_error_state_cleanup
+
 RUN mkdir -p qa/L0_repoagent_checksum/models/identity_int32/1 && \
     cp tritonbuild/identity/install/backends/identity/libtriton_identity.so \
     qa/L0_repoagent_checksum/models/identity_int32/1/.
@@ -384,6 +390,10 @@ RUN rm -fr qa/L0_copyrights qa/L0_build_variants && \
 RUN find qa/pkgs/ -maxdepth 1 -type f -name \
     "tritonserver-*.whl" | xargs -I {} pip3 install --upgrade {}[all]

+# Install Triton Frontend Python API
+RUN find qa/pkgs/ -type f -name \
+    "tritonfrontend-*.whl" | xargs -I {} pip3 install --upgrade {}[all]
+
 ENV LD_LIBRARY_PATH /opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}

 # DLIS-3631: Needed to run Perf Analyzer CI tests correctly
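
The `tritonfrontend` wheel install mirrors the existing `tritonserver` one. A minimal smoke test once the QA image is built, assuming the wheels expose top-level `tritonserver` and `tritonfrontend` modules (the image tag here is hypothetical):

```bash
# Confirm both Python packages import cleanly inside the QA image
docker run --rm tritonserver_qa \
    python3 -c "import tritonserver, tritonfrontend; print('ok')"
```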
4 changes: 3 additions & 1 deletion Dockerfile.sdk
@@ -29,10 +29,11 @@
 #

 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.07-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.09-py3-min

 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
+ARG TRITON_REPO_ORGANIZATION=http://github.com/triton-inference-server
 ARG TRITON_COMMON_REPO_TAG=main
 ARG TRITON_CORE_REPO_TAG=main
 ARG TRITON_CLIENT_REPO_TAG=main
@@ -217,6 +218,7 @@ WORKDIR /workspace
 COPY TRITON_VERSION .
 COPY NVIDIA_Deep_Learning_Container_License.pdf .
 COPY --from=sdk_build /workspace/client/ client/
+COPY --from=sdk_build /workspace/perf_analyzer/ perf_analyzer/
 COPY --from=sdk_build /workspace/install/ install/
 RUN cd install && \
     export VERSION=`cat /workspace/TRITON_VERSION` && \
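
The new `TRITON_REPO_ORGANIZATION` argument makes the clone source overridable for forks and internal mirrors; a sketch of a build that points it elsewhere (the fork URL is illustrative):

```bash
# Build the SDK image against a fork instead of the default organization
docker build -f Dockerfile.sdk \
    --build-arg TRITON_REPO_ORGANIZATION=https://github.com/my-fork \
    -t tritonserver-sdk:local .
```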
18 changes: 9 additions & 9 deletions Dockerfile.win10.min
@@ -37,9 +37,9 @@ RUN choco install unzip -y
 #
 # Installing TensorRT
 #
-ARG TENSORRT_VERSION=10.2.0.19
-ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.5.zip"
-ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/zip/TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5.zip
+ARG TENSORRT_VERSION=10.4.0.26
+ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows.win10.cuda-12.6.zip"
+ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip
 # COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
 ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
 RUN unzip /tmp/%TENSORRT_ZIP%
@@ -51,9 +51,9 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 #
 # Installing cuDNN
 #
-ARG CUDNN_VERSION=9.2.1.18
+ARG CUDNN_VERSION=9.4.0.58
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
-ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.2.1.18_cuda12-archive.zip
+ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.4.0.58_cuda12-archive.zip
 ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
 RUN unzip /tmp/%CUDNN_ZIP%
 RUN move cudnn-* cudnn
@@ -101,14 +101,14 @@ LABEL CMAKE_VERSION=${CMAKE_VERSION}
 #
 # Installing Visual Studio BuildTools: VS17 2022
 #
-ARG BUILDTOOLS_VERSION=17.9.34622.214
+ARG BUILDTOOLS_VERSION=17.10.35201.131
 # Download collect.exe in case of an install failure.
 ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe"

 # Use the latest release channel. For more control, specify the location of an internal layout.
 # Download the Build Tools bootstrapper.
 # ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
-ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/5e7b923b-7d89-4e14-95b8-a84ab168e243/96b21d216c7954aaf606c6d7ba59a3de991884a8a86c578c767ba349c23188a9/vs_BuildTools.exe
+ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/28626b4b-f88f-4b55-a0cf-f3eaa2c643fb/e6c43d4dfb36338d954cdb3ad9010ab2a479e712088f4f6b016eadcc721bab28/vs_BuildTools.exe
 ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
 # Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended.
 ARG VS_INSTALL_PATH_WP="C:\BuildTools"
@@ -175,15 +175,15 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi

 RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"

-ARG CUDNN_VERSION=9.2.1.18
+ARG CUDNN_VERSION=9.4.0.58
 ENV CUDNN_VERSION ${CUDNN_VERSION}
 COPY --from=dependency_base /cudnn /cudnn
 RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
 RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
 RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
 LABEL CUDNN_VERSION="${CUDNN_VERSION}"

-ARG TENSORRT_VERSION=10.2.0.19
+ARG TENSORRT_VERSION=10.4.0.26
 ENV TRT_VERSION ${TENSORRT_VERSION}
 COPY --from=dependency_base /TensorRT /TensorRT
 RUN setx PATH "c:\TensorRT\lib;%PATH%"
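
The TensorRT and cuDNN bumps change both the ARG defaults and the hard-coded download URLs, so each pair must move together. A quick pre-build reachability check for the new archives (assumes outbound network access and that both URLs remain unauthenticated):

```bash
# HEAD-request both pinned archives; -f makes curl fail on HTTP errors
for url in \
  "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip" \
  "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.4.0.58_cuda12-archive.zip"; do
  curl -fsSLI "$url" -o /dev/null && echo "OK   $url" || echo "FAIL $url"
done
```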
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
17 changes: 8 additions & 9 deletions README.md
@@ -30,11 +30,10 @@

 [![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)

-[!WARNING]
-
-##### LATEST RELEASE
-You are currently on the `main` branch which tracks under-development progress towards the next release.
-The current release is version [2.48.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.07 container release on NVIDIA GPU Cloud (NGC).
+>[!WARNING]
+>You are currently on the `main` branch which tracks under-development progress
+>towards the next release. The current release is version [2.50.0](https://github.com/triton-inference-server/server/releases/latest)
+>and corresponds to the 24.09 container release on NVIDIA GPU Cloud (NGC).

 Triton Inference Server is an open source inference serving software that
 streamlines AI inferencing. Triton enables teams to deploy any AI model from
@@ -92,16 +91,16 @@ Inference Server with the

 ```bash
 # Step 1: Create the example model repository
-git clone -b r24.07 https://github.com/triton-inference-server/server.git
+git clone -b r24.09 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh

 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.07-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.09-py3 tritonserver --model-repository=/models

 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.07-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.09-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg

 # Inference should return the following
# Inference should return the following
@@ -179,7 +178,7 @@ configuration](docs/user_guide/model_configuration.md) for the model.
 [Backend-Platform Support Matrix](https://github.com/triton-inference-server/backend/blob/main/docs/backend_platform_support_matrix.md)
 to learn which backends are supported on your target platform.
 - Learn how to [optimize performance](docs/user_guide/optimization.md) using the
-  [Performance Analyzer](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md)
+  [Performance Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
   and
   [Model Analyzer](https://github.com/triton-inference-server/model_analyzer)
 - Learn how to [manage loading and unloading models](docs/user_guide/model_management.md) in
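
Performance Analyzer now lives in its own `perf_analyzer` repository, but it still ships in the SDK container, so the quickstart flow is unchanged. A short example against the `densenet_onnx` model served above, using commonly documented flags:

```bash
# From the SDK container started in Step 3, measure latency/throughput
perf_analyzer -m densenet_onnx --concurrency-range 1:4
```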
2 changes: 1 addition & 1 deletion TRITON_VERSION
@@ -1 +1 @@
-2.49.0dev
+2.51.0dev