From f1b817f929a3319fd69395e1bce11f407fb6157d Mon Sep 17 00:00:00 2001
From: fpetrini15
Date: Wed, 13 Dec 2023 21:09:07 -0800
Subject: [PATCH 1/5] Basic MPI support

---
 src/triton_cli/parser.py                |  5 +++++
 src/triton_cli/server/server_docker.py  | 29 ++++++++++++++++++++-----
 src/triton_cli/server/server_factory.py |  9 +++++++-
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/src/triton_cli/parser.py b/src/triton_cli/parser.py
index 3dedd63..0d26062 100644
--- a/src/triton_cli/parser.py
+++ b/src/triton_cli/parser.py
@@ -242,6 +242,11 @@ def parse_args_server(subcommands):
         default="nvcr.io/nvidia/tritonserver:23.11-vllm-python-py3",
         help="Image to use when starting Triton with 'docker' mode",
     )
+    server_start.add_argument(
+        "--trtllm",
+        action="store_true",
+        help="Whether the model repo contains a trtllm model.",
+    )
     add_repo_args([server_start])
 
     server_metrics = server_commands.add_parser(
diff --git a/src/triton_cli/server/server_docker.py b/src/triton_cli/server/server_docker.py
index 1ae49fe..bdeb976 100755
--- a/src/triton_cli/server/server_docker.py
+++ b/src/triton_cli/server/server_docker.py
@@ -29,7 +29,9 @@ class TritonServerDocker(TritonServer):
     triton in a docker container.
     """
 
-    def __init__(self, image, config, gpus, mounts, labels, shm_size, args):
+    def __init__(
+        self, image, trtllm_model, config, gpus, mounts, labels, shm_size, args
+    ):
         """
         Parameters
         ----------
@@ -60,6 +62,7 @@ def __init__(self, image, config, gpus, mounts, labels, shm_size, args):
         self._gpus = gpus
         self._shm_size = shm_size
         self._args = args if args else {}
+        self._trtllm_model = trtllm_model
 
         assert self._server_config[
             "model-repository"
@@ -105,11 +108,27 @@ def start(self, env=None):
             server_grpc_port: server_grpc_port,
             server_metrics_port: server_metrics_port,
         }
-        # Construct run command
-        command = " ".join(
-            env_cmds + ["tritonserver", self._server_config.to_cli_string()]
-        )
+        # Construct run command
+        # TRTLLM models require special handling. For now,
+        # we will 'spell-out' the command.
+        if self._trtllm_model:
+            command = " ".join(
+                [
+                    "mpirun",
+                    "--allow-run-as-root",
+                    "-n",
+                    "1",
+                    "tritonserver",
+                    self._server_config.to_cli_string(),
+                    "--backend-config=shm-region-prefix-name=prefix1_",
+                    "--disable-auto-complete-config",
+                ]
+            )
+        else:
+            command = " ".join(
+                env_cmds + ["tritonserver", self._server_config.to_cli_string()]
+            )
 
         try:
             # Run the docker container and run the command in the container
             self._tritonserver_container = self._docker_client.containers.run(
diff --git a/src/triton_cli/server/server_factory.py b/src/triton_cli/server/server_factory.py
index a44c962..c755841 100755
--- a/src/triton_cli/server/server_factory.py
+++ b/src/triton_cli/server/server_factory.py
@@ -38,6 +38,7 @@ class TritonServerFactory:
     @staticmethod
     def create_server_docker(
         image,
+        trtllm_model,
         config,
         gpus,
         mounts=None,
@@ -50,6 +51,8 @@ def create_server_docker(
         ----------
         image : str
             The tritonserver docker image to pull and run
+        trtllm_model : bool
+            Whether the model repo contains a trtllm model
         config : TritonServerConfig
             the config object containing arguments for this server instance
         gpus : list of str
@@ -71,6 +74,7 @@ def create_server_docker(
 
         return TritonServerDocker(
             image=image,
+            trtllm_model=trtllm_model,
             config=config,
             gpus=gpus,
             mounts=mounts,
@@ -148,10 +152,13 @@ def _get_local_server_handle(config, gpus):
 def _get_docker_server_handle(config, gpus):
     triton_config = TritonServerConfig()
     triton_config["model-repository"] = os.path.abspath(config.model_repository)
-
+    # Can only do this when assuming world_size=1
+    if config.trtllm:
+        triton_config["backend-config"] = "shm-region-prefix-name=prefix1_"
     logger.info("Starting a Triton Server using docker")
     server = TritonServerFactory.create_server_docker(
         image=config.image,
+        trtllm_model=config.trtllm,
         config=triton_config,
         gpus=gpus,
         mounts=None,
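
Note: with this patch, a model repository flagged as TRT-LLM is served through a
single-rank MPI launch. As a rough sketch (not the author's code), assuming a
repository at the hypothetical path /models and letting a stand-in represent
to_cli_string():

    # Sketch only: approximates the command string the trtllm branch builds.
    # "/models" and the stand-in for to_cli_string() are assumptions.
    to_cli_string = lambda: "--model-repository=/models"
    command = " ".join(
        [
            "mpirun", "--allow-run-as-root", "-n", "1",
            "tritonserver", to_cli_string(),
            "--backend-config=shm-region-prefix-name=prefix1_",
            "--disable-auto-complete-config",
        ]
    )
    # -> mpirun --allow-run-as-root -n 1 tritonserver --model-repository=/models
    #    --backend-config=shm-region-prefix-name=prefix1_ --disable-auto-complete-config
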
From bb78fbc4780de19b1f6bb803b533d08eb72148fb Mon Sep 17 00:00:00 2001
From: fpetrini15
Date: Wed, 13 Dec 2023 22:38:33 -0800
Subject: [PATCH 2/5] Cleanup

---
 src/triton_cli/server/server_docker.py  | 3 ++-
 src/triton_cli/server/server_factory.py | 3 ---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/triton_cli/server/server_docker.py b/src/triton_cli/server/server_docker.py
index bdeb976..354c128 100755
--- a/src/triton_cli/server/server_docker.py
+++ b/src/triton_cli/server/server_docker.py
@@ -120,8 +120,9 @@ def start(self, env=None):
                     "1",
                     "tritonserver",
                     self._server_config.to_cli_string(),
-                    "--backend-config=shm-region-prefix-name=prefix1_",
+                    "--backend-config=python,shm-region-prefix-name=prefix1_",
                     "--disable-auto-complete-config",
+                    ":",
                 ]
             )
         else:
diff --git a/src/triton_cli/server/server_factory.py b/src/triton_cli/server/server_factory.py
index c755841..c2f6d27 100755
--- a/src/triton_cli/server/server_factory.py
+++ b/src/triton_cli/server/server_factory.py
@@ -152,9 +152,6 @@ def _get_local_server_handle(config, gpus):
 def _get_docker_server_handle(config, gpus):
     triton_config = TritonServerConfig()
     triton_config["model-repository"] = os.path.abspath(config.model_repository)
-    # Can only do this when assuming world_size=1
-    if config.trtllm:
-        triton_config["backend-config"] = "shm-region-prefix-name=prefix1_"
     logger.info("Starting a Triton Server using docker")
     server = TritonServerFactory.create_server_docker(
         image=config.image,
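
Note: two details in this cleanup are easy to miss. Triton's --backend-config
flag takes the form <backend>,<setting>=<value>, so the added "python," qualifier
scopes shm-region-prefix-name to the Python backend instead of leaving the
backend unnamed; and the trailing ':' is mpirun's separator between application
contexts, groundwork for the multi-rank command introduced in the next patch.
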
From f9cb7c85abf727c0e4319cbcd98f967834b8d658 Mon Sep 17 00:00:00 2001
From: fpetrini15
Date: Thu, 14 Dec 2023 09:44:36 -0800
Subject: [PATCH 3/5] Refactor for world size

---
 src/triton_cli/parser.py                |  8 +++--
 src/triton_cli/server/server_docker.py  | 24 ++++---------
 src/triton_cli/server/server_factory.py | 10 +++---
 src/triton_cli/server/server_utils.py   | 46 +++++++++++++++++++++++++
 4 files changed, 63 insertions(+), 25 deletions(-)
 create mode 100644 src/triton_cli/server/server_utils.py

diff --git a/src/triton_cli/parser.py b/src/triton_cli/parser.py
index 0d26062..d06d5a6 100644
--- a/src/triton_cli/parser.py
+++ b/src/triton_cli/parser.py
@@ -243,9 +243,11 @@ def parse_args_server(subcommands):
         help="Image to use when starting Triton with 'docker' mode",
     )
     server_start.add_argument(
-        "--trtllm",
-        action="store_true",
-        help="Whether the model repo contains a trtllm model.",
+        "--world-size",
+        type=int,
+        required=False,
+        default=-1,
+        help="Number of devices on which to deploy a tensorrtllm model.",
     )
     add_repo_args([server_start])
 
diff --git a/src/triton_cli/server/server_docker.py b/src/triton_cli/server/server_docker.py
index 354c128..a7e6743 100755
--- a/src/triton_cli/server/server_docker.py
+++ b/src/triton_cli/server/server_docker.py
@@ -18,6 +18,7 @@
 import logging
 
 from .server import TritonServer
+from .server_utils import TritonServerUtils
 from triton_cli.constants import LOGGER_NAME
 
 logger = logging.getLogger(LOGGER_NAME)
@@ -29,9 +30,7 @@ class TritonServerDocker(TritonServer):
     triton in a docker container.
     """
 
-    def __init__(
-        self, image, trtllm_model, config, gpus, mounts, labels, shm_size, args
-    ):
+    def __init__(self, image, world_size, config, gpus, mounts, labels, shm_size, args):
         """
         Parameters
         ----------
@@ -62,7 +61,7 @@ def __init__(
         self._gpus = gpus
         self._shm_size = shm_size
         self._args = args if args else {}
-        self._trtllm_model = trtllm_model
+        self._world_size = world_size
 
         assert self._server_config[
             "model-repository"
@@ -111,25 +110,16 @@ def start(self, env=None):
         # Construct run command
         # TRTLLM models require special handling. For now,
         # we will 'spell-out' the command.
-        if self._trtllm_model:
+        if self._world_size > -1:
             command = " ".join(
-                [
-                    "mpirun",
-                    "--allow-run-as-root",
-                    "-n",
-                    "1",
-                    "tritonserver",
-                    self._server_config.to_cli_string(),
-                    "--backend-config=python,shm-region-prefix-name=prefix1_",
-                    "--disable-auto-complete-config",
-                    ":",
-                ]
+                TritonServerUtils.mpi_run(
+                    self._world_size, self._server_config["model-repository"]
+                )
             )
         else:
             command = " ".join(
                 env_cmds + ["tritonserver", self._server_config.to_cli_string()]
             )
-
         try:
             # Run the docker container and run the command in the container
             self._tritonserver_container = self._docker_client.containers.run(
diff --git a/src/triton_cli/server/server_factory.py b/src/triton_cli/server/server_factory.py
index c2f6d27..b595516 100755
--- a/src/triton_cli/server/server_factory.py
+++ b/src/triton_cli/server/server_factory.py
@@ -38,7 +38,7 @@ class TritonServerFactory:
     @staticmethod
     def create_server_docker(
         image,
-        trtllm_model,
+        world_size,
         config,
         gpus,
         mounts=None,
@@ -51,8 +51,8 @@ def create_server_docker(
         ----------
         image : str
             The tritonserver docker image to pull and run
-        trtllm_model : bool
-            Whether the model repo contains a trtllm model
+        world_size : int
+            Number of devices on which to deploy a tensorrtllm model.
         config : TritonServerConfig
             the config object containing arguments for this server instance
         gpus : list of str
@@ -74,7 +74,7 @@ def create_server_docker(
 
         return TritonServerDocker(
             image=image,
-            trtllm_model=trtllm_model,
+            world_size=world_size,
             config=config,
             gpus=gpus,
             mounts=mounts,
@@ -155,7 +155,7 @@ def _get_docker_server_handle(config, gpus):
     logger.info("Starting a Triton Server using docker")
     server = TritonServerFactory.create_server_docker(
         image=config.image,
-        trtllm_model=config.trtllm,
+        world_size=config.world_size,
         config=triton_config,
         gpus=gpus,
         mounts=None,
diff --git a/src/triton_cli/server/server_utils.py b/src/triton_cli/server/server_utils.py
new file mode 100644
index 0000000..86fe2ce
--- /dev/null
+++ b/src/triton_cli/server/server_utils.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+import subprocess
+
+
+class TritonServerUtils:
+    @staticmethod
+    def mpi_run(world_size: int, model_repo: str) -> list:
+        """
+        Parameters
+        ----------
+        world_size : int
+            The number of devices on which to run the tensorrtllm engine(s)
+        model_repo : str
+            The path to the model repository
+        Returns
+        -------
+        The mpirun command (a list of arguments) launching one tritonserver
+        rank per device for the engine(s) stored in the model repository
+        """
+        cmd = ["mpirun", "--allow-run-as-root"]
+        for i in range(world_size):
+            cmd += ["-n", "1", "/opt/tritonserver/bin/tritonserver"]
+            cmd += [
+                f"--model-repository={model_repo}",
+                "--disable-auto-complete-config",
+                f"--backend-config=python,shm-region-prefix-name=prefix{i}_",
+                ":",
+            ]
+        return cmd
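
Note: to make the new helper concrete, here is a sketch of what mpi_run
assembles for a two-device engine, with /models as a hypothetical repository
path. Each rank becomes its own mpirun application context, separated by ':',
and receives a distinct shared-memory region prefix so the ranks' Python
backend instances do not collide:

    # Sketch: mpi_run with world_size=2 and a hypothetical repo path.
    # Assumes: from triton_cli.server.server_utils import TritonServerUtils
    cmd = TritonServerUtils.mpi_run(2, "/models")
    print(" ".join(cmd))
    # mpirun --allow-run-as-root \
    #   -n 1 /opt/tritonserver/bin/tritonserver --model-repository=/models \
    #     --disable-auto-complete-config \
    #     --backend-config=python,shm-region-prefix-name=prefix0_ : \
    #   -n 1 /opt/tritonserver/bin/tritonserver --model-repository=/models \
    #     --disable-auto-complete-config \
    #     --backend-config=python,shm-region-prefix-name=prefix1_ :

The loop also emits a ':' after the last context; the mpirun shipped in
Triton's containers accepts the trailing separator.
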
From 2f009887c37817e8b2da7d2223bba311a318d18d Mon Sep 17 00:00:00 2001
From: fpetrini15
Date: Thu, 14 Dec 2023 09:53:34 -0800
Subject: [PATCH 4/5] Remove currently unused libraries

---
 src/triton_cli/server/server_utils.py | 4 ----
 1 file changed, 4 deletions(-)
 mode change 100644 => 100755 src/triton_cli/server/server_utils.py

diff --git a/src/triton_cli/server/server_utils.py b/src/triton_cli/server/server_utils.py
old mode 100644
new mode 100755
index 86fe2ce..7a33bb4
--- a/src/triton_cli/server/server_utils.py
+++ b/src/triton_cli/server/server_utils.py
@@ -14,10 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
-import os
-import subprocess
-
 
 class TritonServerUtils:
     @staticmethod
From 93fae9762dabfacb4964da49778658b828e8d273 Mon Sep 17 00:00:00 2001
From: fpetrini15
Date: Thu, 14 Dec 2023 10:47:02 -0800
Subject: [PATCH 5/5] Review comments

---
 src/triton_cli/server/server_docker.py | 2 +-
 src/triton_cli/server/server_utils.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/triton_cli/server/server_docker.py b/src/triton_cli/server/server_docker.py
index a7e6743..deb182a 100755
--- a/src/triton_cli/server/server_docker.py
+++ b/src/triton_cli/server/server_docker.py
@@ -110,7 +110,7 @@ def start(self, env=None):
         # Construct run command
         # TRTLLM models require special handling. For now,
         # we will 'spell-out' the command.
-        if self._world_size > -1:
+        if self._world_size >= 1:
             command = " ".join(
                 TritonServerUtils.mpi_run(
                     self._world_size, self._server_config["model-repository"]
diff --git a/src/triton_cli/server/server_utils.py b/src/triton_cli/server/server_utils.py
index 7a33bb4..01e4ffa 100755
--- a/src/triton_cli/server/server_utils.py
+++ b/src/triton_cli/server/server_utils.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
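
Note: the end state of the series, as a sketch of the dispatch in
server_docker.start() (names as in the patches above). The >= 1 comparison from
the last patch matters because the earlier > -1 check would have routed
world_size=0 into an mpirun invocation with no application contexts, while the
-1 default now falls through to the ordinary launch:

    # Sketch of the final dispatch; assumes the imports from the patched module.
    def build_command(world_size, server_config, env_cmds):
        if world_size >= 1:  # MPI path: one tritonserver rank per device
            return " ".join(
                TritonServerUtils.mpi_run(world_size, server_config["model-repository"])
            )
        # world_size <= 0, including the -1 default: plain single-process launch
        return " ".join(env_cmds + ["tritonserver", server_config.to_cli_string()])
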