triton-inference-server · RAMitchell · Mar 17, 2023 · Mar 19, 2023 · Mar 19, 2023 · Mar 20, 2023
diff --git a/ci/gitlab/build.sh b/ci/gitlab/build.sh
@@ -18,7 +18,7 @@ set -e
 REPO_DIR=$(cd $(dirname $0)/../../; pwd)
 QA_DIR="${REPO_DIR}/qa"
 MODEL_DIR="${QA_DIR}/L0_e2e/model_repository"
-CPU_MODEL_DIR="${QA_DIR}/L0_e2e/cpu_model_repository"
+MODEL_CACHE_DIR="${QA_DIR}/L0_e2e/.model_cache"
 BUILDPY=${BUILDPY:-0}
 CPU_ONLY=${CPU_ONLY:-0}
 NO_CACHE=${NO_CACHE:-1}
@@ -105,18 +105,6 @@ then
   DOCKER_ARGS="$DOCKER_ARGS --label RUNNER_ID=${RUNNER_ID}"
 fi
 
-echo "Generating example models..."
-docker run \
-  -e RETRAIN=1 \
-  -e OWNER_ID=$(id -u) \
-  -e OWNER_GID=$(id -g) \
-  $GPU_DOCKER_ARGS \
-  $DOCKER_ARGS \
-  -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \
-  -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \
-  $MODEL_BUILDER_IMAGE \
-  bash -c 'source /conda/test/bin/activate && /qa/generate_example_models.sh'
-
 if [ $CPU_ONLY -eq 1 ]
 then
   DOCKER_ARGS="${DOCKER_ARGS} -e TRITON_ENABLE_GPU=OFF"
@@ -125,9 +113,10 @@ else
 fi
 
 echo "Running tests..."
+mkdir -p "${MODEL_CACHE_DIR}"
 docker run \
   -e TEST_PROFILE=ci \
   $DOCKER_ARGS \
   -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \
-  -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \
+  -v "${MODEL_CACHE_DIR}:/qa/L0_e2e/.model_cache" \
   --rm $TEST_TAG
diff --git a/ci/local/build.sh b/ci/local/build.sh
@@ -12,7 +12,7 @@ set -e
 REPO_DIR=$(cd $(dirname $0)/../../; pwd)
 QA_DIR="${REPO_DIR}/qa"
 MODEL_DIR="${QA_DIR}/L0_e2e/model_repository"
-CPU_MODEL_DIR="${QA_DIR}/L0_e2e/cpu_model_repository"
+MODEL_CACHE_DIR="${QA_DIR}/L0_e2e/.model_cache"
 HOST_BUILD="${HOST_BUILD:-0}"
 TEST_PROFILE="${TEST_PROFILE:-dev}"
 
@@ -44,27 +44,16 @@ if [ -z $CUDA_VISIBLE_DEVICES ]
 then
   DOCKER_ARGS="$DOCKER_ARGS --gpus all"
 else
-  DOCKER_ARGS="$DOCKER_ARGS --gpus $CUDA_VISIBLE_DEVICES"
+  DOCKER_ARGS="$DOCKER_ARGS --gpus \"device=$CUDA_VISIBLE_DEVICES\""
 fi
 
-echo "Generating example models..."
-docker run \
-  -e RETRAIN=${RETRAIN:-0} \
-  -e OWNER_ID=$(id -u) \
-  -e OWNER_GID=$(id -g) \
-  -e TEST_PROFILE=$TEST_PROFILE \
-  $DOCKER_ARGS \
-  -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \
-  -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \
-  --rm $TEST_TAG \
-  bash -c 'source /conda/test/bin/activate && /qa/generate_example_models.sh'
-
 echo "Running GPU-enabled tests..."
+mkdir -p "${MODEL_CACHE_DIR}"
 docker run \
   $DOCKER_ARGS \
   -e TEST_PROFILE=$TEST_PROFILE \
   -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \
-  -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \
+  -v "${MODEL_CACHE_DIR}:/qa/L0_e2e/.model_cache" \
   --rm $TEST_TAG
 
 export SERVER_TAG=triton_fil:cpu
@@ -79,5 +68,5 @@ docker run \
   -e TRITON_ENABLE_GPU=OFF \
   -e TEST_PROFILE=$TEST_PROFILE \
   -v "${MODEL_DIR}:/qa/L0_e2e/model_repository" \
-  -v "${CPU_MODEL_DIR}:/qa/L0_e2e/cpu_model_repository" \
+  -v "${MODEL_CACHE_DIR}:/qa/L0_e2e/.model_cache" \
   --rm $TEST_TAG
diff --git a/docs/tests.md b/docs/tests.md
@@ -33,17 +33,9 @@ invoke the `ci/local/build.sh` script, which will build the server image
 and a test image then run a container based on that image which runs the
 complete test suite.
 
-One of the most time-consuming parts of running the test suite is
-training the end-to-end test models. The `ci/local/build.sh` script will
-cache trained models between runs in `qa/L0_e2e/model_repository` and
-`qa/L0_e2e/cpu_model_repository`. Sometimes, you may make a change which
-invalidates previously generated models. In such cases, you can clear these
-directories in order to start fresh.
-
 The `ci/local/build.sh` script uses the following environment variables to
 control build and execution of tests:
 
-- `RETRAIN`: If set to 1, retrain test models.
 - `USE_CLIENT_WHEEL`: If set to 1, install the Triton client from a wheel
   copied from Triton's SDK image. This is useful for testing on ARM
   machines, where the Triton Python client is not available via pip.
@@ -60,8 +52,7 @@ control build and execution of tests:
 In addition to `ci/local/build.sh`, the repo contains a
 `ci/gitlab/build.sh` script which is used to run tests in CI. It is
 sometimes useful to invoke this script to more closely replicate the CI
-environment. This script does *not* cache models in between runs and will
-generally run more and slower tests than those used for the `local` script.
+environment.
 
 The `ci/gitlab/build.sh` script uses the following environment variables
 to control build and execution of tests:
@@ -88,8 +79,7 @@ to control build and execution of tests:
 
 ## Running Tests Manually
 It is *strongly* recommended that you use the provided test scripts for running
-tests. If you wish to run tests manually, you must generate test models using
-the `qa/generate_example_models.sh` script, start the Triton server against
-the generated model repository, and then run `pytest --repo qa/L0_e2e/model_repository qa/L0_e2e`.
+tests. If you wish to run tests manually, start the Triton server against
+the model repository, and then run `pytest --repo qa/L0_e2e/model_repository qa/L0_e2e`.
 
 This approach is not an officially supported testing method, and minimal

diff --git a/qa/L0_e2e/conftest.py b/qa/L0_e2e/conftest.py
@@ -9,8 +9,17 @@ def pytest_addoption(parser):
         os.path.dirname(os.path.abspath(__file__)),
         'model_repository'
     )
+    default_cache_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)),
+        '.model_cache'
+    )
     parser.addoption(
         "--repo",
         action="store",
         default=default_repo_path
     )
+    parser.addoption(
+        "--model_cache_dir",
+        action="store",
+        default=default_cache_path
+    )