Review-amended patch for the L0_input_validation QA test.

Files touched:
  * Dockerfile.QA - copies the simple, simple_identity and simple_string
    example models into qa/L0_input_validation/models.
  * qa/L0_input_validation/input_validation_test.py - replaces the two
    gRPC-only shape tests with tests that loop over the HTTP and gRPC clients
    and assert on the error text raised after set_shape() is called with a
    shape that disagrees with the data already attached to the input.
  * qa/L0_input_validation/test.sh - removes the inline pt_identity model
    setup, prints the relevant log at the point of failure, and copies two
    identity models before running input_byte_size_test.

Amendments made during review (added lines only; hunk line counts unchanged):
  1. Dockerfile.QA: the three model directories are listed explicitly
     instead of through {a,b,c} brace expansion. Brace expansion is a bash
     feature and Docker's default RUN shell is /bin/sh.
     NOTE(review): confirm whether this Dockerfile sets a SHELL instruction.
     NOTE(review): cp with several sources needs
     qa/L0_input_validation/models to exist already - confirm.
  2. input_validation_test.py: removed the f prefix from three string
     literals that contain no placeholders; renamed the unused comprehension
     variable i to _.
  3. test.sh: the new cp uses $DATADIR, like the neighbouring cp lines,
     instead of a hard-coded /data/inferenceserver/${REPO_VERSION} path.
     NOTE(review): assumes DATADIR points at that same directory - confirm.

Known limitations of this copy of the patch:
  * NOTE(review): the "index" blob ids below describe the unamended patch.
  * NOTE(review): the patch text arrived with its line breaks collapsed.
    Blank context lines and indentation were restored by convention so that
    each hunk matches the line counts in its header.
  * NOTE(review): in the test.sh hunk "-80,49 +81,6" the removed heredoc is
    truncated (text between "$(cat <" and "models/pt_identity/config.pbtxt"
    is missing). That hunk therefore has fewer removed lines than its header
    declares and must be regenerated from the repository before applying.

diff --git a/Dockerfile.QA b/Dockerfile.QA
index 2c43f735a5..8c51ab1075 100644
--- a/Dockerfile.QA
+++ b/Dockerfile.QA
@@ -149,8 +149,8 @@ RUN mkdir -p qa/common && \
     cp bin/triton_json_test qa/L0_json/. && \
     cp bin/backend_output_detail_test qa/L0_backend_output_detail/. && \
     cp -r deploy/mlflow-triton-plugin qa/L0_mlflow/. && \
-    cp bin/input_byte_size_test qa/L0_input_validation/. && \
-    cp -r docs/examples/model_repository/simple_identity qa/L0_input_validation/models
+    cp -r docs/examples/model_repository/simple docs/examples/model_repository/simple_identity docs/examples/model_repository/simple_string qa/L0_input_validation/models && \
+    cp bin/input_byte_size_test qa/L0_input_validation/.
 
 RUN mkdir -p qa/pkgs && \
     cp python/triton*.whl qa/pkgs/. && \
diff --git a/qa/L0_input_validation/input_validation_test.py b/qa/L0_input_validation/input_validation_test.py
index 33360b7a08..895fe8522f 100755
--- a/qa/L0_input_validation/input_validation_test.py
+++ b/qa/L0_input_validation/input_validation_test.py
@@ -34,8 +34,10 @@
 import infer_util as iu
 import numpy as np
 import tritonclient.grpc as tritongrpcclient
+import tritonclient.http as tritonhttpclient
+import tritonclient.utils as utils
 import tritonclient.utils.shared_memory as shm
-from tritonclient.utils import InferenceServerException, np_to_triton_dtype
+from tritonclient.utils import InferenceServerException
 
 
 class InputValTest(unittest.TestCase):
@@ -116,101 +118,113 @@ def test_input_validation_all_optional(self):
 
 
 class InputShapeTest(unittest.TestCase):
-    def test_input_shape_validation(self):
-        input_size = 8
-        model_name = "pt_identity"
-        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+    def test_client_input_shape_validation(self):
+        model_name = "simple"
 
-        # Pass
-        input_data = np.arange(input_size)[None].astype(np.float32)
-        inputs = [
-            tritongrpcclient.InferInput(
-                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
-            )
-        ]
-        inputs[0].set_data_from_numpy(input_data)
-        triton_client.infer(model_name=model_name, inputs=inputs)
-
-        # Larger input byte size than expected
-        input_data = np.arange(input_size + 2)[None].astype(np.float32)
-        inputs = [
-            tritongrpcclient.InferInput(
-                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
-            )
-        ]
-        inputs[0].set_data_from_numpy(input_data)
-        # Compromised input shape
-        inputs[0].set_shape((1, input_size))
-        with self.assertRaises(InferenceServerException) as e:
-            triton_client.infer(
-                model_name=model_name,
-                inputs=inputs,
+        for client_type in ["http", "grpc"]:
+            if client_type == "http":
+                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+            else:
+                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+
+            # Infer
+            inputs = []
+            if client_type == "http":
+                inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "INT32"))
+                inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "INT32"))
+            else:
+                inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32"))
+                inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "INT32"))
+
+            # Create the data for the two input tensors. Initialize the first
+            # to unique integers and the second to all ones.
+            input0_data = np.arange(start=0, stop=16, dtype=np.int32)
+            input0_data = np.expand_dims(input0_data, axis=0)
+            input1_data = np.ones(shape=(1, 16), dtype=np.int32)
+
+            # Initialize the data
+            inputs[0].set_data_from_numpy(input0_data)
+            inputs[1].set_data_from_numpy(input1_data)
+
+            # Compromised input shapes
+            inputs[0].set_shape([2, 8])
+            inputs[1].set_shape([2, 8])
+
+            with self.assertRaises(InferenceServerException) as e:
+                triton_client.infer(model_name=model_name, inputs=inputs)
+            err_str = str(e.exception)
+            self.assertIn(
+                "unexpected shape for input 'INPUT1' for model 'simple'. Expected [-1,16], got [2,8]",
+                err_str,
             )
-        err_str = str(e.exception)
-        self.assertIn(
-            "input byte size mismatch for input 'INPUT0' for model 'pt_identity'. Expected 32, got 40",
-            err_str,
-        )
 
-    def test_input_string_shape_validation(self):
-        input_size = 16
-        model_name = "graphdef_object_int32_int32"
-        np_dtype_string = np.dtype(object)
-        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+            # Compromised input shapes
+            inputs[0].set_shape([1, 8])
+            inputs[1].set_shape([1, 8])
 
-        def get_input_array(input_size, np_dtype):
-            rinput_dtype = iu._range_repr_dtype(np_dtype)
-            input_array = np.random.randint(
-                low=0, high=127, size=(1, input_size), dtype=rinput_dtype
+            with self.assertRaises(InferenceServerException) as e:
+                triton_client.infer(model_name=model_name, inputs=inputs)
+            err_str = str(e.exception)
+            self.assertIn(
+                "input 'INPUT0' got unexpected elements count 16, expected 8",
+                err_str,
             )
 
-            # Convert to string type
-            inn = np.array(
-                [str(x) for x in input_array.reshape(input_array.size)], dtype=object
-            )
-            input_array = inn.reshape(input_array.shape)
+    def test_client_input_string_shape_validation(self):
+        for client_type in ["http", "grpc"]:
 
-            inputs = []
-            inputs.append(
-                tritongrpcclient.InferInput(
-                    "INPUT0", input_array.shape, np_to_triton_dtype(np_dtype)
-                )
-            )
-            inputs.append(
-                tritongrpcclient.InferInput(
-                    "INPUT1", input_array.shape, np_to_triton_dtype(np_dtype)
-                )
-            )
+            def identity_inference(triton_client, np_array, binary_data):
+                model_name = "simple_identity"
 
-            inputs[0].set_data_from_numpy(input_array)
-            inputs[1].set_data_from_numpy(input_array)
-            return inputs
+                # Total elements no change
+                inputs = []
+                if client_type == "http":
+                    inputs.append(
+                        tritonhttpclient.InferInput("INPUT0", np_array.shape, "BYTES")
+                    )
+                    inputs[0].set_data_from_numpy(np_array, binary_data=binary_data)
+                    inputs[0].set_shape([2, 8])
+                else:
+                    inputs.append(
+                        tritongrpcclient.InferInput("INPUT0", np_array.shape, "BYTES")
+                    )
+                    inputs[0].set_data_from_numpy(np_array)
+                    inputs[0].set_shape([2, 8])
+                triton_client.infer(model_name=model_name, inputs=inputs)
 
-        # Input size is less than expected
-        inputs = get_input_array(input_size - 2, np_dtype_string)
-        # Compromised input shape
-        inputs[0].set_shape((1, input_size))
-        inputs[1].set_shape((1, input_size))
-        with self.assertRaises(InferenceServerException) as e:
-            triton_client.infer(model_name=model_name, inputs=inputs)
-        err_str = str(e.exception)
-        self.assertIn(
-            f"expected {input_size} string elements for inference input 'INPUT1', got {input_size-2}",
-            err_str,
-        )
+                # Compromised input shape
+                inputs[0].set_shape([1, 8])
 
-        # Input size is greater than expected
-        inputs = get_input_array(input_size + 2, np_dtype_string)
-        # Compromised input shape
-        inputs[0].set_shape((1, input_size))
-        inputs[1].set_shape((1, input_size))
-        with self.assertRaises(InferenceServerException) as e:
-            triton_client.infer(model_name=model_name, inputs=inputs)
-        err_str = str(e.exception)
-        self.assertIn(
-            f"expected {input_size} string elements for inference input 'INPUT1', got {input_size+2}",
-            err_str,
-        )
+                with self.assertRaises(InferenceServerException) as e:
+                    triton_client.infer(model_name=model_name, inputs=inputs)
+                err_str = str(e.exception)
+                self.assertIn(
+                    "input 'INPUT0' got unexpected elements count 16, expected 8",
+                    err_str,
+                )
+
+            if client_type == "http":
+                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+            else:
+                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+
+            # Example using BYTES input tensor with utf-8 encoded string that
+            # has an embedded null character.
+            null_chars_array = np.array(
+                ["he\x00llo".encode("utf-8") for _ in range(16)], dtype=np.object_
+            )
+            null_char_data = null_chars_array.reshape([1, 16])
+            identity_inference(triton_client, null_char_data, True)  # Using binary data
+            identity_inference(triton_client, null_char_data, False)  # Using JSON data
+
+            # Example using BYTES input tensor with 16 elements, where each
+            # element is a 4-byte binary blob with value 0x00010203. Can use
+            # dtype=np.bytes_ in this case.
+            bytes_data = [b"\x00\x01\x02\x03" for _ in range(16)]
+            np_bytes_data = np.array(bytes_data, dtype=np.bytes_)
+            np_bytes_data = np_bytes_data.reshape([1, 16])
+            identity_inference(triton_client, np_bytes_data, True)  # Using binary data
+            identity_inference(triton_client, np_bytes_data, False)  # Using JSON data
 
     def test_wrong_input_shape_tensor_size(self):
         def inference_helper(model_name, batch_size=1):
@@ -246,12 +260,12 @@ def inference_helper(model_name, batch_size=1):
                 tritongrpcclient.InferInput(
                     "DUMMY_INPUT0",
                     dummy_input_data.shape,
-                    np_to_triton_dtype(np.float32),
+                    utils.np_to_triton_dtype(np.float32),
                 ),
                 tritongrpcclient.InferInput(
                     "INPUT0",
                     shape_tensor_data.shape,
-                    np_to_triton_dtype(np.int32),
+                    utils.np_to_triton_dtype(np.int32),
                 ),
             ]
             inputs[0].set_data_from_numpy(dummy_input_data)
diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh
index fc70abd969..7c7d6feeed 100755
--- a/qa/L0_input_validation/test.sh
+++ b/qa/L0_input_validation/test.sh
@@ -68,6 +68,7 @@ set +e
 python3 -m pytest --junitxml="input_validation.report.xml" $TEST_PY::InputValTest >> $CLIENT_LOG 2>&1
 
 if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
     echo -e "\n***\n*** input_validation_test.py FAILED. \n***"
     RET=1
 fi
@@ -80,49 +81,6 @@ wait $SERVER_PID
 
 pip install torch
 pip install pytest-asyncio
-mkdir -p models/pt_identity/1
-PYTHON_CODE=$(cat < models/pt_identity/config.pbtxt << EOL
-name: "pt_identity"
-backend: "pytorch"
-max_batch_size: 8
-input [
-  {
-    name: "INPUT0"
-    data_type: TYPE_FP32
-    dims: [8]
-  }
-]
-output [
-  {
-    name: "OUTPUT0"
-    data_type: TYPE_FP32
-    dims: [8]
-  }
-]
-# ensure we batch requests together
-dynamic_batching {
-  max_queue_delay_microseconds: 1000000
-}
-EOL
-
-cp -r $DATADIR/qa_model_repository/graphdef_object_int32_int32 models/.
 cp -r $DATADIR/qa_shapetensor_model_repository/plan_nobatch_zero_1_float32_int32 models/.
 cp -r $DATADIR/qa_shapetensor_model_repository/plan_zero_1_float32_int32 models/.
 
@@ -138,6 +96,7 @@ set +e
 python3 -m pytest --junitxml="input_shape_validation.report.xml" $TEST_PY::InputShapeTest >> $CLIENT_LOG 2>&1
 
 if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
     echo -e "\n***\n*** input_validation_test.py FAILED. \n***"
     RET=1
 fi
@@ -147,10 +106,13 @@ kill $SERVER_PID
 wait $SERVER_PID
 
 # input_byte_size_test
+cp -r $DATADIR/qa_identity_model_repository/{savedmodel_zero_1_float32,savedmodel_zero_1_object} ./models
+
 set +e
 LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $TEST_EXEC >>$TEST_LOG 2>&1
 if [ $? -ne 0 ]; then
-    echo -e "\n***\n*** Query Unit Test Failed\n***"
+    cat $TEST_LOG
+    echo -e "\n***\n*** input_byte_size_test FAILED\n***"
     RET=1
 fi
 set -e
@@ -158,7 +120,6 @@ set -e
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Input Validation Test Passed\n***"
 else
-    cat $CLIENT_LOG
     cat $SERVER_LOG
     echo -e "\n***\n*** Input Validation Test FAILED\n***"
 fi