diff --git a/.clang-format b/.clang-format
index 1589d5c4..da4c3928 100644
--- a/.clang-format
+++ b/.clang-format
@@ -74,6 +74,7 @@ SpacesInContainerLiterals: true
 SpacesInParentheses: false
 SpacesInSquareBrackets: false
 Standard: Cpp11
+StatementMacros: [API_ENTRY_TRY]
 TabWidth: 4
 UseTab: Never
 ...
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ef9e135..5264c94e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,23 @@
 # TensorRT OSS Release Changelog
 
-## 10.0.1 GA - 2024-04-30
+## 10.1.0 GA - 2024-06-17
+
+Key Features and Updates:
+
+- Parser changes
+  - Added `supportsModelV2` API
+  - Added support for `DeformConv` operation
+  - Added support for `PluginV3` TensorRT plugins
+  - Marked all IParser and IParserRefitter APIs as `noexcept`
+- Plugin changes
+  - Added version 2 of the ROIAlign_TRT plugin, which implements the IPluginV3 plugin interface. When importing an ONNX model with the RoiAlign op, this new version of the plugin will be inserted into the TRT network.
+- Samples changes
+  - Added a new sample [non_zero_plugin](samples/python/non_zero_plugin), which is a Python version of the C++ sample [sampleNonZeroPlugin](samples/sampleNonZeroPlugin).
+- Updated tooling
+  - Polygraphy v0.49.12
+  - ONNX-GraphSurgeon v0.5.3
+
+## 10.0.1 GA - 2024-04-24
 
 Key Features and Updates:
 
diff --git a/LICENSE b/LICENSE
index e4dec1f0..e80db763 100644
--- a/LICENSE
+++ b/LICENSE
@@ -353,3 +353,30 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
+
+  > demo/Diffusion/utils_sd3/sd3_impls.py
+  > demo/Diffusion/utils_sd3/other_impls.py
+  > demo/Diffusion/utils_sd3/mmdit.py
+  > demo/Diffusion/stable_diffusion_3_pipeline.py
+
+  MIT License
+
+  Copyright (c) 2024 Stability AI
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in all
+  copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
diff --git a/README.md b/README.md
index 44cd7442..f7835016 100644
--- a/README.md
+++ b/README.md
@@ -26,13 +26,13 @@ You can skip the **Build** section to enjoy TensorRT with Python.
 To build the TensorRT-OSS components, you will first need the following software packages.
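As a quick aside for the prerequisites that follow: if a TensorRT wheel is already installed, its reported version tells you whether the GA build below is still needed. A minimal check, assuming the `tensorrt` Python wheel is importable:

```python
# Minimal sanity check: confirm which TensorRT build the Python wheel reports
# before downloading the GA tarball listed below (assumes the `tensorrt`
# wheel is installed; the expected version prefix for this branch is 10.1.0).
import tensorrt as trt

print(trt.__version__)
```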
**TensorRT GA build** -* TensorRT v10.0.1.6 +* TensorRT v10.1.0.27 * Available from direct download links listed below **System Packages** * [CUDA](https://developer.nvidia.com/cuda-toolkit) * Recommended versions: - * cuda-12.2.0 + cuDNN-8.9 + * cuda-12.4.0 + cuDNN-8.9 * cuda-11.8.0 + cuDNN-8.9 * [GNU make](https://ftp.gnu.org/gnu/make/) >= v4.1 * [cmake](https://github.com/Kitware/CMake/releases) >= v3.13 @@ -73,24 +73,25 @@ To build the TensorRT-OSS components, you will first need the following software If using the TensorRT OSS build container, TensorRT libraries are preinstalled under `/usr/lib/x86_64-linux-gnu` and you may skip this step. Else download and extract the TensorRT GA build from [NVIDIA Developer Zone](https://developer.nvidia.com) with the direct links below: - - [TensorRT 10.0.1.6 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz) - - [TensorRT 10.0.1.6 for CUDA 12.4, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz) - - [TensorRT 10.0.1.6 for CUDA 11.8, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-11.8.zip) - - [TensorRT 10.0.1.6 for CUDA 12.4, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip) + - [TensorRT 10.1.0.27 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-11.8.tar.gz) + - [TensorRT 10.1.0.27 for CUDA 12.4, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz) + - [TensorRT 10.1.0.27 for CUDA 11.8, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/zip/TensorRT-10.1.0.27.Windows.win10.cuda-11.8.zip) + - [TensorRT 10.1.0.27 for CUDA 12.4, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/zip/TensorRT-10.1.0.27.Windows.win10.cuda-12.4.zip) + **Example: Ubuntu 20.04 on x86-64 with cuda-12.4** ```bash cd ~/Downloads - tar -xvzf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz - export TRT_LIBPATH=`pwd`/TensorRT-10.0.1.6 + tar -xvzf TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz + export TRT_LIBPATH=`pwd`/TensorRT-10.1.0.27 ``` **Example: Windows on x86-64 with cuda-12.4** ```powershell - Expand-Archive -Path TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip - $env:TRT_LIBPATH="$pwd\TensorRT-10.0.1.6\lib" + Expand-Archive -Path TensorRT-10.1.0.27.Windows.win10.cuda-12.4.zip + $env:TRT_LIBPATH="$pwd\TensorRT-10.1.0.27\lib" ``` ## Setting Up The Build Environment @@ -162,7 +163,7 @@ For Linux platforms, we recommend that you generate a docker container for build make -j$(nproc) ``` - **Example: Native builds on Windows (x86) with cuda-12.4** + **Example: Native builds on Windows (x86) with cuda-12.4** ```powershell cd $TRT_OSSPATH mkdir -p build diff --git a/VERSION b/VERSION index db243822..1bf602d7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.0.1.6 +10.1.0.27 diff --git a/demo/BERT/README.md b/demo/BERT/README.md index 52b86197..a814652d 100755 --- a/demo/BERT/README.md +++ b/demo/BERT/README.md @@ -30,6 +30,7 @@ This subfolder of the BERT TensorFlow 
repository, tested and maintained by NVIDI * [TensorRT inference benchmark](#tensorrt-inference-benchmark) * [Results](#results) * [Inference performance: NVIDIA A100](#inference-performance-nvidia-a100-40gb) + * [Inference performance: NVIDIA A30](#inference-performance-nvidia-a30) ## Model overview @@ -73,7 +74,7 @@ The following software version configuration has been tested: |Software|Version| |--------|-------| |Python|>=3.8| -|TensorRT|10.0.1.6| +|TensorRT|10.1.0.27| |CUDA|12.4| ## Setup @@ -433,75 +434,240 @@ Results were obtained by running `scripts/inference_benchmark.sh --gpu Ampere` o | Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | | |-----------------|------------|-----------------|-----------------|---------|-----------------|-----------------|---------| | | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average | -| 128 | 1 | 0.68 | 0.68 | 0.55 | 0.67 | 0.79 | 0.63 | -| 128 | 2 | 0.60 | 0.76 | 0.60 | 0.91 | 0.91 | 0.73 | -| 128 | 4 | 0.73 | 0.93 | 0.73 | 1.19 | 1.19 | 0.94 | -| 128 | 8 | 1.21 | 1.21 | 0.96 | 1.31 | 1.31 | 1.31 | -| 128 | 12 | 1.20 | 1.52 | 1.20 | 1.72 | 1.72 | 1.71 | -| 128 | 16 | 1.34 | 1.72 | 1.35 | 2.07 | 2.32 | 2.06 | -| 128 | 24 | 1.82 | 1.82 | 1.82 | 3.02 | 3.08 | 3.02 | -| 128 | 32 | 2.24 | 2.24 | 2.24 | 3.91 | 3.91 | 3.89 | -| 128 | 64 | 4.15 | 4.19 | 4.12 | 7.62 | 7.64 | 7.57 | -| 128 | 128 | 8.11 | 8.12 | 8.03 | 15.34 | 15.38 | 15.21 | -| 384 | 1 | 1.13 | 1.13 | 1.13 | 1.24 | 1.60 | 1.25 | -| 384 | 2 | 1.31 | 1.31 | 1.31 | 1.54 | 1.54 | 1.54 | -| 384 | 4 | 1.66 | 1.66 | 1.66 | 2.08 | 2.08 | 2.08 | -| 384 | 8 | 2.21 | 2.21 | 2.21 | 3.37 | 3.37 | 3.32 | -| 384 | 12 | 3.32 | 3.32 | 3.32 | 4.78 | 4.82 | 4.77 | -| 384 | 16 | 4.01 | 4.01 | 4.00 | 6.37 | 6.37 | 6.36 | -| 384 | 24 | 5.70 | 5.70 | 5.70 | 9.34 | 9.39 | 9.29 | -| 384 | 32 | 7.63 | 7.63 | 7.63 | 12.99 | 13.03 | 12.85 | -| 384 | 64 | 14.86 | 14.87 | 14.72 | 24.89 | 25.12 | 24.70 | -| 384 | 128 | 28.96 | 28.96 | 28.69 | 48.93 | 49.02 | 48.59 | +| 128 | 1 | 0.54 | 0.69 | 0.54 | 0.79 | 0.79 | 0.63 | +| 128 | 2 | 0.76 | 0.76 | 0.61 | 0.72 | 0.92 | 0.72 | +| 128 | 4 | 0.93 | 0.93 | 0.74 | 0.93 | 1.19 | 0.93 | +| 128 | 8 | 0.94 | 1.20 | 0.94 | 1.31 | 1.31 | 1.31 | +| 128 | 12 | 1.20 | 1.53 | 1.21 | 1.70 | 2.15 | 1.69 | +| 128 | 16 | 1.33 | 1.34 | 1.33 | 2.08 | 2.08 | 2.06 | +| 128 | 24 | 1.82 | 1.82 | 1.82 | 3.05 | 3.05 | 3.03 | +| 128 | 32 | 2.23 | 2.24 | 2.23 | 3.92 | 3.92 | 3.90 | +| 128 | 64 | 4.19 | 4.19 | 4.14 | 7.75 | 7.76 | 7.68 | +| 128 | 128 | 8.14 | 8.14 | 8.08 | 15.37 | 15.44 | 15.29 | +| 384 | 1 | 1.13 | 1.13 | 1.14 | 1.25 | 1.61 | 1.26 | +| 384 | 2 | 1.32 | 1.56 | 1.32 | 1.55 | 1.55 | 1.54 | +| 384 | 4 | 1.66 | 2.12 | 1.66 | 2.12 | 2.12 | 2.12 | +| 384 | 8 | 2.21 | 2.30 | 2.21 | 3.34 | 3.40 | 3.33 | +| 384 | 12 | 3.31 | 3.32 | 3.31 | 4.84 | 4.84 | 4.79 | +| 384 | 16 | 4.00 | 4.00 | 4.00 | 6.39 | 6.39 | 6.36 | +| 384 | 24 | 5.70 | 5.70 | 5.69 | 9.49 | 9.49 | 9.41 | +| 384 | 32 | 7.70 | 7.72 | 7.64 | 13.02 | 13.03 | 12.89 | +| 384 | 64 | 14.89 | 14.90 | 14.79 | 25.16 | 25.18 | 24.85 | +| 384 | 128 | 29.01 | 29.02 | 28.78 | 49.11 | 49.24 | 48.73 | ##### BERT Large | Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | | |-----------------|------------|-----------------|-----------------|---------|-----------------|-----------------|---------| | | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average | -| 128 | 1 | 1.39 | 1.39 | 1.24 | 1.54 | 1.55 | 1.54 | -| 128 
| 2 | 1.42 | 1.42 | 1.41 | 1.82 | 1.82 | 1.82 | -| 128 | 4 | 1.78 | 1.95 | 1.79 | 2.50 | 2.50 | 2.50 | -| 128 | 8 | 2.64 | 2.64 | 2.64 | 3.97 | 3.97 | 3.97 | -| 128 | 12 | 3.09 | 3.09 | 3.09 | 5.02 | 5.03 | 4.99 | -| 128 | 16 | 4.03 | 4.03 | 4.03 | 6.93 | 6.93 | 6.86 | -| 128 | 24 | 5.28 | 5.31 | 5.28 | 9.64 | 9.65 | 9.56 | -| 128 | 32 | 7.01 | 7.01 | 6.95 | 12.95 | 13.07 | 12.86 | -| 128 | 64 | 12.84 | 12.86 | 12.72 | 24.80 | 25.05 | 24.68 | -| 128 | 128 | 25.26 | 25.27 | 25.01 | 49.09 | 49.25 | 48.71 | -| 384 | 1 | 2.55 | 2.55 | 2.55 | 2.96 | 2.96 | 2.95 | -| 384 | 2 | 3.04 | 3.04 | 3.04 | 3.90 | 3.90 | 3.90 | -| 384 | 4 | 4.01 | 4.02 | 4.01 | 5.74 | 5.80 | 5.68 | -| 384 | 8 | 7.18 | 7.18 | 7.17 | 10.98 | 11.00 | 10.91 | -| 384 | 12 | 9.15 | 9.15 | 9.14 | 15.43 | 15.44 | 15.33 | -| 384 | 16 | 12.28 | 12.29 | 12.28 | 21.13 | 21.14 | 20.90 | -| 384 | 24 | 17.67 | 17.67 | 17.56 | 30.98 | 31.07 | 30.71 | -| 384 | 32 | 23.22 | 23.23 | 23.02 | 41.22 | 41.28 | 40.63 | -| 384 | 64 | 45.16 | 45.30 | 44.83 | 79.64 | 79.98 | 79.24 | -| 384 | 128 | 87.81 | 87.82 | 87.73 | 156.66 | 157.03 | 155.65 | +| 128 | 1 | 1.24 | 1.25 | 1.24 | 1.55 | 1.55 | 1.55 | +| 128 | 2 | 1.43 | 1.80 | 1.43 | 1.82 | 1.82 | 1.82 | +| 128 | 4 | 1.78 | 1.79 | 1.78 | 2.53 | 2.54 | 2.53 | +| 128 | 8 | 2.64 | 2.64 | 2.64 | 3.99 | 4.01 | 3.96 | +| 128 | 12 | 3.08 | 3.09 | 3.08 | 5.08 | 5.08 | 5.02 | +| 128 | 16 | 4.03 | 4.03 | 4.03 | 6.94 | 6.94 | 6.89 | +| 128 | 24 | 5.32 | 5.34 | 5.28 | 9.71 | 9.80 | 9.69 | +| 128 | 32 | 7.02 | 7.09 | 6.99 | 12.95 | 13.08 | 12.89 | +| 128 | 64 | 12.89 | 12.89 | 12.80 | 24.83 | 25.00 | 24.65 | +| 128 | 128 | 25.28 | 25.29 | 25.05 | 49.15 | 49.41 | 48.82 | +| 384 | 1 | 2.55 | 2.56 | 2.55 | 2.96 | 2.96 | 2.96 | +| 384 | 2 | 3.04 | 3.04 | 3.03 | 4.00 | 4.01 | 4.00 | +| 384 | 4 | 4.04 | 4.04 | 4.04 | 5.73 | 5.75 | 5.70 | +| 384 | 8 | 7.17 | 7.17 | 7.16 | 11.14 | 11.16 | 11.07 | +| 384 | 12 | 9.14 | 9.14 | 9.13 | 15.46 | 15.47 | 15.36 | +| 384 | 16 | 12.28 | 12.40 | 12.28 | 21.20 | 21.31 | 21.06 | +| 384 | 24 | 17.70 | 17.84 | 17.63 | 31.03 | 31.04 | 30.76 | +| 384 | 32 | 23.29 | 23.30 | 23.11 | 41.07 | 41.31 | 40.74 | +| 384 | 64 | 44.94 | 45.20 | 44.87 | 80.15 | 80.36 | 79.42 | +| 384 | 128 | 87.97 | 87.99 | 87.81 | 157.22 | 157.81 | 156.05 | ##### Megatron Large with Sparsity | Sequence Length | Batch Size | INT8 QAT Latency (ms) | | | |-----------------|------------|-----------------|-----------------|---------| | | | 95th Percentile | 99th Percentile | Average | -| 128 | 1 | 1.12 | 1.41 | 1.13 | -| 128 | 2 | 1.37 | 1.70 | 1.38 | -| 128 | 4 | 1.77 | 1.78 | 1.77 | +| 128 | 1 | 1.11 | 1.41 | 1.12 | +| 128 | 2 | 1.33 | 1.34 | 1.33 | +| 128 | 4 | 1.78 | 1.78 | 1.77 | | 128 | 8 | 2.54 | 2.54 | 2.53 | -| 128 | 12 | 3.13 | 3.13 | 3.12 | -| 128 | 16 | 3.99 | 3.99 | 3.98 | -| 128 | 24 | 4.90 | 4.90 | 4.90 | -| 128 | 32 | 7.04 | 7.06 | 7.00 | -| 128 | 64 | 11.62 | 11.63 | 11.61 | -| 128 | 128 | 21.24 | 21.34 | 21.12 | -| 384 | 1 | 1.71 | 2.15 | 1.71 | +| 128 | 12 | 2.97 | 2.97 | 2.96 | +| 128 | 16 | 3.90 | 3.91 | 3.90 | +| 128 | 24 | 4.89 | 4.89 | 4.88 | +| 128 | 32 | 6.99 | 7.01 | 6.94 | +| 128 | 64 | 11.62 | 11.69 | 11.60 | +| 128 | 128 | 21.38 | 21.39 | 21.21 | +| 384 | 1 | 1.68 | 1.68 | 1.68 | | 384 | 2 | 2.21 | 2.21 | 2.21 | -| 384 | 4 | 3.63 | 3.64 | 3.63 | -| 384 | 8 | 5.74 | 5.74 | 5.73 | -| 384 | 12 | 8.22 | 8.23 | 8.21 | -| 384 | 16 | 10.33 | 10.33 | 10.31 | -| 384 | 24 | 14.52 | 14.52 | 14.51 | -| 384 | 32 | 18.72 | 18.73 | 18.71 | -| 384 | 64 | 35.79 | 35.81 | 35.50 | -| 384 | 128 | 67.72 | 
67.86 | 67.55 | +| 384 | 4 | 3.48 | 3.48 | 3.47 | +| 384 | 8 | 5.73 | 5.74 | 5.73 | +| 384 | 12 | 8.37 | 8.37 | 8.35 | +| 384 | 16 | 10.35 | 10.36 | 10.33 | +| 384 | 24 | 14.62 | 14.62 | 14.61 | +| 384 | 32 | 18.91 | 18.95 | 18.75 | +| 384 | 64 | 35.84 | 35.86 | 35.61 | +| 384 | 128 | 67.81 | 67.83 | 67.73 | + +### Inference Performance NVIDIA L4 + +Results were obtained by running `scripts/inference_benchmark.sh --gpu Ampere` on NVIDIA L4. + +##### BERT Base + +| Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | | +|-----------------|------------|-----------------|-----------------|---------|-----------------|-----------------|---------| +| | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average | +| 128 | 1 | 0.62 | 0.62 | 0.61 | 1.03 | 1.03 | 1.01 | +| 128 | 2 | 0.81 | 0.81 | 0.78 | 1.35 | 1.37 | 1.33 | +| 128 | 4 | 1.16 | 1.16 | 1.14 | 2.17 | 2.18 | 2.14 | +| 128 | 8 | 1.95 | 2.00 | 1.92 | 3.68 | 3.68 | 3.60 | +| 128 | 12 | 2.70 | 2.71 | 2.64 | 5.24 | 5.26 | 5.14 | +| 128 | 16 | 3.44 | 3.44 | 3.34 | 6.77 | 6.77 | 6.64 | +| 128 | 24 | 4.91 | 4.94 | 4.80 | 10.19 | 10.42 | 10.15 | +| 128 | 32 | 6.31 | 6.40 | 6.23 | 13.57 | 13.72 | 13.41 | +| 128 | 64 | 13.69 | 13.85 | 13.46 | 30.35 | 30.72 | 29.58 | +| 128 | 128 | 28.90 | 29.15 | 28.61 | 66.75 | 67.06 | 66.09 | +| 384 | 1 | 1.30 | 1.30 | 1.30 | 2.10 | 2.10 | 2.09 | +| 384 | 2 | 1.85 | 1.86 | 1.84 | 3.18 | 3.20 | 3.17 | +| 384 | 4 | 3.02 | 3.02 | 2.96 | 5.49 | 5.53 | 5.48 | +| 384 | 8 | 5.60 | 5.64 | 5.50 | 11.10 | 11.11 | 10.90 | +| 384 | 12 | 8.37 | 8.39 | 8.20 | 16.61 | 16.76 | 16.51 | +| 384 | 16 | 11.18 | 11.19 | 11.02 | 23.24 | 23.56 | 23.16 | +| 384 | 24 | 17.09 | 17.29 | 16.96 | 35.94 | 35.95 | 35.39 | +| 384 | 32 | 23.38 | 23.57 | 23.17 | 50.65 | 50.92 | 50.51 | +| 384 | 64 | 49.52 | 49.54 | 49.01 | 104.52 | 104.94 | 103.73 | +| 384 | 128 | 104.93 | 105.33 | 103.94 | 197.12 | 197.56 | 196.03 | + +##### BERT Large + +| Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | | +|-----------------|------------|-----------------|-----------------|---------|-----------------|-----------------|---------| +| | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average | +| 128 | 1 | 1.81 | 1.82 | 1.79 | 3.15 | 3.16 | 3.12 | +| 128 | 2 | 2.50 | 2.55 | 2.47 | 4.49 | 4.58 | 4.44 | +| 128 | 4 | 3.60 | 3.62 | 3.59 | 6.94 | 6.95 | 6.90 | +| 128 | 8 | 6.44 | 6.50 | 6.34 | 12.93 | 12.99 | 12.79 | +| 128 | 12 | 8.53 | 8.53 | 8.35 | 18.26 | 18.27 | 18.08 | +| 128 | 16 | 11.37 | 11.37 | 11.23 | 25.17 | 25.40 | 25.04 | +| 128 | 24 | 16.13 | 16.14 | 16.09 | 35.45 | 35.45 | 35.26 | +| 128 | 32 | 21.66 | 21.66 | 21.56 | 47.66 | 47.66 | 47.63 | +| 128 | 64 | 47.07 | 47.08 | 46.65 | 102.00 | 102.24 | 101.29 | +| 128 | 128 | 91.60 | 92.23 | 91.19 | 219.24 | 219.55 | 218.06 | +| 384 | 1 | 3.47 | 3.48 | 3.47 | 6.53 | 6.63 | 6.36 | +| 384 | 2 | 5.58 | 5.58 | 5.53 | 10.51 | 10.62 | 10.44 | +| 384 | 4 | 9.91 | 10.01 | 9.73 | 20.58 | 20.80 | 20.10 | +| 384 | 8 | 18.45 | 18.47 | 18.23 | 38.06 | 38.24 | 37.60 | +| 384 | 12 | 27.03 | 27.03 | 26.72 | 58.94 | 59.27 | 58.09 | +| 384 | 16 | 37.47 | 37.51 | 36.77 | 79.40 | 79.70 | 78.36 | +| 384 | 24 | 55.02 | 55.25 | 54.56 | 123.06 | 123.32 | 121.71 | +| 384 | 32 | 77.22 | 77.54 | 76.48 | 167.99 | 168.34 | 167.10 | +| 384 | 64 | 157.21 | 157.53 | 155.69 | 335.31 | 335.96 | 333.65 | +| 384 | 128 | 337.82 | 338.55 | 335.23 | 640.65 | 641.04 | 639.38 | + +##### Megatron Large with Sparsity + +| Sequence 
Length | Batch Size | INT8 QAT Latency (ms) | | | +|-----------------|------------|-----------------|-----------------|---------| +| | | 95th Percentile | 99th Percentile | Average | +| 128 | 1 | 1.51 | 1.51 | 1.49 | +| 128 | 2 | 2.05 | 2.06 | 2.01 | +| 128 | 4 | 3.00 | 3.01 | 2.94 | +| 128 | 8 | 5.06 | 5.08 | 5.05 | +| 128 | 12 | 6.71 | 6.78 | 6.63 | +| 128 | 16 | 8.83 | 8.84 | 8.75 | +| 128 | 24 | 13.38 | 13.39 | 13.16 | +| 128 | 32 | 17.61 | 17.63 | 17.50 | +| 128 | 64 | 36.49 | 36.55 | 36.16 | +| 128 | 128 | 80.34 | 80.39 | 79.62 | +| 384 | 1 | 2.81 | 2.82 | 2.77 | +| 384 | 2 | 4.20 | 4.23 | 4.12 | +| 384 | 4 | 7.62 | 7.66 | 7.53 | +| 384 | 8 | 15.13 | 15.15 | 14.97 | +| 384 | 12 | 21.74 | 21.87 | 21.56 | +| 384 | 16 | 28.83 | 29.00 | 28.70 | +| 384 | 24 | 47.51 | 47.58 | 47.12 | +| 384 | 32 | 61.31 | 61.50 | 60.79 | +| 384 | 64 | 126.97 | 127.06 | 126.69 | +| 384 | 128 | 256.27 | 256.61 | 255.09 | + +### Inference Performance NVIDIA L40S + +Results were obtained by running `scripts/inference_benchmark.sh --gpu Ampere` on NVIDIA L40S. + +##### BERT Base + +| Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | | +|-----------------|------------|-----------------|-----------------|---------|-----------------|-----------------|---------| +| | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average | +| 128 | 1 | 0.34 | 0.34 | 0.34 | 0.48 | 0.48 | 0.48 | +| 128 | 2 | 0.41 | 0.41 | 0.41 | 0.57 | 0.57 | 0.56 | +| 128 | 4 | 0.50 | 0.50 | 0.50 | 0.78 | 0.78 | 0.78 | +| 128 | 8 | 0.67 | 0.67 | 0.67 | 1.30 | 1.30 | 1.29 | +| 128 | 12 | 0.92 | 0.93 | 0.91 | 1.78 | 1.78 | 1.76 | +| 128 | 16 | 1.10 | 1.10 | 1.10 | 2.30 | 2.31 | 2.29 | +| 128 | 24 | 1.48 | 1.48 | 1.47 | 3.30 | 3.31 | 3.26 | +| 128 | 32 | 1.83 | 1.84 | 1.82 | 3.98 | 3.99 | 3.96 | +| 128 | 64 | 3.52 | 3.53 | 3.49 | 8.46 | 8.52 | 8.40 | +| 128 | 128 | 7.63 | 7.64 | 7.58 | 17.47 | 17.57 | 17.33 | +| 384 | 1 | 0.73 | 0.73 | 0.73 | 1.04 | 1.04 | 1.03 | +| 384 | 2 | 0.88 | 0.88 | 0.88 | 1.36 | 1.36 | 1.36 | +| 384 | 4 | 1.17 | 1.17 | 1.16 | 2.21 | 2.21 | 2.19 | +| 384 | 8 | 1.73 | 1.73 | 1.72 | 3.53 | 3.53 | 3.51 | +| 384 | 12 | 2.73 | 2.74 | 2.72 | 5.25 | 5.26 | 5.18 | +| 384 | 16 | 3.28 | 3.29 | 3.27 | 7.58 | 7.59 | 7.53 | +| 384 | 24 | 4.97 | 4.98 | 4.94 | 10.37 | 10.40 | 10.27 | +| 384 | 32 | 6.47 | 6.49 | 6.40 | 14.17 | 14.20 | 14.03 | +| 384 | 64 | 14.05 | 14.07 | 13.89 | 31.25 | 31.34 | 30.90 | +| 384 | 128 | 29.55 | 29.77 | 28.85 | 64.72 | 65.01 | 63.83 | + +##### BERT Large + +| Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | | +|-----------------|------------|-----------------|-----------------|---------|-----------------|-----------------|---------| +| | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average | +| 128 | 1 | 0.88 | 0.88 | 0.88 | 1.30 | 1.30 | 1.29 | +| 128 | 2 | 0.99 | 0.99 | 0.98 | 1.51 | 1.51 | 1.50 | +| 128 | 4 | 1.37 | 1.37 | 1.36 | 2.30 | 2.30 | 2.28 | +| 128 | 8 | 1.96 | 1.96 | 1.95 | 3.92 | 3.93 | 3.90 | +| 128 | 12 | 2.83 | 2.86 | 2.81 | 5.92 | 5.93 | 5.90 | +| 128 | 16 | 3.27 | 3.27 | 3.24 | 6.81 | 6.82 | 6.75 | +| 128 | 24 | 4.64 | 4.64 | 4.61 | 10.25 | 10.28 | 10.19 | +| 128 | 32 | 5.73 | 5.74 | 5.68 | 13.17 | 13.19 | 13.01 | +| 128 | 64 | 12.00 | 12.08 | 11.89 | 28.33 | 28.35 | 28.01 | +| 128 | 128 | 26.06 | 26.22 | 25.74 | 65.44 | 65.68 | 64.41 | +| 384 | 1 | 1.68 | 1.68 | 1.67 | 2.72 | 2.72 | 2.71 | +| 384 | 2 | 2.29 | 2.29 | 2.28 | 3.95 | 3.96 | 3.94 | +| 384 | 4 | 3.31 | 
3.31 | 3.30 | 6.50 | 6.55 | 6.45 | +| 384 | 8 | 5.15 | 5.16 | 5.13 | 10.84 | 10.87 | 10.69 | +| 384 | 12 | 8.14 | 8.15 | 8.10 | 19.89 | 19.99 | 19.37 | +| 384 | 16 | 9.96 | 9.98 | 9.86 | 22.65 | 22.68 | 22.45 | +| 384 | 24 | 15.37 | 15.42 | 15.23 | 35.42 | 35.49 | 35.08 | +| 384 | 32 | 20.32 | 20.45 | 20.04 | 48.00 | 48.01 | 47.26 | +| 384 | 64 | 44.74 | 44.94 | 43.95 | 104.17 | 104.49 | 102.96 | +| 384 | 128 | 90.01 | 90.24 | 88.73 | 205.73 | 206.26 | 203.73 | + +##### Megatron Large with Sparsity + +| Sequence Length | Batch Size | INT8 QAT Latency (ms) | | | +|-----------------|------------|-----------------|-----------------|---------| +| | | 95th Percentile | 99th Percentile | Average | +| 128 | 1 | 0.76 | 0.76 | 0.76 | +| 128 | 2 | 0.90 | 0.90 | 0.90 | +| 128 | 4 | 1.14 | 1.14 | 1.13 | +| 128 | 8 | 1.72 | 1.72 | 1.71 | +| 128 | 12 | 2.28 | 2.28 | 2.28 | +| 128 | 16 | 2.74 | 2.74 | 2.74 | +| 128 | 24 | 4.53 | 4.53 | 4.52 | +| 128 | 32 | 5.17 | 5.23 | 5.14 | +| 128 | 64 | 10.19 | 10.20 | 10.13 | +| 128 | 128 | 21.23 | 21.30 | 20.96 | +| 384 | 1 | 1.13 | 1.13 | 1.13 | +| 384 | 2 | 1.65 | 1.65 | 1.64 | +| 384 | 4 | 2.53 | 2.53 | 2.52 | +| 384 | 8 | 4.99 | 5.00 | 4.98 | +| 384 | 12 | 6.55 | 6.55 | 6.50 | +| 384 | 16 | 8.55 | 8.56 | 8.50 | +| 384 | 24 | 12.72 | 12.73 | 12.68 | +| 384 | 32 | 16.78 | 16.85 | 16.67 | +| 384 | 64 | 36.48 | 36.55 | 35.85 | +| 384 | 128 | 78.19 | 79.69 | 76.16 | + diff --git a/demo/BERT/builder.py b/demo/BERT/builder.py index c5f21b0a..90060ed2 100755 --- a/demo/BERT/builder.py +++ b/demo/BERT/builder.py @@ -503,7 +503,7 @@ def main(): parser.add_argument("-f", "--fp16", action="store_true", help="Indicates that inference should be run in FP16 precision", required=False) parser.add_argument("-i", "--int8", action="store_true", help="Indicates that inference should be run in INT8 precision", required=False) parser.add_argument("-t", "--strict", action="store_true", help="Indicates that inference should be run in strict precision mode", required=False) - parser.add_argument("-w", "--workspace-size", default=1200, help="Workspace size in MiB for building the BERT engine", type=int) + parser.add_argument("-w", "--workspace-size", default=2500, help="Workspace size in MiB for building the BERT engine", type=int) parser.add_argument("-j", "--squad-json", default="squad/dev-v1.1.json", help="squad json dataset used for int8 calibration", required=False) parser.add_argument("-v", "--vocab-file", default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", help="Path to file containing entire understandable vocab", required=False) parser.add_argument("-n", "--calib-num", default=100, help="calibration batch numbers", type=int) diff --git a/demo/BERT/builder_varseqlen.py b/demo/BERT/builder_varseqlen.py index 66a9d571..33dd5060 100755 --- a/demo/BERT/builder_varseqlen.py +++ b/demo/BERT/builder_varseqlen.py @@ -471,7 +471,7 @@ def main(): help="The folder containing the bert_config.json, which can be downloaded e.g. 
from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google") parser.add_argument("-f", "--fp16", action="store_true", help="Indicates that inference should be run in FP16 precision", required=False) parser.add_argument("-i", "--int8", action="store_true", help="Indicates that inference should be run in INT8 precision", required=False) - parser.add_argument("-w", "--workspace-size", default=1200, help="Workspace size in MiB for building the BERT engine", type=int) + parser.add_argument("-w", "--workspace-size", default=2500, help="Workspace size in MiB for building the BERT engine", type=int) parser.add_argument("-j", "--squad-json", default="squad/dev-v1.1.json", help="squad json dataset used for int8 calibration", required=False) parser.add_argument("-v", "--vocab-file", default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", help="Path to file containing entire understandable vocab", required=False) parser.add_argument("-n", "--calib-num", default=100, help="calibration batch numbers", type=int) diff --git a/demo/BERT/notebooks/benchmark.ipynb b/demo/BERT/notebooks/benchmark.ipynb index d09ec429..442b28f5 100755 --- a/demo/BERT/notebooks/benchmark.ipynb +++ b/demo/BERT/notebooks/benchmark.ipynb @@ -145,9 +145,10 @@ " bench_times = {}\n", " stream = cuda.Stream()\n", "\n", + " tensor_name = engine.get_tensor_name(engine.num_io_tensors-1)\n", " for idx, batch_size in enumerate(sorted(args.batch_size)):\n", " for idx in range(engine.num_optimization_profiles):\n", - " profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * engine.num_io_tensors)\n", + " profile_shape = engine.get_tensor_profile_shape(name = tensor_name, profile_index = idx)\n", " if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size:\n", " context.set_optimization_profile_async(idx, stream.handle)\n", " binding_idx_offset = idx * engine.num_io_tensors\n", @@ -233,9 +234,10 @@ " bench_times = {}\n", " stream = cuda.Stream()\n", "\n", + " tensor_name = engine.get_tensor_name(engine.num_io_tensors - 1)\n", " for idx, batch_size in enumerate(sorted(args.batch_size)):\n", " for idx in range(engine.num_optimization_profiles):\n", - " profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * engine.num_io_tensors)\n", + " profile_shape = engine.get_tensor_profile_shape(name = tensor_name, profile_index = idx)\n", " if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size:\n", " context.set_optimization_profile_async(idx, stream.handle)\n", " binding_idx_offset = idx * engine.num_io_tensors\n", diff --git a/demo/DeBERTa/README.md b/demo/DeBERTa/README.md index 5623fbea..202ba16f 100644 --- a/demo/DeBERTa/README.md +++ b/demo/DeBERTa/README.md @@ -75,7 +75,7 @@ Note that the performance gap between BERT's self-attention and DeBERTa's disent ## Environment Setup It is recommended to use docker for reproducing the following steps. 
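An aside on the `demo/BERT/notebooks/benchmark.ipynb` change above: TensorRT 10 removed the binding-index profile APIs, so the notebook now queries optimization-profile shapes by tensor name. A minimal sketch of the replacement call, assuming `engine` is a deserialized `trt.ICudaEngine` (the helper name is hypothetical):

```python
# Sketch of the TensorRT 10 tensor-name-based profile query that replaces the
# removed engine.get_profile_shape(profile_index, binding) API.
import tensorrt as trt

def profile_supports_batch(engine: "trt.ICudaEngine", batch_size: int, profile_index: int = 0) -> bool:
    # Any I/O tensor works for reading the batch dimension; the notebook uses
    # the last one via engine.get_tensor_name(engine.num_io_tensors - 1).
    name = engine.get_tensor_name(engine.num_io_tensors - 1)
    min_shape, opt_shape, max_shape = engine.get_tensor_profile_shape(name, profile_index)
    return min_shape[0] <= batch_size <= max_shape[0]
```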
 Follow the setup steps in TensorRT OSS [README](https://github.com/NVIDIA/TensorRT#setting-up-the-build-environment) to build and launch the container and build OSS:
 
-**Example: Ubuntu 20.04 on x86-64 with cuda-11.6.2 (default)**
+**Example: Ubuntu 20.04 on x86-64 with cuda-12.4 (default)**
 ```bash
 # Download this TensorRT OSS repo
 git clone -b main https://github.com/nvidia/TensorRT TensorRT
 cd TensorRT
 git submodule update --init --recursive
@@ -84,10 +84,10 @@
 ## at root of TensorRT OSS
 # build container
-./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda11.6
+./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.4
 
 # launch container
-./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda11.6 --gpus all
+./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.4 --gpus all
 
 ## now inside container
 # build OSS (only required for pre-8.4.3 TensorRT versions)
diff --git a/demo/Diffusion/README.md b/demo/Diffusion/README.md
index 42949381..0da43474 100644
--- a/demo/Diffusion/README.md
+++ b/demo/Diffusion/README.md
@@ -7,7 +7,7 @@ This demo application ("demoDiffusion") showcases the acceleration of Stable Dif
 ### Clone the TensorRT OSS repository
 
 ```bash
-git clone git@github.com:NVIDIA/TensorRT.git -b release/10.0 --single-branch
+git clone git@github.com:NVIDIA/TensorRT.git -b release/10.1 --single-branch
 cd TensorRT
 ```
 
@@ -16,7 +16,7 @@ cd TensorRT
 Install nvidia-docker using [these instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker).
 
 ```bash
-docker run --rm -it --gpus all -v $PWD:/workspace nvcr.io/nvidia/pytorch:24.01-py3 /bin/bash
+docker run --rm -it --gpus all -v $PWD:/workspace nvcr.io/nvidia/pytorch:24.05-py3 /bin/bash
 ```
 
 NOTE: The demo supports CUDA>=11.8
@@ -48,12 +48,12 @@
 onnx 1.15.0
 onnx-graphsurgeon 0.5.2
 onnxruntime 1.16.3
 polygraphy 0.49.9
-tensorrt 10.0.1.6
+tensorrt 10.1.0.27
 tokenizers 0.13.3
 torch 2.2.0
 transformers 4.33.1
 controlnet-aux 0.0.6
-nvidia-ammo 0.9.4
+nvidia-modelopt 0.11.2
 ```
 
 > NOTE: optionally install HuggingFace [accelerate](https://pypi.org/project/accelerate/) package for faster and less memory-intensive model loading.
 
@@ -139,13 +139,15 @@ python3 demo_txt2img_xl.py "a photo of an astronaut riding a horse on mars" --hf
 python3 demo_txt2img_xl.py "Picture of a rustic Italian village with Olive trees and mountains" --version=xl-1.0 --lora-path "ostris/crayon_style_lora_sdxl" "ostris/watercolor_style_lora_sdxl" --lora-scale 0.3 0.7 --onnx-dir onnx-sdxl-lora --engine-dir engine-sdxl-lora --build-enable-refit
 ```
 
-### Faster Text-to-image using SDXL & INT8 quantization using AMMO
+### Faster Text-to-image using SDXL & INT8 quantization using ModelOpt
 
 ```bash
 python3 demo_txt2img_xl.py "a photo of an astronaut riding a horse on mars" --version xl-1.0 --onnx-dir onnx-sdxl --engine-dir engine-sdxl --int8
 ```
 > Note that INT8 quantization is only supported for SDXL, and won't work with LoRA weights. Some prompts may produce better outputs with fewer denoising steps (e.g. `--denoising-steps 20`), but this will repeat the calibration, ONNX export, and engine building processes for the U-Net.
 
+For step-by-step tutorials to run INT8 inference on Stable Diffusion models, please refer to examples in [TensorRT ModelOpt diffusers sample](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/diffusers).
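An aside on the ModelOpt dependency introduced above (it replaces `nvidia-ammo`): a rough sketch of the calibrate-then-quantize flow that the `--int8` path drives, assuming `nvidia-modelopt==0.11.x` and its `modelopt.torch.quantization` entry points (`mtq.quantize`, `mtq.INT8_DEFAULT_CFG`); the toy module and random batches below are stand-ins for the SDXL U-Net and real calibration prompts:

```python
# Hedged sketch of ModelOpt INT8 post-training quantization (assumes
# nvidia-modelopt==0.11.x). The tiny module and random batches are stand-ins.
import torch
import modelopt.torch.quantization as mtq

model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU())
calib_batches = [torch.randn(8, 64) for _ in range(4)]

def forward_loop(m):
    # ModelOpt invokes this to run calibration data through the model and
    # record activation ranges for the INT8 scaling factors.
    with torch.no_grad():
        for x in calib_batches:
            m(x)

model = mtq.quantize(model, mtq.INT8_DEFAULT_CFG, forward_loop)
```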
+
 
 ### Faster Text-to-Image using SDXL + LCM (Latent Consistency Model) LoRA weights
 [LCM-LoRA](https://arxiv.org/abs/2311.05556) produces good quality images in 4 to 8 denoising steps instead of the 30+ needed by the base model. Note that we use the LCM scheduler and disable classifier-free-guidance by setting `--guidance-scale` to 0. LoRA weights are fused into the ONNX and finalized TensorRT plan files in this example.
@@ -158,6 +160,24 @@ Even faster image generation than LCM, producing coherent images in just 1 step.
 python3 demo_txt2img_xl.py "Einstein" --version xl-turbo --onnx-dir onnx-sdxl-turbo --engine-dir engine-sdxl-turbo --denoising-steps 1 --scheduler EulerA --guidance-scale 0.0 --width 512 --height 512
 ```
 
+### Generate an image guided by a text prompt using Stable Diffusion 3
+
+Run the command below to generate an image using Stable Diffusion 3:
+
+```bash
+python3 demo_txt2img_sd3.py "A vibrant street wall covered in colorful graffiti, the centerpiece spells \"SD3 MEDIUM\", in a storm of colors" --version sd3 --hf-token=$HF_TOKEN
+```
+
+You can also specify an input image for conditioning, as shown below:
+
+```bash
+wget https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png -O dog-on-bench.png
+
+python3 demo_txt2img_sd3.py "dog wearing a sweater and a blue collar" --version sd3 --input-image dog-on-bench.png --hf-token=$HF_TOKEN
+```
+
+Note that a denoising percentage is applied to the number of denoising steps when an input image is provided for conditioning. Its default value is 0.6 and can be updated using `--denoising-percentage`.
 
 ## Configuration options
 - Noise scheduler can be set using `--scheduler `. Note: not all schedulers are available for every version.
 - To accelerate engine building time use `--timing-cache `. The cache file will be created if it does not already exist. Note that performance may degrade if cache files are used across multiple GPU targets. It is recommended to use timing caches only during development. To achieve the best performance in deployment, please build engines without a timing cache.
diff --git a/demo/Diffusion/demo_txt2img_sd3.py b/demo/Diffusion/demo_txt2img_sd3.py
new file mode 100644
index 00000000..197de964
--- /dev/null
+++ b/demo/Diffusion/demo_txt2img_sd3.py
@@ -0,0 +1,153 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+from PIL import Image
+from cuda import cudart
+
+from stable_diffusion_3_pipeline import StableDiffusion3Pipeline
+from utilities import PIPELINE_TYPE
+from utils_sd3.other_impls import preprocess_image_sd3
+
+def add_arguments(parser):
+    # Stable Diffusion configuration
+    parser.add_argument('--version', type=str, default="sd3", choices=["sd3"], help="Version of Stable Diffusion")
+    parser.add_argument('prompt', nargs='*', help="Text prompt(s) to guide image generation")
+    parser.add_argument('--negative-prompt', nargs='*', default=[''], help="The negative prompt(s) to guide the image generation.")
+    parser.add_argument('--batch-size', type=int, default=1, choices=[1, 2, 4], help="Batch size (repeat prompt)")
+    parser.add_argument('--batch-count', type=int, default=1, help="Number of images to generate in sequence, one at a time.")
+    parser.add_argument('--height', type=int, default=1024, help="Height of image to generate (must be multiple of 8)")
+    parser.add_argument('--width', type=int, default=1024, help="Width of image to generate (must be multiple of 8)")
+    parser.add_argument('--shift', type=float, default=1.0, help="Shift parameter for SD3")
+    parser.add_argument('--cfg-scale', type=int, default=5, help="CFG Scale for SD3")
+    parser.add_argument('--denoising-steps', type=int, default=50, help="Number of denoising steps")
+    parser.add_argument('--denoising-percentage', type=float, default=0.6, help="Percentage of denoising steps to run. This parameter is only used if input-image is provided")
+    parser.add_argument('--input-image', type=str, default="", help="Path to the input image")
+
+    # ONNX export
+    parser.add_argument('--onnx-opset', type=int, default=19, choices=range(7,20), help="Select ONNX opset version to target for exported models")
+    parser.add_argument('--onnx-dir', default='onnx', help="Output directory for ONNX export")
+
+    # Framework model ckpt
+    parser.add_argument('--framework-model-dir', default='pytorch_model', help="Directory for HF saved models")
+
+    # TensorRT engine build
+    parser.add_argument('--engine-dir', default='engine', help="Output directory for TensorRT engines")
+    parser.add_argument('--build-static-batch', action='store_true', help="Build TensorRT engines with fixed batch size.")
+    parser.add_argument('--build-dynamic-shape', action='store_true', help="Build TensorRT engines with dynamic image shapes.")
+    parser.add_argument('--build-all-tactics', action='store_true', help="Build TensorRT engines using all tactic sources.")
+    parser.add_argument('--timing-cache', default=None, type=str, help="Path to the precached timing measurements to accelerate build.")
+
+    # TensorRT inference
+    parser.add_argument('--num-warmup-runs', type=int, default=5, help="Number of warmup runs before benchmarking performance")
+    parser.add_argument('--use-cuda-graph', action='store_true', help="Enable cuda graph")
+    parser.add_argument('--nvtx-profile', action='store_true', help="Enable NVTX markers for performance profiling")
+    parser.add_argument('--torch-inference', default='', help="Run inference with PyTorch (using specified compilation mode) instead of TensorRT.")
+
+    parser.add_argument('--seed', type=int, default=None, help="Seed for random generator to get consistent results")
+    parser.add_argument('--output-dir', default='output', help="Output directory for logs and image artifacts")
+    parser.add_argument('--hf-token', type=str, help="HuggingFace API access token for downloading model checkpoints")
+    parser.add_argument('-v', '--verbose', action='store_true', help="Show verbose output")
+    return parser
+
+def process_pipeline_args(args):
+    if args.height % 8 != 0 or args.width % 8 != 0:
+        raise ValueError(f"Image height and width have to be divisible by 8 but specified as: {args.height} and {args.width}.")
+
+    max_batch_size = 4
+    if args.batch_size > max_batch_size:
+        raise ValueError(f"Batch size {args.batch_size} is larger than allowed {max_batch_size}.")
+
+    if args.use_cuda_graph and (not args.build_static_batch or args.build_dynamic_shape):
+        raise ValueError("Using CUDA graph requires static dimensions. Enable `--build-static-batch` and do not specify `--build-dynamic-shape`")
+
+    input_image = None
+    if args.input_image:
+        input_image = Image.open(args.input_image)
+
+        image_width, image_height = input_image.size
+        if image_height != args.height or image_width != args.width:
+            print(f"[I] Resizing input_image to {args.height}x{args.width}")
+            input_image = input_image.resize((args.width, args.height), Image.LANCZOS)
+            image_height, image_width = args.height, args.width
+
+        input_image = preprocess_image_sd3(input_image)
+
+    kwargs_init_pipeline = {
+        'version': args.version,
+        'max_batch_size': max_batch_size,
+        'output_dir': args.output_dir,
+        'hf_token': args.hf_token,
+        'verbose': args.verbose,
+        'nvtx_profile': args.nvtx_profile,
+        'use_cuda_graph': args.use_cuda_graph,
+        'framework_model_dir': args.framework_model_dir,
+        'torch_inference': args.torch_inference,
+        'shift': args.shift,
+        'cfg_scale': args.cfg_scale,
+        'denoising_steps': args.denoising_steps,
+        'denoising_percentage': args.denoising_percentage,
+        'input_image': input_image
+    }
+
+    kwargs_load_engine = {
+        'onnx_opset': args.onnx_opset,
+        'opt_batch_size': args.batch_size,
+        'opt_image_height': args.height,
+        'opt_image_width': args.width,
+        'static_batch': args.build_static_batch,
+        'static_shape': not args.build_dynamic_shape,
+        'enable_all_tactics': args.build_all_tactics,
+        'timing_cache': args.timing_cache,
+    }
+
+    args_run_demo = (args.prompt, args.negative_prompt, args.height, args.width, args.batch_size, args.batch_count, args.num_warmup_runs, args.use_cuda_graph)
+
+    return kwargs_init_pipeline, kwargs_load_engine, args_run_demo
+
+def parseArgs():
+    parser = argparse.ArgumentParser(description="Options for Stable Diffusion 3 Demo")
+    parser = add_arguments(parser)
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    print("[I] Initializing Stable Diffusion 3 demo using TensorRT")
+    args = parseArgs()
+
+    kwargs_init_pipeline, kwargs_load_engine, args_run_demo = process_pipeline_args(args)
+
+    # Initialize demo
+    demo = StableDiffusion3Pipeline(
+        pipeline_type=PIPELINE_TYPE.TXT2IMG,
+        **kwargs_init_pipeline)
+
+    # Load TensorRT engines and pytorch modules
+    demo.loadEngines(
+        args.engine_dir,
+        args.framework_model_dir,
+        args.onnx_dir,
+        **kwargs_load_engine)
+
+    # Load resources
+    _, shared_device_memory = cudart.cudaMalloc(demo.calculateMaxDeviceMemory())
+    demo.activateEngines(shared_device_memory)
+    demo.loadResources(args.height, args.width, args.batch_size, args.seed)
+
+    # Run inference
+    demo.run(*args_run_demo)
+
+    demo.teardown()
diff --git a/demo/Diffusion/models.py b/demo/Diffusion/models.py
index b48028ff..162eb6ad 100644
--- a/demo/Diffusion/models.py
+++ b/demo/Diffusion/models.py
@@ -27,6 +27,7 @@ import onnx
 from onnx import numpy_helper, shape_inference
 import onnx_graphsurgeon as gs
+from safetensors import safe_open
 import os
 from polygraphy.backend.onnx.loader import fold_constants
 import re
@@ -38,7 +39,11 @@
 CLIPTextModelWithProjection, CLIPTokenizer
 )
+from huggingface_hub import hf_hub_download
 from utilities import merge_loras
+from utils_sd3.sd3_impls import BaseModel as BaseModelSD3
+from utils_sd3.sd3_impls import SDVAE
+from utils_sd3.other_impls import load_into, SDClipModel, SDXLClipG, T5XXLModel
 
 class Optimizer():
     def __init__(
@@ -160,7 +165,6 @@ def fuse_mha_qkv_int8_sq(self):
         print(f"Removed {removed} QDQ nodes")
         return removed # expected 72 for L2.5
-
 def get_path(version, pipeline, controlnets=None):
     if controlnets is not None:
         return ["lllyasviel/sd-controlnet-" + modality for modality in controlnets]
@@ -203,6 +207,8 @@ def get_path(version, pipeline, controlnets=None):
             return "stabilityai/sdxl-turbo"
         else:
             raise ValueError(f"Unsupported SDXL Turbo pipeline {pipeline.name}")
+    elif version == 'sd3':
+        return "stabilityai/stable-diffusion-3-medium"
     else:
         raise ValueError(f"Incorrect version {version}")
@@ -213,6 +219,8 @@
         return 1024
     elif version in ("xl-1.0", "xl-turbo") and pipeline.is_sd_xl_base():
         return 768
+    elif version in ("sd3",):
+        return 4096
     else:
         raise ValueError(f"Invalid version {version} + pipeline {pipeline}")
@@ -343,6 +351,12 @@ def get_pipeline(self):
             **model_opts,
         ).to(self.device)
 
+    def get_model_path(self, model_dir, model_opts, model_name="diffusion_pytorch_model"):
+        variant = "." + model_opts.get("variant") if "variant" in model_opts else ""
+        suffix = ".safetensors" if self.hf_safetensor else ".bin"
+        model_file = model_name + variant + suffix
+        return os.path.join(model_dir, model_file)
+
     def get_model(self, torch_inference=''):
         pass
 
@@ -641,6 +655,126 @@ def get_shape_dict(self, batch_size, image_height, image_width):
 
         return output
 
+class SD3_CLIPGModel(CLIPModel):
+    def __init__(self,
+        version,
+        pipeline,
+        device,
+        hf_token,
+        verbose,
+        framework_model_dir,
+        max_batch_size,
+        embedding_dim,
+        fp16=False,
+    ):
+        super(SD3_CLIPGModel, self).__init__(version, pipeline, device=device, hf_token=hf_token, verbose=verbose, framework_model_dir=framework_model_dir, fp16=fp16, max_batch_size=max_batch_size, embedding_dim=embedding_dim)
+        self.CLIPG_CONFIG = {
+            "hidden_act": "gelu",
+            "hidden_size": 1280,
+            "intermediate_size": 5120,
+            "num_attention_heads": 20,
+            "num_hidden_layers": 32
+        }
+        self.subfolder = 'text_encoders'
+
+    def get_model(self, torch_inference=''):
+        clip_g_model_dir = get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, self.subfolder, torch_inference)
+        clip_g_filename="clip_g.safetensors"
+        clip_g_model_path = f"{clip_g_model_dir}/{clip_g_filename}"
+        if not os.path.exists(clip_g_model_path):
+            hf_hub_download(
+                repo_id=self.path,
+                filename=clip_g_filename,
+                local_dir=get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, '', torch_inference),
+                subfolder=self.subfolder
+            )
+        with safe_open(clip_g_model_path, framework="pt", device=self.device) as f:
+            dtype = torch.float16 if self.fp16 else torch.float32
+            model = SDXLClipG(self.CLIPG_CONFIG, device=self.device, dtype=dtype)
+            load_into(f, model.transformer, "", self.device, dtype)
+        model = optimize_checkpoint(model, torch_inference)
+        return model
+
+class SD3_CLIPLModel(CLIPModel):
+    def __init__(self,
+        version,
+        pipeline,
+        device,
+        hf_token,
+        verbose,
+        framework_model_dir,
+        max_batch_size,
+        embedding_dim,
+        fp16=False,
+    ):
+        super(SD3_CLIPLModel, self).__init__(version, pipeline, device=device, hf_token=hf_token, verbose=verbose,
framework_model_dir=framework_model_dir, fp16=fp16, max_batch_size=max_batch_size, embedding_dim=embedding_dim) + self.CLIPL_CONFIG = { + "hidden_act": "quick_gelu", + "hidden_size": 768, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12 + } + self.subfolder = 'text_encoders' + + def get_model(self, torch_inference=''): + clip_l_model_dir = get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, self.subfolder, torch_inference) + clip_l_filename="clip_l.safetensors" + clip_l_model_path = f"{clip_l_model_dir}/{clip_l_filename}" + if not os.path.exists(clip_l_model_path): + hf_hub_download( + repo_id=self.path, + filename=clip_l_filename, + local_dir=get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, '', torch_inference), + subfolder=self.subfolder + ) + with safe_open(clip_l_model_path, framework="pt", device=self.device) as f: + dtype = torch.float16 if self.fp16 else torch.float32 + model = SDClipModel(layer="hidden", layer_idx=-2, device=self.device, dtype=dtype, layer_norm_hidden_state=False, return_projected_pooled=False, textmodel_json_config=self.CLIPL_CONFIG) + load_into(f, model.transformer, "", self.device, dtype) + model = optimize_checkpoint(model, torch_inference) + return model + +class SD3_T5XXLModel(CLIPModel): + def __init__(self, + version, + pipeline, + device, + hf_token, + verbose, + framework_model_dir, + max_batch_size, + embedding_dim, + fp16=False, + ): + super(SD3_T5XXLModel, self).__init__(version, pipeline, device=device, hf_token=hf_token, verbose=verbose, framework_model_dir=framework_model_dir, fp16=fp16, max_batch_size=max_batch_size, embedding_dim=embedding_dim) + self.T5_CONFIG = { + "d_ff": 10240, + "d_model": 4096, + "num_heads": 64, + "num_layers": 24, + "vocab_size": 32128 + } + self.subfolder = 'text_encoders' + + def get_model(self, torch_inference=''): + t5xxl_model_dir = get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, self.subfolder, torch_inference) + t5xxl_filename="t5xxl_fp16.safetensors" + t5xxl_model_path = f"{t5xxl_model_dir}/{t5xxl_filename}" + if not os.path.exists(t5xxl_model_path): + hf_hub_download( + repo_id=self.path, + filename=t5xxl_filename, + local_dir=get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, '', torch_inference), + subfolder=self.subfolder + ) + with safe_open(t5xxl_model_path, framework="pt", device=self.device) as f: + dtype = torch.float16 if self.fp16 else torch.float32 + model = T5XXLModel(self.T5_CONFIG, device=self.device, dtype=dtype) + load_into(f, model.transformer, "", self.device, dtype) + model = optimize_checkpoint(model, torch_inference) + return model + class UNet2DConditionControlNetModel(torch.nn.Module): def __init__(self, unet, controlnets) -> None: super().__init__() @@ -725,16 +859,17 @@ def get_model(self, torch_inference=''): model = UNet2DConditionControlNetModel(unet_model, controlnets) else: unet_model_dir = get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, self.subfolder, torch_inference) - if not os.path.exists(unet_model_dir): + unet_path = self.get_model_path(unet_model_dir, model_opts) + if not os.path.exists(unet_path): model = UNet2DConditionModel.from_pretrained(self.path, subfolder=self.subfolder, use_safetensors=self.hf_safetensor, use_auth_token=self.hf_token, **model_opts).to(self.device) - model.save_pretrained(unet_model_dir) + model.save_pretrained(unet_model_dir, **model_opts) else: - print(f"[I] Load UNet pytorch model from: 
{unet_model_dir}") - model = UNet2DConditionModel.from_pretrained(unet_model_dir).to(self.device) + print(f"[I] Load UNet pytorch model from: {unet_path}") + model = UNet2DConditionModel.from_pretrained(unet_model_dir, **model_opts).to(self.device) if torch_inference: model.to(memory_format=torch.channels_last) model = optimize_checkpoint(model, torch_inference) @@ -862,7 +997,8 @@ def __init__(self, def get_model(self, torch_inference=''): model_opts = {'variant': 'fp16', 'torch_dtype': torch.float16} if self.fp16 else {} unet_model_dir = get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, self.subfolder, torch_inference) - if not os.path.exists(unet_model_dir): + unet_path = self.get_model_path(unet_model_dir, model_opts) + if not os.path.exists(unet_path): model = UNet2DConditionModel.from_pretrained(self.path, subfolder=self.subfolder, use_safetensors=self.hf_safetensor, @@ -871,11 +1007,10 @@ def get_model(self, torch_inference=''): # Use default attention processor for ONNX export if not torch_inference: model.set_default_attn_processor() - model.save_pretrained(unet_model_dir) + model.save_pretrained(unet_model_dir, **model_opts) else: - print(f"[I] Load UNet pytorch model from: {unet_model_dir}") - model_load_opts = {'torch_dtype': torch.float16} if self.fp16 else {} - model = UNet2DConditionModel.from_pretrained(unet_model_dir, **model_load_opts).to(self.device) + print(f"[I] Load UNet pytorch model from: {unet_path}") + model = UNet2DConditionModel.from_pretrained(unet_model_dir, **model_opts).to(self.device) model = optimize_checkpoint(model, torch_inference) return model @@ -944,6 +1079,87 @@ def get_sample_input(self, batch_size, image_height, image_width, static_shape): def optimize(self, onnx_graph): return super().optimize(onnx_graph, fuse_mha_qkv_int8=True) +class SD3_MMDiTModel(BaseModel): + def __init__(self, + version, + pipeline, + device, + hf_token, + verbose, + framework_model_dir, + shift=1.0, + fp16 = False, + max_batch_size = 16, + text_maxlen = 77, + ): + + super(SD3_MMDiTModel, self).__init__(version, pipeline, device=device, hf_token=hf_token, verbose=verbose, framework_model_dir=framework_model_dir, fp16=fp16, max_batch_size=max_batch_size, text_maxlen=text_maxlen) + self.subfolder = 'sd3' + self.mmdit_dim = 16 + self.shift = shift + self.xB = 2 + + def get_model(self, torch_inference=''): + sd3_model_dir = get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, self.subfolder, torch_inference) + sd3_filename="sd3_medium.safetensors" + sd3_model_path = f"{sd3_model_dir}/{sd3_filename}" + if not os.path.exists(sd3_model_path): + hf_hub_download(repo_id=self.path, filename=sd3_filename, local_dir=sd3_model_dir) + with safe_open(sd3_model_path, framework="pt", device=self.device) as f: + model = BaseModelSD3(shift=self.shift, file=f, prefix="model.diffusion_model.", device=self.device, dtype=torch.float16).eval() + load_into(f, model, "model.", self.device, torch.float16) + model = optimize_checkpoint(model, torch_inference) + return model + + def get_input_names(self): + return ['sample', 'sigma', 'c_crossattn', 'y'] + + def get_output_names(self): + return ['latent'] + + def get_dynamic_axes(self): + xB = '2B' if self.xB == 2 else 'B' + return { + 'sample': {0: xB, 2: 'H', 3: 'W'}, + 'sigma': {0: xB}, + 'c_crossattn': {0: xB}, + 'y': {0: xB}, + 'latent': {0: xB, 2: 'H', 3: 'W'} + } + + def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_shape): + latent_height, latent_width = 
self.check_dims(batch_size, image_height, image_width) + min_batch, max_batch, _, _, _, _, min_latent_height, max_latent_height, min_latent_width, max_latent_width = \ + self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_shape) + return { + 'sample': [(self.xB*min_batch, self.mmdit_dim, min_latent_height, min_latent_width), (self.xB*batch_size, self.mmdit_dim, latent_height, latent_width), (self.xB*max_batch, self.mmdit_dim, max_latent_height, max_latent_width)], + 'sigma': [(self.xB*min_batch,), (self.xB*batch_size,), (self.xB*max_batch,)], + 'c_crossattn': [(self.xB*min_batch, 154, 4096), (self.xB*batch_size, 154, 4096), (self.xB*max_batch, 154, 4096)], + 'y': [(self.xB*min_batch, 2048), (self.xB*batch_size, 2048), (self.xB*max_batch, 2048)] + } + + def get_shape_dict(self, batch_size, image_height, image_width): + latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) + return { + 'sample': (self.xB*batch_size, self.mmdit_dim, latent_height, latent_width), + 'sigma': (self.xB*batch_size,), + 'c_crossattn': (self.xB*batch_size, 154, 4096), + 'y': (self.xB*batch_size, 2048), + 'latent': (self.xB*batch_size, self.mmdit_dim, latent_height, latent_width) + } + + def get_sample_input(self, batch_size, image_height, image_width, static_shape): + latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) + dtype = torch.float16 if self.fp16 else torch.float32 + return ( + torch.randn(batch_size, self.mmdit_dim, latent_height, latent_width, dtype=dtype, device=self.device), + torch.randn(batch_size, dtype=dtype, device=self.device), + { + 'c_crossattn': torch.randn(batch_size, 154, 4096, dtype=dtype, device=self.device), + 'y': torch.randn(batch_size, 2048, dtype=dtype, device=self.device), + } + ) + class VAEModel(BaseModel): def __init__(self, version, @@ -1004,6 +1220,56 @@ def get_sample_input(self, batch_size, image_height, image_width, static_shape): latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) return torch.randn(batch_size, 4, latent_height, latent_width, dtype=torch.float32, device=self.device) +class SD3_VAEDecoderModel(VAEModel): + def __init__(self, + version, + pipeline, + device, + hf_token, + verbose, + framework_model_dir, + max_batch_size, + fp16=False, + ): + super(SD3_VAEDecoderModel, self).__init__(version, pipeline, device=device, hf_token=hf_token, verbose=verbose, framework_model_dir=framework_model_dir, fp16=fp16, max_batch_size=max_batch_size) + self.subfolder = 'sd3' + + def get_model(self, torch_inference=''): + dtype = torch.float16 if self.fp16 else torch.float32 + sd3_model_dir = get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, self.subfolder, torch_inference) + sd3_filename="sd3_medium.safetensors" + sd3_model_path = f"{sd3_model_dir}/{sd3_filename}" + if not os.path.exists(sd3_model_path): + hf_hub_download(repo_id=self.path, filename=sd3_filename, local_dir=sd3_model_dir) + with safe_open(sd3_model_path, framework="pt", device=self.device) as f: + model = SDVAE(device=self.device, dtype=dtype).eval().cuda() + prefix = "" + if any(k.startswith("first_stage_model.") for k in f.keys()): + prefix = "first_stage_model." 
+ load_into(f, model, prefix, self.device, dtype) + model.forward = model.decode + model = optimize_checkpoint(model, torch_inference) + return model + + def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_shape): + latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) + min_batch, max_batch, _, _, _, _, min_latent_height, max_latent_height, min_latent_width, max_latent_width = \ + self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_shape) + return { + 'latent': [(min_batch, 16, min_latent_height, min_latent_width), (batch_size, 16, latent_height, latent_width), (max_batch, 16, max_latent_height, max_latent_width)] + } + + def get_shape_dict(self, batch_size, image_height, image_width): + latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) + return { + 'latent': (batch_size, 16, latent_height, latent_width), + 'images': (batch_size, 3, image_height, image_width) + } + + def get_sample_input(self, batch_size, image_height, image_width, static_shape): + latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) + dtype = torch.float16 if self.fp16 else torch.float32 + return torch.randn(batch_size, 16, latent_height, latent_width, dtype=dtype, device=self.device) class TorchVAEEncoder(torch.nn.Module): def __init__(self, version, pipeline, hf_token, device, path, framework_model_dir, hf_safetensor=False): @@ -1075,6 +1341,54 @@ def get_sample_input(self, batch_size, image_height, image_width, static_shape): self.check_dims(batch_size, image_height, image_width) return torch.randn(batch_size, 3, image_height, image_width, dtype=torch.float32, device=self.device) +class SD3_VAEEncoderModel(VAEEncoderModel): + def __init__(self, + version, + pipeline, + device, + hf_token, + verbose, + framework_model_dir, + max_batch_size, + fp16=False, + ): + super(SD3_VAEEncoderModel, self).__init__(version, pipeline, device=device, hf_token=hf_token, verbose=verbose, framework_model_dir=framework_model_dir, fp16=fp16, max_batch_size=max_batch_size) + self.subfolder = 'sd3' + + def get_model(self, torch_inference=''): + dtype = torch.float16 if self.fp16 else torch.float32 + sd3_model_dir = get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, self.subfolder, torch_inference) + sd3_filename="sd3_medium.safetensors" + sd3_model_path = f"{sd3_model_dir}/{sd3_filename}" + if not os.path.exists(sd3_model_path): + hf_hub_download(repo_id=self.path, filename=sd3_filename, local_dir=sd3_model_dir) + with safe_open(sd3_model_path, framework="pt", device=self.device) as f: + model = SDVAE(device=self.device, dtype=dtype).eval().cuda() + prefix = "" + if any(k.startswith("first_stage_model.") for k in f.keys()): + prefix = "first_stage_model." 
+            load_into(f, model, prefix, self.device, dtype)
+        model.forward = model.encode
+        model = optimize_checkpoint(model, torch_inference)
+        return model
+
+    def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_shape):
+        min_batch, max_batch, _, _, _, _, _, _, _, _ = \
+            self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_shape)
+        return {
+            'images': [(min_batch, 3, image_height, image_width), (batch_size, 3, image_height, image_width), (max_batch, 3, image_height, image_width)]
+        }
+
+    def get_shape_dict(self, batch_size, image_height, image_width):
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        return {
+            'images': (batch_size, 3, image_height, image_width),
+            'latent': (batch_size, 16, latent_height, latent_width)
+        }
+
+    def get_sample_input(self, batch_size, image_height, image_width, static_shape):
+        dtype = torch.float16 if self.fp16 else torch.float32
+        return torch.randn(batch_size, 3, image_height, image_width, dtype=dtype, device=self.device)

 def make_tokenizer(version, pipeline, hf_token, framework_model_dir, subfolder="tokenizer", **kwargs):
     tokenizer_model_dir = get_checkpoint_dir(framework_model_dir, version, pipeline.name, subfolder, '')
diff --git a/demo/Diffusion/requirements.txt b/demo/Diffusion/requirements.txt
index 5fa939ec..8874c661 100644
--- a/demo/Diffusion/requirements.txt
+++ b/demo/Diffusion/requirements.txt
@@ -10,6 +10,8 @@ onnxruntime==1.16.3
 opencv-python==4.8.0.74
 scipy
 transformers==4.33.1
-nvidia-ammo==0.9.4
+--extra-index-url https://pypi.nvidia.com
+nvidia-modelopt==0.11.2
 onnx-graphsurgeon
 polygraphy==0.49.9
+sentencepiece
diff --git a/demo/Diffusion/stable_diffusion_3_pipeline.py b/demo/Diffusion/stable_diffusion_3_pipeline.py
new file mode 100644
index 00000000..33059ea4
--- /dev/null
+++ b/demo/Diffusion/stable_diffusion_3_pipeline.py
@@ -0,0 +1,592 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from cuda import cudart
+from models import (
+    get_clip_embedding_dim,
+    SD3_CLIPGModel,
+    SD3_CLIPLModel,
+    SD3_T5XXLModel,
+    SD3_MMDiTModel,
+    SD3_VAEEncoderModel,
+    SD3_VAEDecoderModel
+)
+import nvtx
+import os
+import math
+import pathlib
+import tensorrt as trt
+import time
+import torch
+from utilities import (
+    PIPELINE_TYPE,
+    TRT_LOGGER,
+    Engine,
+    save_image,
+)
+from utils_sd3.other_impls import SD3Tokenizer
+from utils_sd3.sd3_impls import SD3LatentFormat, sample_euler
+
+class StableDiffusion3Pipeline:
+    """
+    Application showcasing the acceleration of Stable Diffusion 3 pipelines using NVIDIA TensorRT.
+ """ + def __init__( + self, + version='sd3', + pipeline_type=PIPELINE_TYPE.TXT2IMG, + max_batch_size=16, + shift=1.0, + cfg_scale=5, + denoising_steps=50, + denoising_percentage=0.6, + input_image=None, + device='cuda', + output_dir='.', + hf_token=None, + verbose=False, + nvtx_profile=False, + use_cuda_graph=False, + framework_model_dir='pytorch_model', + torch_inference='', + ): + """ + Initializes the Stable Diffusion 3 pipeline. + + Args: + version (str): + The version of the pipeline. Should be one of ['sd3] + pipeline_type (PIPELINE_TYPE): + Type of current pipeline. + max_batch_size (int): + Maximum batch size for dynamic batch engine. + shift (float): + Shift parameter for MMDiT model. Default: 1.0 + cfg_scale (int): + CFG Scale used for denoising. Default: 5 + denoising_steps (int): + Number of denoising steps. Default: 1.0 + denoising_percentage (float): + Denoising percentage. Default: 0.6 + input_image (float): + Input image for conditioning. Default: None + device (str): + PyTorch device to run inference. Default: 'cuda' + output_dir (str): + Output directory for log files and image artifacts + hf_token (str): + HuggingFace User Access Token to use for downloading Stable Diffusion model checkpoints. + verbose (bool): + Enable verbose logging. + nvtx_profile (bool): + Insert NVTX profiling markers. + use_cuda_graph (bool): + Use CUDA graph to capture engine execution and then launch inference + framework_model_dir (str): + cache directory for framework checkpoints + torch_inference (str): + Run inference with PyTorch (using specified compilation mode) instead of TensorRT. + """ + + self.max_batch_size = max_batch_size + self.shift = shift + self.cfg_scale = cfg_scale + self.denoising_steps = denoising_steps + self.input_image = input_image + self.denoising_percentage = denoising_percentage if input_image is not None else 1.0 + + self.framework_model_dir = framework_model_dir + self.output_dir = output_dir + for directory in [self.framework_model_dir, self.output_dir]: + if not os.path.exists(directory): + print(f"[I] Create directory: {directory}") + pathlib.Path(directory).mkdir(parents=True) + + self.hf_token = hf_token + self.device = device + self.verbose = verbose + self.nvtx_profile = nvtx_profile + + self.version = version + + # Pipeline type + self.pipeline_type = pipeline_type + self.stages = ['clip_g', 'clip_l', 't5xxl', 'mmdit', 'vae_decoder'] + if input_image is not None: + self.stages += ['vae_encoder'] + + self.config = {} + self.config['clip_hidden_states'] = True + self.torch_inference = torch_inference + if self.torch_inference: + torch._inductor.config.conv_1x1_as_mm = True + torch._inductor.config.coordinate_descent_tuning = True + torch._inductor.config.epilogue_fusion = False + torch._inductor.config.coordinate_descent_check_all_directions = True + self.use_cuda_graph = use_cuda_graph + + # initialized in loadEngines() + self.models = {} + self.torch_models = {} + self.engine = {} + self.shared_device_memory = None + + # initialized in loadResources() + self.events = {} + self.generator = None + self.markers = {} + self.seed = None + self.stream = None + self.tokenizer = None + + def loadResources(self, image_height, image_width, batch_size, seed): + # Initialize noise generator + if seed: + self.seed = seed + self.generator = torch.Generator(device="cuda").manual_seed(seed) + + # Create CUDA events and stream + for stage in ['clip_g', 'clip_l', 't5xxl', 'denoise', 'vae_encode', 'vae_decode']: + self.events[stage] = [cudart.cudaEventCreate()[1], 
cudart.cudaEventCreate()[1]] + self.stream = cudart.cudaStreamCreate()[1] + + # Allocate TensorRT I/O buffers + if not self.torch_inference: + for model_name, obj in self.models.items(): + if self.torch_fallback[model_name]: + continue + self.engine[model_name].allocate_buffers(shape_dict=obj.get_shape_dict(batch_size, image_height, image_width), device=self.device) + + def teardown(self): + for e in self.events.values(): + cudart.cudaEventDestroy(e[0]) + cudart.cudaEventDestroy(e[1]) + + for engine in self.engine.values(): + del engine + + if self.shared_device_memory: + cudart.cudaFree(self.shared_device_memory) + + cudart.cudaStreamDestroy(self.stream) + del self.stream + + def getOnnxPath(self, model_name, onnx_dir, opt=True, suffix=''): + onnx_model_dir = os.path.join(onnx_dir, model_name+suffix+('.opt' if opt else '')) + os.makedirs(onnx_model_dir, exist_ok=True) + return os.path.join(onnx_model_dir, 'model.onnx') + + def getEnginePath(self, model_name, engine_dir, enable_refit=False, suffix=''): + return os.path.join(engine_dir, model_name+suffix+('.refit' if enable_refit else '')+'.trt'+trt.__version__+'.plan') + + def loadEngines( + self, + engine_dir, + framework_model_dir, + onnx_dir, + onnx_opset, + opt_batch_size, + opt_image_height, + opt_image_width, + static_batch=False, + static_shape=True, + enable_all_tactics=False, + timing_cache=None, + ): + """ + Build and load engines for TensorRT accelerated inference. + Export ONNX models first, if applicable. + + Args: + engine_dir (str): + Directory to store the TensorRT engines. + framework_model_dir (str): + Directory to store the framework model ckpt. + onnx_dir (str): + Directory to store the ONNX models. + onnx_opset (int): + ONNX opset version to export the models. + opt_batch_size (int): + Batch size to optimize for during engine building. + opt_image_height (int): + Image height to optimize for during engine building. Must be a multiple of 8. + opt_image_width (int): + Image width to optimize for during engine building. Must be a multiple of 8. + static_batch (bool): + Build engine only for specified opt_batch_size. + static_shape (bool): + Build engine only for specified opt_image_height & opt_image_width. Default = True. + enable_all_tactics (bool): + Enable all tactic sources during TensorRT engine builds. + timing_cache (str): + Path to the timing cache to speed up TensorRT build. 
+ """ + # Create directories if missing + for directory in [engine_dir, onnx_dir]: + if not os.path.exists(directory): + print(f"[I] Create directory: {directory}") + pathlib.Path(directory).mkdir(parents=True) + + # Load pipeline models + models_args = {'version': self.version, 'pipeline': self.pipeline_type, 'device': self.device, + 'hf_token': self.hf_token, 'verbose': self.verbose, 'framework_model_dir': framework_model_dir, + 'max_batch_size': self.max_batch_size} + + # Load text tokenizer + self.tokenizer = SD3Tokenizer() + + # Load text encoders + embedding_dim = get_clip_embedding_dim(self.version, self.pipeline_type) + if 'clip_g' in self.stages: + self.models['clip_g'] = SD3_CLIPGModel(**models_args, fp16=True, embedding_dim=embedding_dim) + + if 'clip_l' in self.stages: + self.models['clip_l'] = SD3_CLIPLModel(**models_args, fp16=True, embedding_dim=embedding_dim) + + if 't5xxl' in self.stages: + self.models['t5xxl'] = SD3_T5XXLModel(**models_args, fp16=True, embedding_dim=embedding_dim) + + # Load MMDiT model + if 'mmdit' in self.stages: + self.models['mmdit'] = SD3_MMDiTModel(**models_args, fp16=True, shift=self.shift) + + # Load VAE Encoder model + if 'vae_encoder' in self.stages: + self.models['vae_encoder'] = SD3_VAEEncoderModel(**models_args, fp16=True) + + # Load VAE Decoder model + if 'vae_decoder' in self.stages: + self.models['vae_decoder'] = SD3_VAEDecoderModel(**models_args, fp16=True) + + # Configure pipeline models to load + model_names = self.models.keys() + # Torch fallback + self.torch_fallback = dict(zip(model_names, [self.torch_inference or model_name in ('clip_g', 'clip_l', 't5xxl') for model_name in model_names])) + + onnx_path = dict(zip(model_names, [self.getOnnxPath(model_name, onnx_dir, opt=False) for model_name in model_names])) + onnx_opt_path = dict(zip(model_names, [self.getOnnxPath(model_name, onnx_dir) for model_name in model_names])) + engine_path = dict(zip(model_names, [self.getEnginePath(model_name, engine_dir) for model_name in model_names])) + + for model_name, obj in self.models.items(): + if self.torch_fallback[model_name]: + continue + # Export models to ONNX + do_export_onnx = not os.path.exists(engine_path[model_name]) and not os.path.exists(onnx_opt_path[model_name]) + if do_export_onnx: + obj.export_onnx(onnx_path[model_name], onnx_opt_path[model_name], onnx_opset, opt_image_height, opt_image_width, static_shape=static_shape) + + # Build TensorRT engines + for model_name, obj in self.models.items(): + if self.torch_fallback[model_name]: + continue + engine = Engine(engine_path[model_name]) + if not os.path.exists(engine_path[model_name]): + update_output_names = obj.get_output_names() + obj.extra_output_names if obj.extra_output_names else None + extra_build_args = {'verbose': self.verbose} + fp16amp = obj.fp16 + engine.build(onnx_opt_path[model_name], + fp16=fp16amp, + input_profile=obj.get_input_profile( + opt_batch_size, opt_image_height, opt_image_width, + static_batch=static_batch, static_shape=static_shape + ), + enable_all_tactics=enable_all_tactics, + timing_cache=timing_cache, + update_output_names=update_output_names, + **extra_build_args) + self.engine[model_name] = engine + + # Load TensorRT engines + for model_name, obj in self.models.items(): + if self.torch_fallback[model_name]: + continue + self.engine[model_name].load() + + # Load torch models + for model_name, obj in self.models.items(): + if self.torch_fallback[model_name] or model_name == 'mmdit': + self.torch_models[model_name] = 
obj.get_model(torch_inference=self.torch_inference) + + def calculateMaxDeviceMemory(self): + max_device_memory = 0 + for model_name, engine in self.engine.items(): + max_device_memory = max(max_device_memory, engine.engine.device_memory_size) + return max_device_memory + + def activateEngines(self, shared_device_memory=None): + if shared_device_memory is None: + max_device_memory = self.calculateMaxDeviceMemory() + _, shared_device_memory = cudart.cudaMalloc(max_device_memory) + self.shared_device_memory = shared_device_memory + # Load and activate TensorRT engines + for engine in self.engine.values(): + engine.activate(reuse_device_memory=self.shared_device_memory) + + def runEngine(self, model_name, feed_dict): + engine = self.engine[model_name] + return engine.infer(feed_dict, self.stream, use_cuda_graph=self.use_cuda_graph) + + def initialize_latents(self, batch_size, unet_channels, latent_height, latent_width): + return torch.ones(batch_size, unet_channels, latent_height, latent_width, device="cuda") * 0.0609 + + def profile_start(self, name, color='blue'): + if self.nvtx_profile: + self.markers[name] = nvtx.start_range(message=name, color=color) + if name in self.events: + cudart.cudaEventRecord(self.events[name][0], 0) + + def profile_stop(self, name): + if name in self.events: + cudart.cudaEventRecord(self.events[name][1], 0) + if self.nvtx_profile: + nvtx.end_range(self.markers[name]) + + def print_summary(self, denoising_steps, walltime_ms, batch_size): + print('|-----------------|--------------|') + print('| {:^15} | {:^12} |'.format('Module', 'Latency')) + print('|-----------------|--------------|') + if 'vae_encoder' in self.stages: + print('| {:^15} | {:>9.2f} ms |'.format('VAE Encoder', cudart.cudaEventElapsedTime(self.events['vae_encode'][0], self.events['vae_encode'][1])[1])) + print('| {:^15} | {:>9.2f} ms |'.format('CLIP-G', cudart.cudaEventElapsedTime(self.events['clip_g'][0], self.events['clip_g'][1])[1])) + print('| {:^15} | {:>9.2f} ms |'.format('CLIP-L', cudart.cudaEventElapsedTime(self.events['clip_l'][0], self.events['clip_l'][1])[1])) + print('| {:^15} | {:>9.2f} ms |'.format('T5XXL', cudart.cudaEventElapsedTime(self.events['t5xxl'][0], self.events['t5xxl'][1])[1])) + print('| {:^15} | {:>9.2f} ms |'.format('MMDiT'+' x '+str(denoising_steps), cudart.cudaEventElapsedTime(self.events['denoise'][0], self.events['denoise'][1])[1])) + print('| {:^15} | {:>9.2f} ms |'.format('VAE Decoder', cudart.cudaEventElapsedTime(self.events['vae_decode'][0], self.events['vae_decode'][1])[1])) + print('|-----------------|--------------|') + print('| {:^15} | {:>9.2f} ms |'.format('Pipeline', walltime_ms)) + print('|-----------------|--------------|') + print('Throughput: {:.2f} image/s'.format(batch_size*1000./walltime_ms)) + + def save_image(self, images, pipeline, prompt, seed): + # Save image + image_name_prefix = pipeline+''.join(set(['-'+prompt[i].replace(' ','_')[:10] for i in range(len(prompt))]))+'-'+str(seed)+'-' + save_image(images, self.output_dir, image_name_prefix) + + def encode_prompt(self, prompt, negative_prompt): + def encode_token_weights(model_name, token_weight_pairs): + self.profile_start(model_name, color='green') + + tokens = list(map(lambda a: a[0], token_weight_pairs[0])) + tokens = torch.tensor([tokens], dtype=torch.int64, device=self.device) + if self.torch_inference or self.torch_fallback[model_name]: + out, pooled = self.torch_models[model_name](tokens) + else: + out = self.runEngine('t5xxl', {'input_ids': tokens})['text_embeddings'] + pooled = None + 
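+            # With the default loadEngines() setup all three text encoders take the
+            # torch fallback above; the TensorRT branch applies only when a t5xxl
+            # engine is built, and T5-XXL has no pooled output, hence pooled=None.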
+ self.profile_stop(model_name) + + if pooled is not None: + first_pooled = pooled[0:1].cuda() + else: + first_pooled = pooled + output = [out[0:1]] + return torch.cat(output, dim=-2).cuda(), first_pooled + + def tokenize(prompt): + tokens = self.tokenizer.tokenize_with_weights(prompt) + l_out, l_pooled = encode_token_weights('clip_l', tokens["l"]) + g_out, g_pooled = encode_token_weights('clip_g', tokens["g"]) + t5_out, _ = encode_token_weights('t5xxl', tokens["t5xxl"]) + lg_out = torch.cat([l_out, g_out], dim=-1) + lg_out = torch.nn.functional.pad(lg_out, (0, 4096 - lg_out.shape[-1])) + + return torch.cat([lg_out, t5_out], dim=-2), torch.cat((l_pooled, g_pooled), dim=-1) + + conditioning = tokenize(prompt[0]) + neg_conditioning = tokenize(negative_prompt[0]) + return conditioning, neg_conditioning + + def denoise_latent(self, latent, conditioning, neg_conditioning, model_name='mmdit'): + def get_noise(latent): + return torch.randn(latent.size(), dtype=torch.float32, layout=latent.layout, generator=self.generator, device="cuda").to(latent.dtype) + + def get_sigmas(sampling, steps): + start = sampling.timestep(sampling.sigma_max) + end = sampling.timestep(sampling.sigma_min) + timesteps = torch.linspace(start, end, steps) + sigs = [] + for x in range(len(timesteps)): + ts = timesteps[x] + sigs.append(sampling.sigma(ts)) + sigs += [0.0] + return torch.FloatTensor(sigs) + + def max_denoise(sigmas): + max_sigma = float(self.torch_models[model_name].model_sampling.sigma_max) + sigma = float(sigmas[0]) + return math.isclose(max_sigma, sigma, rel_tol=1e-05) or sigma > max_sigma + + def fix_cond(cond): + cond, pooled = (cond[0].half().cuda(), cond[1].half().cuda()) + return { "c_crossattn": cond, "y": pooled } + + def cfg_denoiser(x, timestep, cond, uncond, cond_scale): + # Run cond and uncond in a batch together + sample = torch.cat([x, x]) + sigma = torch.cat([timestep, timestep]) + c_crossattn = torch.cat([cond["c_crossattn"], uncond["c_crossattn"]]) + y = torch.cat([cond["y"], uncond["y"]]) + if self.torch_inference: + with torch.autocast("cuda", dtype=torch.float16): + batched = self.torch_models[model_name](sample, sigma, c_crossattn=c_crossattn, y=y) + else: + input_dict = {'sample': sample, 'sigma': sigma, 'c_crossattn': c_crossattn, 'y': y} + batched = self.runEngine(model_name, input_dict)['latent'] + + # Then split and apply CFG Scaling + pos_out, neg_out = batched.chunk(2) + scaled = neg_out + (pos_out - neg_out) * cond_scale + return scaled + + self.profile_start('denoise', color='blue') + + latent = latent.half().cuda() + noise = get_noise(latent).cuda() + sigmas = get_sigmas(self.torch_models[model_name].model_sampling, self.denoising_steps).cuda() + sigmas = sigmas[int(self.denoising_steps * (1 - self.denoising_percentage)):] + conditioning = fix_cond(conditioning) + neg_conditioning = fix_cond(neg_conditioning) + + noise_scaled = self.torch_models[model_name].model_sampling.noise_scaling(sigmas[0], noise, latent, max_denoise(sigmas)) + extra_args = { "cond": conditioning, "uncond": neg_conditioning, "cond_scale": self.cfg_scale } + latent = sample_euler(cfg_denoiser, noise_scaled, sigmas, extra_args=extra_args) + latent = SD3LatentFormat().process_out(latent) + + self.profile_stop('denoise') + + return latent + + def encode_image(self): + self.input_image = self.input_image.to(self.device) + self.profile_start('vae_encode', color='orange') + if self.torch_inference: + with torch.autocast("cuda", dtype=torch.float16): + latent = self.torch_models['vae_encoder'](self.input_image) + 
else: + latent = self.runEngine('vae_encoder', {'images': self.input_image})['latent'] + + latent = SD3LatentFormat().process_in(latent) + self.profile_stop('vae_encode') + return latent + + def decode_latent(self, latent): + self.profile_start('vae_decode', color='red') + if self.torch_inference: + with torch.autocast("cuda", dtype=torch.float16): + image = self.torch_models['vae_decoder'](latent) + else: + image = self.runEngine('vae_decoder', {'latent': latent})['images'] + image = image.float() + self.profile_stop('vae_decode') + return image + + def infer( + self, + prompt, + negative_prompt, + image_height, + image_width, + warmup=False, + save_image=True, + ): + """ + Run the diffusion pipeline. + + Args: + prompt (str): + The text prompt to guide image generation. + negative_prompt (str): + The prompt not to guide the image generation. + image_height (int): + Height (in pixels) of the image to be generated. Must be a multiple of 8. + image_width (int): + Width (in pixels) of the image to be generated. Must be a multiple of 8. + warmup (bool): + Indicate if this is a warmup run. + save_image (bool): + Save the generated image (if applicable) + """ + assert len(prompt) == len(negative_prompt) + batch_size = len(prompt) + + # Spatial dimensions of latent tensor + latent_height = image_height // 8 + latent_width = image_width // 8 + + if self.generator and self.seed: + self.generator.manual_seed(self.seed) + + with torch.inference_mode(), trt.Runtime(TRT_LOGGER): + torch.cuda.synchronize() + e2e_tic = time.perf_counter() + + # Initialize Latents + latent = self.initialize_latents(batch_size=batch_size, + unet_channels=16, + latent_height=latent_height, + latent_width=latent_width) + + # Encode input image + if self.input_image is not None: + latent = self.encode_image() + + # Get Conditionings + conditioning, neg_conditioning = self.encode_prompt(prompt, negative_prompt) + + # Denoise + latent = self.denoise_latent(latent, conditioning, neg_conditioning) + + # Decode Latents + images = self.decode_latent(latent) + + torch.cuda.synchronize() + e2e_toc = time.perf_counter() + + walltime_ms = (e2e_toc - e2e_tic) * 1000. 
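+        # Latency is reported against the number of MMDiT steps actually executed,
+        # which denoising_percentage scales down when an input image is provided.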
+ if not warmup: + num_inference_steps = int(self.denoising_steps * self.denoising_percentage) + self.print_summary(num_inference_steps, walltime_ms, batch_size) + if save_image: + self.save_image(images, self.pipeline_type.name.lower(), prompt, self.seed) + + return images, walltime_ms + + def run(self, prompt, negative_prompt, height, width, batch_size, batch_count, num_warmup_runs, use_cuda_graph, **kwargs): + # Process prompt + if not isinstance(prompt, list): + raise ValueError(f"`prompt` must be of type `str` list, but is {type(prompt)}") + prompt = prompt * batch_size + + if not isinstance(negative_prompt, list): + raise ValueError(f"`--negative-prompt` must be of type `str` list, but is {type(negative_prompt)}") + if len(negative_prompt) == 1: + negative_prompt = negative_prompt * batch_size + + num_warmup_runs = max(1, num_warmup_runs) if use_cuda_graph else num_warmup_runs + if num_warmup_runs > 0: + print("[I] Warming up ..") + for _ in range(num_warmup_runs): + self.infer(prompt, negative_prompt, height, width, warmup=True, **kwargs) + + for _ in range(batch_count): + print("[I] Running StableDiffusion3 pipeline") + if self.nvtx_profile: + cudart.cudaProfilerStart() + self.infer(prompt, negative_prompt, height, width, warmup=False, **kwargs) + if self.nvtx_profile: + cudart.cudaProfilerStop() diff --git a/demo/Diffusion/stable_diffusion_pipeline.py b/demo/Diffusion/stable_diffusion_pipeline.py index 10b7f57e..9a1761ca 100755 --- a/demo/Diffusion/stable_diffusion_pipeline.py +++ b/demo/Diffusion/stable_diffusion_pipeline.py @@ -15,8 +15,8 @@ # limitations under the License. # -import ammo.torch.opt as ato -import ammo.torch.quantization as atq +import modelopt.torch.opt as mto +import modelopt.torch.quantization as mtq from cuda import cudart from diffusers import ( DDIMScheduler, @@ -61,7 +61,7 @@ save_image, unload_model ) -from utils_ammo import ( +from utils_modelopt import ( filter_func, quantize_lvl, get_int8_config, @@ -433,7 +433,7 @@ def loadEngines( model_suffix = dict(zip(model_names, [lora_suffix if do_lora_merge[model_name] else '' for model_name in model_names])) use_int8 = dict.fromkeys(model_names, False) if int8: - assert self.pipeline_type.is_sd_xl(), "int8 quantization only supported for SDXL pipeline" + assert self.pipeline_type.is_sd_xl_base(), "int8 quantization only supported for SDXL pipeline" use_int8['unetxl'] = True model_suffix['unetxl'] += f"-int8.l{quantization_level}.bs2.s{denoising_steps}.c{calibration_size}.p{quantization_percentile}.a{quantization_alpha}" onnx_path = dict(zip(model_names, [self.getOnnxPath(model_name, onnx_dir, opt=False, suffix=model_suffix[model_name]) for model_name in model_names])) @@ -491,15 +491,15 @@ def calibration_loop(unet): ) print(f"[I] Performing int8 calibration for {calibration_size} steps.") - atq.quantize(model, quant_config, forward_loop=calibration_loop) - ato.save(model, state_dict_path) + mtq.quantize(model, quant_config, forward_loop=calibration_loop) + mto.save(model, state_dict_path) print(f"[I] Generating quantized ONNX model: {onnx_opt_path[model_name]}") if not os.path.exists(onnx_path[model_name]): model = obj.get_model() - ato.restore(model, state_dict_path) + mto.restore(model, state_dict_path) quantize_lvl(model, quantization_level) - atq.disable_quantizer(model, filter_func) + mtq.disable_quantizer(model, filter_func) model.to(torch.float32).to("cpu") # QDQ needs to be in FP32 # WAR to enable ONNX export of quantized UNet obj.device="cpu" diff --git a/demo/Diffusion/utils_ammo.py 
b/demo/Diffusion/utils_modelopt.py similarity index 75% rename from demo/Diffusion/utils_ammo.py rename to demo/Diffusion/utils_modelopt.py index 8bfe44b8..b8735bfa 100644 --- a/demo/Diffusion/utils_ammo.py +++ b/demo/Diffusion/utils_modelopt.py @@ -18,9 +18,9 @@ import re import torch -from ammo.torch.quantization import utils as quant_utils -from ammo.torch.quantization.calib.max import MaxCalibrator - +from modelopt.torch.quantization import utils as quant_utils +from modelopt.torch.quantization.calib.max import MaxCalibrator +from diffusers.models.attention_processor import Attention from diffusers.models.lora import LoRACompatibleConv, LoRACompatibleLinear @@ -29,7 +29,7 @@ def __init__(self, num_bits=8, axis=None, unsigned=False, track_amax=False, **kw super().__init__(num_bits, axis, unsigned, track_amax) self.percentile = kwargs["percentile"] self.total_step = kwargs["total_step"] - self.global_min = kwargs["global_min"] + self.collect_method = kwargs["collect_method"] self.data = {} self.i = 0 @@ -55,8 +55,10 @@ def collect(self, x): if _cur_step not in self.data.keys(): self.data[_cur_step] = local_amax else: - if self.global_min: + if self.collect_method == "global_min": self.data[_cur_step] = torch.min(self.data[_cur_step], local_amax) + elif self.collect_method == "min-max" or self.collect_method == "mean-max": + self.data[_cur_step] = torch.max(self.data[_cur_step], local_amax) else: self.data[_cur_step] += local_amax if self._track_amax: @@ -66,8 +68,14 @@ def collect(self, x): def compute_amax(self): """Return the absolute max of all tensors collected.""" up_lim = int(self.total_step * self.percentile) - amaxs_values = [self.data[i] / self.total_step for i in range(0, up_lim)] - act_amax = torch.vstack(amaxs_values).min(axis=0)[0] + if self.collect_method == "min-mean": + amaxs_values = [self.data[i] / self.total_step for i in range(0, up_lim)] + else: + amaxs_values = [self.data[i] for i in range(0, up_lim)] + if self.collect_method == "mean-max": + act_amax = torch.vstack(amaxs_values).mean(axis=0)[0] + else: + act_amax = torch.vstack(amaxs_values).min(axis=0)[0] self._calib_amax = act_amax return self._calib_amax @@ -94,7 +102,7 @@ def filter_func(name): def quantize_lvl(unet, quant_level=2.5): """ We should disable the unwanted quantizer when exporting the onnx - Because in the current ammo setting, it will load the quantizer amax for all the layers even + Because in the current modelopt setting, it will load the quantizer amax for all the layers even if we didn't add that unwanted layer into the config during the calibration """ for name, module in unet.named_modules(): @@ -105,16 +113,34 @@ def quantize_lvl(unet, quant_level=2.5): if ( (quant_level >= 2 and "ff.net" in name) or (quant_level >= 2.5 and ("to_q" in name or "to_k" in name or "to_v" in name)) - or quant_level == 3 + or quant_level >= 3 ): module.input_quantizer.enable() module.weight_quantizer.enable() else: module.input_quantizer.disable() module.weight_quantizer.disable() + elif isinstance(module, Attention): + if quant_level >= 4: + module.q_bmm_quantizer.enable() + module.k_bmm_quantizer.enable() + module.v_bmm_quantizer.enable() + module.softmax_quantizer.enable() + module.bmm2_output_quantizer.enable() + else: + module.q_bmm_quantizer.disable() + module.k_bmm_quantizer.disable() + module.v_bmm_quantizer.disable() + module.softmax_quantizer.disable() + module.bmm2_output_quantizer.disable() def get_int8_config( - model, quant_level=2.5, alpha=0.8, percentile=1.0, num_inference_steps=20, 
global_min=False + model, + quant_level=3, + alpha=0.8, + percentile=1.0, + num_inference_steps=20, + collect_method="min-mean", ): quant_config = { "quant_cfg": { @@ -153,8 +179,9 @@ def get_int8_config( "axis": None, "percentile": percentile, "total_step": num_inference_steps, - "global_min": global_min, + "collect_method": collect_method, }, ), } return quant_config + diff --git a/demo/Diffusion/utils_sd3/__init__.py b/demo/Diffusion/utils_sd3/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/demo/Diffusion/utils_sd3/mmdit.py b/demo/Diffusion/utils_sd3/mmdit.py new file mode 100644 index 00000000..c346b483 --- /dev/null +++ b/demo/Diffusion/utils_sd3/mmdit.py @@ -0,0 +1,639 @@ +# MIT License + +# Copyright (c) 2024 Stability AI + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
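+
+# Vendored from Stability AI's SD3 reference code (MIT license above): the MMDiT
+# diffusion-transformer backbone used by the SD3 demo.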
+ +import math +from typing import Dict, Optional +import numpy as np +import torch +import torch.nn as nn +from einops import rearrange, repeat +from utils_sd3.other_impls import attention, Mlp + +class PatchEmbed(nn.Module): + """ 2D Image to Patch Embedding""" + def __init__( + self, + img_size: Optional[int] = 224, + patch_size: int = 16, + in_chans: int = 3, + embed_dim: int = 768, + flatten: bool = True, + bias: bool = True, + strict_img_size: bool = True, + dynamic_img_pad: bool = False, + dtype=None, + device=None, + ): + super().__init__() + self.patch_size = (patch_size, patch_size) + if img_size is not None: + self.img_size = (img_size, img_size) + self.grid_size = tuple([s // p for s, p in zip(self.img_size, self.patch_size)]) + self.num_patches = self.grid_size[0] * self.grid_size[1] + else: + self.img_size = None + self.grid_size = None + self.num_patches = None + + # flatten spatial dim and transpose to channels last, kept for bwd compat + self.flatten = flatten + self.strict_img_size = strict_img_size + self.dynamic_img_pad = dynamic_img_pad + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias, dtype=dtype, device=device) + + def forward(self, x): + B, C, H, W = x.shape + x = self.proj(x) + if self.flatten: + x = x.flatten(2).transpose(1, 2) # NCHW -> NLC + return x + + +def modulate(x, shift, scale): + if shift is None: + shift = torch.zeros_like(scale) + return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1) + + +################################################################################# +# Sine/Cosine Positional Embedding Functions # +################################################################################# + + +def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False, extra_tokens=0, scaling_factor=None, offset=None): + """ + grid_size: int of the grid height and width + return: + pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token) + """ + grid_h = np.arange(grid_size, dtype=np.float32) + grid_w = np.arange(grid_size, dtype=np.float32) + grid = np.meshgrid(grid_w, grid_h) # here w goes first + grid = np.stack(grid, axis=0) + if scaling_factor is not None: + grid = grid / scaling_factor + if offset is not None: + grid = grid - offset + grid = grid.reshape([2, 1, grid_size, grid_size]) + pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid) + if cls_token and extra_tokens > 0: + pos_embed = np.concatenate([np.zeros([extra_tokens, embed_dim]), pos_embed], axis=0) + return pos_embed + + +def get_2d_sincos_pos_embed_from_grid(embed_dim, grid): + assert embed_dim % 2 == 0 + # use half of dimensions to encode grid_h + emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2) + emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2) + emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D) + return emb + + +def get_1d_sincos_pos_embed_from_grid(embed_dim, pos): + """ + embed_dim: output dimension for each position + pos: a list of positions to be encoded: size (M,) + out: (M, D) + """ + assert embed_dim % 2 == 0 + omega = np.arange(embed_dim // 2, dtype=np.float64) + omega /= embed_dim / 2.0 + omega = 1.0 / 10000**omega # (D/2,) + pos = pos.reshape(-1) # (M,) + out = np.einsum("m,d->md", pos, omega) # (M, D/2), outer product + emb_sin = np.sin(out) # (M, D/2) + emb_cos = np.cos(out) # (M, D/2) + return np.concatenate([emb_sin, emb_cos], axis=1) # (M, D) + + 
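+# Shape sketch (illustrative comment, not executed): the helpers above compose as
+#
+#   pos_embed = get_2d_sincos_pos_embed(embed_dim=1536, grid_size=16)
+#   assert pos_embed.shape == (16 * 16, 1536)
+#
+# assuming embed_dim is divisible by 4; half the channels encode each spatial
+# axis of the patch grid (note the "w goes first" ordering above).
+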
+################################################################################# +# Embedding Layers for Timesteps and Class Labels # +################################################################################# + + +class TimestepEmbedder(nn.Module): + """Embeds scalar timesteps into vector representations.""" + + def __init__(self, hidden_size, frequency_embedding_size=256, dtype=None, device=None): + super().__init__() + self.mlp = nn.Sequential( + nn.Linear(frequency_embedding_size, hidden_size, bias=True, dtype=dtype, device=device), + nn.SiLU(), + nn.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device=device), + ) + self.frequency_embedding_size = frequency_embedding_size + + @staticmethod + def timestep_embedding(t, dim, max_period=10000): + """ + Create sinusoidal timestep embeddings. + :param t: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an (N, D) Tensor of positional embeddings. + """ + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) + * torch.arange(start=0, end=half, dtype=torch.float32) + / half + ).to(device=t.device) + args = t[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + if torch.is_floating_point(t): + embedding = embedding.to(dtype=t.dtype) + return embedding + + def forward(self, t, dtype, **kwargs): + t_freq = self.timestep_embedding(t, self.frequency_embedding_size).to(dtype) + t_emb = self.mlp(t_freq) + return t_emb + + +class VectorEmbedder(nn.Module): + """Embeds a flat vector of dimension input_dim""" + + def __init__(self, input_dim: int, hidden_size: int, dtype=None, device=None): + super().__init__() + self.mlp = nn.Sequential( + nn.Linear(input_dim, hidden_size, bias=True, dtype=dtype, device=device), + nn.SiLU(), + nn.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device=device), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.mlp(x) + + +################################################################################# +# Core DiT Model # +################################################################################# + + +def split_qkv(qkv, head_dim): + qkv = qkv.reshape(qkv.shape[0], qkv.shape[1], 3, -1, head_dim).movedim(2, 0) + return qkv[0], qkv[1], qkv[2] + +def optimized_attention(qkv, num_heads): + return attention(qkv[0], qkv[1], qkv[2], num_heads) + +class SelfAttention(nn.Module): + ATTENTION_MODES = ("xformers", "torch", "torch-hb", "math", "debug") + + def __init__( + self, + dim: int, + num_heads: int = 8, + qkv_bias: bool = False, + qk_scale: Optional[float] = None, + attn_mode: str = "xformers", + pre_only: bool = False, + qk_norm: Optional[str] = None, + rmsnorm: bool = False, + dtype=None, + device=None, + ): + super().__init__() + self.num_heads = num_heads + self.head_dim = dim // num_heads + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias, dtype=dtype, device=device) + if not pre_only: + self.proj = nn.Linear(dim, dim, dtype=dtype, device=device) + assert attn_mode in self.ATTENTION_MODES + self.attn_mode = attn_mode + self.pre_only = pre_only + + if qk_norm == "rms": + self.ln_q = RMSNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device) + self.ln_k = RMSNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, 
device=device) + elif qk_norm == "ln": + self.ln_q = nn.LayerNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device) + self.ln_k = nn.LayerNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device) + elif qk_norm is None: + self.ln_q = nn.Identity() + self.ln_k = nn.Identity() + else: + raise ValueError(qk_norm) + + def pre_attention(self, x: torch.Tensor): + B, L, C = x.shape + qkv = self.qkv(x) + q, k, v = split_qkv(qkv, self.head_dim) + q = self.ln_q(q).reshape(q.shape[0], q.shape[1], -1) + k = self.ln_k(k).reshape(q.shape[0], q.shape[1], -1) + return (q, k, v) + + def post_attention(self, x: torch.Tensor) -> torch.Tensor: + assert not self.pre_only + x = self.proj(x) + return x + + def forward(self, x: torch.Tensor) -> torch.Tensor: + (q, k, v) = self.pre_attention(x) + x = attention(q, k, v, self.num_heads) + x = self.post_attention(x) + return x + + +class RMSNorm(torch.nn.Module): + def __init__( + self, dim: int, elementwise_affine: bool = False, eps: float = 1e-6, device=None, dtype=None + ): + """ + Initialize the RMSNorm normalization layer. + Args: + dim (int): The dimension of the input tensor. + eps (float, optional): A small value added to the denominator for numerical stability. Default is 1e-6. + Attributes: + eps (float): A small value added to the denominator for numerical stability. + weight (nn.Parameter): Learnable scaling parameter. + """ + super().__init__() + self.eps = eps + self.learnable_scale = elementwise_affine + if self.learnable_scale: + self.weight = nn.Parameter(torch.empty(dim, device=device, dtype=dtype)) + else: + self.register_parameter("weight", None) + + def _norm(self, x): + """ + Apply the RMSNorm normalization to the input tensor. + Args: + x (torch.Tensor): The input tensor. + Returns: + torch.Tensor: The normalized tensor. + """ + return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) + + def forward(self, x): + """ + Forward pass through the RMSNorm layer. + Args: + x (torch.Tensor): The input tensor. + Returns: + torch.Tensor: The output tensor after applying RMSNorm. + """ + x = self._norm(x) + if self.learnable_scale: + return x * self.weight.to(device=x.device, dtype=x.dtype) + else: + return x + + +class SwiGLUFeedForward(nn.Module): + def __init__( + self, + dim: int, + hidden_dim: int, + multiple_of: int, + ffn_dim_multiplier: Optional[float] = None, + ): + """ + Initialize the FeedForward module. + + Args: + dim (int): Input dimension. + hidden_dim (int): Hidden dimension of the feedforward layer. + multiple_of (int): Value to ensure hidden dimension is a multiple of this value. + ffn_dim_multiplier (float, optional): Custom multiplier for hidden dimension. Defaults to None. + + Attributes: + w1 (ColumnParallelLinear): Linear transformation for the first layer. + w2 (RowParallelLinear): Linear transformation for the second layer. + w3 (ColumnParallelLinear): Linear transformation for the third layer. 
+ + """ + super().__init__() + hidden_dim = int(2 * hidden_dim / 3) + # custom dim factor multiplier + if ffn_dim_multiplier is not None: + hidden_dim = int(ffn_dim_multiplier * hidden_dim) + hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of) + + self.w1 = nn.Linear(dim, hidden_dim, bias=False) + self.w2 = nn.Linear(hidden_dim, dim, bias=False) + self.w3 = nn.Linear(dim, hidden_dim, bias=False) + + def forward(self, x): + return self.w2(nn.functional.silu(self.w1(x)) * self.w3(x)) + + +class DismantledBlock(nn.Module): + """A DiT block with gated adaptive layer norm (adaLN) conditioning.""" + + ATTENTION_MODES = ("xformers", "torch", "torch-hb", "math", "debug") + + def __init__( + self, + hidden_size: int, + num_heads: int, + mlp_ratio: float = 4.0, + attn_mode: str = "xformers", + qkv_bias: bool = False, + pre_only: bool = False, + rmsnorm: bool = False, + scale_mod_only: bool = False, + swiglu: bool = False, + qk_norm: Optional[str] = None, + dtype=None, + device=None, + **block_kwargs, + ): + super().__init__() + assert attn_mode in self.ATTENTION_MODES + if not rmsnorm: + self.norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + else: + self.norm1 = RMSNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, attn_mode=attn_mode, pre_only=pre_only, qk_norm=qk_norm, rmsnorm=rmsnorm, dtype=dtype, device=device) + if not pre_only: + if not rmsnorm: + self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + else: + self.norm2 = RMSNorm(hidden_size, elementwise_affine=False, eps=1e-6) + mlp_hidden_dim = int(hidden_size * mlp_ratio) + if not pre_only: + if not swiglu: + self.mlp = Mlp(in_features=hidden_size, hidden_features=mlp_hidden_dim, act_layer=nn.GELU(approximate="tanh"), dtype=dtype, device=device) + else: + self.mlp = SwiGLUFeedForward(dim=hidden_size, hidden_dim=mlp_hidden_dim, multiple_of=256) + self.scale_mod_only = scale_mod_only + if not scale_mod_only: + n_mods = 6 if not pre_only else 2 + else: + n_mods = 4 if not pre_only else 1 + self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, n_mods * hidden_size, bias=True, dtype=dtype, device=device)) + self.pre_only = pre_only + + def pre_attention(self, x: torch.Tensor, c: torch.Tensor): + assert x is not None, "pre_attention called with None input" + if not self.pre_only: + if not self.scale_mod_only: + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.adaLN_modulation(c).chunk(6, dim=1) + else: + shift_msa = None + shift_mlp = None + scale_msa, gate_msa, scale_mlp, gate_mlp = self.adaLN_modulation(c).chunk(4, dim=1) + qkv = self.attn.pre_attention(modulate(self.norm1(x), shift_msa, scale_msa)) + return qkv, (x, gate_msa, shift_mlp, scale_mlp, gate_mlp) + else: + if not self.scale_mod_only: + shift_msa, scale_msa = self.adaLN_modulation(c).chunk(2, dim=1) + else: + shift_msa = None + scale_msa = self.adaLN_modulation(c) + qkv = self.attn.pre_attention(modulate(self.norm1(x), shift_msa, scale_msa)) + return qkv, None + + def post_attention(self, attn, x, gate_msa, shift_mlp, scale_mlp, gate_mlp): + assert not self.pre_only + x = x + gate_msa.unsqueeze(1) * self.attn.post_attention(attn) + x = x + gate_mlp.unsqueeze(1) * self.mlp(modulate(self.norm2(x), shift_mlp, scale_mlp)) + return x + + def forward(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor: + assert not self.pre_only + (q, k, 
v), intermediates = self.pre_attention(x, c) + attn = attention(q, k, v, self.attn.num_heads) + return self.post_attention(attn, *intermediates) + + +def block_mixing(context, x, context_block, x_block, c): + assert context is not None, "block_mixing called with None context" + context_qkv, context_intermediates = context_block.pre_attention(context, c) + + x_qkv, x_intermediates = x_block.pre_attention(x, c) + + o = [] + for t in range(3): + o.append(torch.cat((context_qkv[t], x_qkv[t]), dim=1)) + q, k, v = tuple(o) + + attn = attention(q, k, v, x_block.attn.num_heads) + context_attn, x_attn = (attn[:, : context_qkv[0].shape[1]], attn[:, context_qkv[0].shape[1] :]) + + if not context_block.pre_only: + context = context_block.post_attention(context_attn, *context_intermediates) + else: + context = None + x = x_block.post_attention(x_attn, *x_intermediates) + return context, x + + +class JointBlock(nn.Module): + """just a small wrapper to serve as a fsdp unit""" + + def __init__(self, *args, **kwargs): + super().__init__() + pre_only = kwargs.pop("pre_only") + qk_norm = kwargs.pop("qk_norm", None) + self.context_block = DismantledBlock(*args, pre_only=pre_only, qk_norm=qk_norm, **kwargs) + self.x_block = DismantledBlock(*args, pre_only=False, qk_norm=qk_norm, **kwargs) + + def forward(self, *args, **kwargs): + return block_mixing(*args, context_block=self.context_block, x_block=self.x_block, **kwargs) + + +class FinalLayer(nn.Module): + """ + The final layer of DiT. + """ + + def __init__(self, hidden_size: int, patch_size: int, out_channels: int, total_out_channels: Optional[int] = None, dtype=None, device=None): + super().__init__() + self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.linear = ( + nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, dtype=dtype, device=device) + if (total_out_channels is None) + else nn.Linear(hidden_size, total_out_channels, bias=True, dtype=dtype, device=device) + ) + self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device)) + + def forward(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor: + shift, scale = self.adaLN_modulation(c).chunk(2, dim=1) + x = modulate(self.norm_final(x), shift, scale) + x = self.linear(x) + return x + + +class MMDiT(nn.Module): + """Diffusion model with a Transformer backbone.""" + + def __init__( + self, + input_size: int = 32, + patch_size: int = 2, + in_channels: int = 4, + depth: int = 28, + mlp_ratio: float = 4.0, + learn_sigma: bool = False, + adm_in_channels: Optional[int] = None, + context_embedder_config: Optional[Dict] = None, + register_length: int = 0, + attn_mode: str = "torch", + rmsnorm: bool = False, + scale_mod_only: bool = False, + swiglu: bool = False, + out_channels: Optional[int] = None, + pos_embed_scaling_factor: Optional[float] = None, + pos_embed_offset: Optional[float] = None, + pos_embed_max_size: Optional[int] = None, + num_patches = None, + qk_norm: Optional[str] = None, + qkv_bias: bool = True, + dtype = None, + device = None, + ): + super().__init__() + print(f"mmdit initializing with: {input_size=}, {patch_size=}, {in_channels=}, {depth=}, {mlp_ratio=}, {learn_sigma=}, {adm_in_channels=}, {context_embedder_config=}, {register_length=}, {attn_mode=}, {rmsnorm=}, {scale_mod_only=}, {swiglu=}, {out_channels=}, {pos_embed_scaling_factor=}, {pos_embed_offset=}, {pos_embed_max_size=}, {num_patches=}, {qk_norm=}, {qkv_bias=}, {dtype=}, 
{device=}") + self.dtype = dtype + self.learn_sigma = learn_sigma + self.in_channels = in_channels + default_out_channels = in_channels * 2 if learn_sigma else in_channels + self.out_channels = out_channels if out_channels is not None else default_out_channels + self.patch_size = patch_size + self.pos_embed_scaling_factor = pos_embed_scaling_factor + self.pos_embed_offset = pos_embed_offset + self.pos_embed_max_size = pos_embed_max_size + + # apply magic --> this defines a head_size of 64 + hidden_size = 64 * depth + num_heads = depth + + self.num_heads = num_heads + + self.x_embedder = PatchEmbed(input_size, patch_size, in_channels, hidden_size, bias=True, strict_img_size=self.pos_embed_max_size is None, dtype=dtype, device=device) + self.t_embedder = TimestepEmbedder(hidden_size, dtype=dtype, device=device) + + if adm_in_channels is not None: + assert isinstance(adm_in_channels, int) + self.y_embedder = VectorEmbedder(adm_in_channels, hidden_size, dtype=dtype, device=device) + + self.context_embedder = nn.Identity() + if context_embedder_config is not None: + if context_embedder_config["target"] == "torch.nn.Linear": + self.context_embedder = nn.Linear(**context_embedder_config["params"], dtype=dtype, device=device) + + self.register_length = register_length + if self.register_length > 0: + self.register = nn.Parameter(torch.randn(1, register_length, hidden_size, dtype=dtype, device=device)) + + # num_patches = self.x_embedder.num_patches + # Will use fixed sin-cos embedding: + # just use a buffer already + if num_patches is not None: + self.register_buffer( + "pos_embed", + torch.zeros(1, num_patches, hidden_size, dtype=dtype, device=device), + ) + else: + self.pos_embed = None + + self.joint_blocks = nn.ModuleList( + [ + JointBlock(hidden_size, num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, attn_mode=attn_mode, pre_only=i == depth - 1, rmsnorm=rmsnorm, scale_mod_only=scale_mod_only, swiglu=swiglu, qk_norm=qk_norm, dtype=dtype, device=device) + for i in range(depth) + ] + ) + + self.final_layer = FinalLayer(hidden_size, patch_size, self.out_channels, dtype=dtype, device=device) + + def cropped_pos_embed(self, hw): + assert self.pos_embed_max_size is not None + p = self.x_embedder.patch_size[0] + h, w = hw + # patched size + h = h // p + w = w // p + assert h <= self.pos_embed_max_size, (h, self.pos_embed_max_size) + assert w <= self.pos_embed_max_size, (w, self.pos_embed_max_size) + top = (self.pos_embed_max_size - h) // 2 + left = (self.pos_embed_max_size - w) // 2 + spatial_pos_embed = rearrange( + self.pos_embed, + "1 (h w) c -> 1 h w c", + h=self.pos_embed_max_size, + w=self.pos_embed_max_size, + ) + spatial_pos_embed = spatial_pos_embed[:, top : top + h, left : left + w, :] + spatial_pos_embed = rearrange(spatial_pos_embed, "1 h w c -> 1 (h w) c") + return spatial_pos_embed + + def unpatchify(self, x, hw=None): + """ + x: (N, T, patch_size**2 * C) + imgs: (N, H, W, C) + """ + c = self.out_channels + p = self.x_embedder.patch_size[0] + if hw is None: + h = w = int(x.shape[1] ** 0.5) + else: + h, w = hw + h = h // p + w = w // p + assert h * w == x.shape[1] + + x = x.reshape(shape=(x.shape[0], h, w, p, p, c)) + x = torch.einsum("nhwpqc->nchpwq", x) + imgs = x.reshape(shape=(x.shape[0], c, h * p, w * p)) + return imgs + + def forward_core_with_concat(self, x: torch.Tensor, c_mod: torch.Tensor, context: Optional[torch.Tensor] = None) -> torch.Tensor: + if self.register_length > 0: + context = torch.cat((repeat(self.register, "1 ... 
-> b ...", b=x.shape[0]), context if context is not None else torch.Tensor([]).type_as(x)), 1) + + # context is B, L', D + # x is B, L, D + for block in self.joint_blocks: + context, x = block(context, x, c=c_mod) + + x = self.final_layer(x, c_mod) # (N, T, patch_size ** 2 * out_channels) + return x + + def forward(self, x: torch.Tensor, t: torch.Tensor, y: Optional[torch.Tensor] = None, context: Optional[torch.Tensor] = None) -> torch.Tensor: + """ + Forward pass of DiT. + x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images) + t: (N,) tensor of diffusion timesteps + y: (N,) tensor of class labels + """ + hw = x.shape[-2:] + x = self.x_embedder(x) + self.cropped_pos_embed(hw) + c = self.t_embedder(t, dtype=x.dtype) # (N, D) + if y is not None: + y = self.y_embedder(y) # (N, D) + c = c + y # (N, D) + + context = self.context_embedder(context) + + x = self.forward_core_with_concat(x, c, context) + + x = self.unpatchify(x, hw=hw) # (N, out_channels, H, W) + return x diff --git a/demo/Diffusion/utils_sd3/other_impls.py b/demo/Diffusion/utils_sd3/other_impls.py new file mode 100644 index 00000000..a771c197 --- /dev/null +++ b/demo/Diffusion/utils_sd3/other_impls.py @@ -0,0 +1,555 @@ +# MIT License + +# Copyright (c) 2024 Stability AI + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
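+
+# Vendored from Stability AI's SD3 reference code (MIT license above): tokenizer
+# and text-encoder building blocks shared by the SD3 demo pipeline.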
+
+import torch, math
+import numpy as np
+from torch import nn
+from transformers import CLIPTokenizer, T5TokenizerFast
+
+def load_into(f, model, prefix, device, dtype=None):
+    """Just a debugging-friendly hack to apply the weights in a safetensors file to the pytorch module."""
+    for key in f.keys():
+        if key.startswith(prefix) and not key.startswith("loss."):
+            path = key[len(prefix):].split(".")
+            obj = model
+            for p in path:
+                if isinstance(obj, list):
+                    obj = obj[int(p)]
+                else:
+                    obj = getattr(obj, p, None)
+                    if obj is None:
+                        print(f"Skipping key '{key}' in safetensors file as '{p}' does not exist in python model")
+                        break
+            if obj is None:
+                continue
+            try:
+                tensor = f.get_tensor(key).to(device=device)
+                if dtype is not None:
+                    tensor = tensor.to(dtype=dtype)
+                obj.requires_grad_(False)
+                obj.set_(tensor)
+            except Exception as e:
+                print(f"Failed to load key '{key}' in safetensors file: {e}")
+                raise e
+
+def preprocess_image_sd3(image):
+    # PIL's convert() returns a new image, so the result must be assigned
+    image = image.convert("RGB")
+    image_np = np.array(image).astype(np.float32) / 255.0
+    image_np = np.moveaxis(image_np, 2, 0)
+    batch_images = np.expand_dims(image_np, axis=0).repeat(1, axis=0)
+    image_torch = torch.from_numpy(batch_images)
+    image_torch = 2.0 * image_torch - 1.0
+
+    return image_torch
+
+
+#################################################################################################
+### Core/Utility
+#################################################################################################
+
+
+def attention(q, k, v, heads, mask=None):
+    """Convenience wrapper around a basic attention operation"""
+    b, _, dim_head = q.shape
+    dim_head //= heads
+    q, k, v = map(lambda t: t.view(b, -1, heads, dim_head).transpose(1, 2), (q, k, v))
+    out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False)
+    return out.transpose(1, 2).reshape(b, -1, heads * dim_head)
+
+
+class Mlp(nn.Module):
+    """ MLP as used in Vision Transformer, MLP-Mixer and related networks"""
+    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, bias=True, dtype=None, device=None):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+
+        self.fc1 = nn.Linear(in_features, hidden_features, bias=bias, dtype=dtype, device=device)
+        self.act = act_layer
+        self.fc2 = nn.Linear(hidden_features, out_features, bias=bias, dtype=dtype, device=device)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.fc2(x)
+        return x
+
+
+#################################################################################################
+### CLIP
+#################################################################################################
+
+
+class CLIPAttention(torch.nn.Module):
+    def __init__(self, embed_dim, heads, dtype, device):
+        super().__init__()
+        self.heads = heads
+        self.q_proj = nn.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device)
+        self.k_proj = nn.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device)
+        self.v_proj = nn.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device)
+        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device)
+
+    def forward(self, x, mask=None):
+        q = self.q_proj(x)
+        k = self.k_proj(x)
+        v = self.v_proj(x)
+        out = attention(q, k, v, self.heads, mask)
+        return self.out_proj(out)
+
+
+ACTIVATIONS = {
+    "quick_gelu": lambda a: a * torch.sigmoid(1.702 * a),
+    "gelu": torch.nn.functional.gelu,
+}
+
+class
CLIPLayer(torch.nn.Module): + def __init__(self, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device): + super().__init__() + self.layer_norm1 = nn.LayerNorm(embed_dim, dtype=dtype, device=device) + self.self_attn = CLIPAttention(embed_dim, heads, dtype, device) + self.layer_norm2 = nn.LayerNorm(embed_dim, dtype=dtype, device=device) + #self.mlp = CLIPMLP(embed_dim, intermediate_size, intermediate_activation, dtype, device) + self.mlp = Mlp(embed_dim, intermediate_size, embed_dim, act_layer=ACTIVATIONS[intermediate_activation], dtype=dtype, device=device) + + def forward(self, x, mask=None): + x += self.self_attn(self.layer_norm1(x), mask) + x += self.mlp(self.layer_norm2(x)) + return x + + +class CLIPEncoder(torch.nn.Module): + def __init__(self, num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device): + super().__init__() + self.layers = torch.nn.ModuleList([CLIPLayer(embed_dim, heads, intermediate_size, intermediate_activation, dtype, device) for i in range(num_layers)]) + + def forward(self, x, mask=None, intermediate_output=None): + if intermediate_output is not None: + if intermediate_output < 0: + intermediate_output = len(self.layers) + intermediate_output + intermediate = None + for i, l in enumerate(self.layers): + x = l(x, mask) + if i == intermediate_output: + intermediate = x.clone() + return x, intermediate + + +class CLIPEmbeddings(torch.nn.Module): + def __init__(self, embed_dim, vocab_size=49408, num_positions=77, dtype=None, device=None): + super().__init__() + self.token_embedding = torch.nn.Embedding(vocab_size, embed_dim, dtype=dtype, device=device) + self.position_embedding = torch.nn.Embedding(num_positions, embed_dim, dtype=dtype, device=device) + + def forward(self, input_tokens): + return self.token_embedding(input_tokens) + self.position_embedding.weight + + +class CLIPTextModel_(torch.nn.Module): + def __init__(self, config_dict, dtype, device): + num_layers = config_dict["num_hidden_layers"] + embed_dim = config_dict["hidden_size"] + heads = config_dict["num_attention_heads"] + intermediate_size = config_dict["intermediate_size"] + intermediate_activation = config_dict["hidden_act"] + super().__init__() + self.embeddings = CLIPEmbeddings(embed_dim, dtype=dtype, device=device) + self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device) + self.final_layer_norm = nn.LayerNorm(embed_dim, dtype=dtype, device=device) + + def forward(self, input_tokens, intermediate_output=None, final_layer_norm_intermediate=True): + x = self.embeddings(input_tokens) + causal_mask = torch.empty(x.shape[1], x.shape[1], dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1) + x, i = self.encoder(x, mask=causal_mask, intermediate_output=intermediate_output) + x = self.final_layer_norm(x) + if i is not None and final_layer_norm_intermediate: + i = self.final_layer_norm(i) + pooled_output = x[torch.arange(x.shape[0], device=x.device), input_tokens.to(dtype=torch.int, device=x.device).argmax(dim=-1),] + return x, i, pooled_output + + +class CLIPTextModel(torch.nn.Module): + def __init__(self, config_dict, dtype, device): + super().__init__() + self.num_layers = config_dict["num_hidden_layers"] + self.text_model = CLIPTextModel_(config_dict, dtype, device) + embed_dim = config_dict["hidden_size"] + self.text_projection = nn.Linear(embed_dim, embed_dim, bias=False, dtype=dtype, device=device) + + # WAR for RuntimeError: a leaf Variable that requires grad is being used in 
an in-place operation.
+        with torch.no_grad():
+            self.text_projection.weight.copy_(torch.eye(embed_dim))
+        self.dtype = dtype
+
+    def get_input_embeddings(self):
+        return self.text_model.embeddings.token_embedding
+
+    def set_input_embeddings(self, embeddings):
+        self.text_model.embeddings.token_embedding = embeddings
+
+    def forward(self, *args, **kwargs):
+        x = self.text_model(*args, **kwargs)
+        out = self.text_projection(x[2])
+        return (x[0], x[1], out, x[2])
+
+
+class SDTokenizer:
+    def __init__(self, max_length=77, pad_with_end=True, tokenizer=None, has_start_token=True, pad_to_max_length=True, min_length=None):
+        self.tokenizer = tokenizer
+        self.max_length = max_length
+        self.min_length = min_length
+        empty = self.tokenizer('')["input_ids"]
+        if has_start_token:
+            self.tokens_start = 1
+            self.start_token = empty[0]
+            self.end_token = empty[1]
+        else:
+            self.tokens_start = 0
+            self.start_token = None
+            self.end_token = empty[0]
+        self.pad_with_end = pad_with_end
+        self.pad_to_max_length = pad_to_max_length
+        vocab = self.tokenizer.get_vocab()
+        self.inv_vocab = {v: k for k, v in vocab.items()}
+        self.max_word_length = 8
+
+    def tokenize_with_weights(self, text: str):
+        """Tokenize the text with weight values - presume 1.0 for all and ignore other features here. The details aren't relevant for a reference impl, and the weights themselves have only a weak effect on SD3."""
+        if self.pad_with_end:
+            pad_token = self.end_token
+        else:
+            pad_token = 0
+        batch = []
+        if self.start_token is not None:
+            batch.append((self.start_token, 1.0))
+        to_tokenize = text.replace("\n", " ").split(' ')
+        to_tokenize = [x for x in to_tokenize if x != ""]
+        for word in to_tokenize:
+            batch.extend([(t, 1) for t in self.tokenizer(word)["input_ids"][self.tokens_start:-1]])
+        batch.append((self.end_token, 1.0))
+        if self.pad_to_max_length:
+            batch.extend([(pad_token, 1.0)] * (self.max_length - len(batch)))
+        if self.min_length is not None and len(batch) < self.min_length:
+            batch.extend([(pad_token, 1.0)] * (self.min_length - len(batch)))
+        if len(batch) > self.max_length:
+            batch = batch[:self.max_length]
+        return [batch]
+
+
+class SDXLClipGTokenizer(SDTokenizer):
+    def __init__(self, tokenizer):
+        super().__init__(pad_with_end=False, tokenizer=tokenizer)
+
+
+class SD3Tokenizer:
+    def __init__(self):
+        clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
+        self.clip_l = SDTokenizer(tokenizer=clip_tokenizer)
+        self.clip_g = SDXLClipGTokenizer(clip_tokenizer)
+        self.t5xxl = T5XXLTokenizer()
+
+    def tokenize_with_weights(self, text: str):
+        out = {}
+        out["g"] = self.clip_g.tokenize_with_weights(text)
+        out["l"] = self.clip_l.tokenize_with_weights(text)
+        out["t5xxl"] = self.t5xxl.tokenize_with_weights(text)
+        return out
+
+class ClipTokenWeightEncoder:
+    def encode_token_weights(self, token_weight_pairs):
+        tokens = list(map(lambda a: a[0], token_weight_pairs[0]))
+
+        # model inference
+        tokens = torch.tensor([tokens], dtype=torch.int64, device="cuda")
+        out, pooled = self(tokens)
+
+        if pooled is not None:
+            first_pooled = pooled[0:1].cuda()
+        else:
+            first_pooled = pooled
+        output = [out[0:1]]
+
+        return torch.cat(output, dim=-2).cuda(), first_pooled
+
+class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
+    """Uses the CLIP transformer encoder for text (from huggingface)"""
+    LAYERS = ["last", "pooled", "hidden"]
+    def __init__(self, device="cuda", max_length=77, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=CLIPTextModel,
+        
special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, return_projected_pooled=True): + super().__init__() + assert layer in self.LAYERS + self.transformer = model_class(textmodel_json_config, dtype, device) + self.num_layers = self.transformer.num_layers + self.max_length = max_length + self.transformer = self.transformer.eval() + for param in self.parameters(): + param.requires_grad = False + self.layer = layer + self.layer_idx = None + self.special_tokens = special_tokens + self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) + self.layer_norm_hidden_state = layer_norm_hidden_state + self.return_projected_pooled = return_projected_pooled + if layer == "hidden": + assert layer_idx is not None + assert abs(layer_idx) < self.num_layers + self.set_clip_options({"layer": layer_idx}) + self.options_default = (self.layer, self.layer_idx, self.return_projected_pooled) + + def set_clip_options(self, options): + layer_idx = options.get("layer", self.layer_idx) + self.return_projected_pooled = options.get("projected_pooled", self.return_projected_pooled) + if layer_idx is None or abs(layer_idx) > self.num_layers: + self.layer = "last" + else: + self.layer = "hidden" + self.layer_idx = layer_idx + + def forward(self, tokens): + backup_embeds = self.transformer.get_input_embeddings() + outputs = self.transformer(tokens, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state) + self.transformer.set_input_embeddings(backup_embeds) + if self.layer == "last": + z = outputs[0] + else: + z = outputs[1] + pooled_output = None + if len(outputs) >= 3: + if not self.return_projected_pooled and len(outputs) >= 4 and outputs[3] is not None: + pooled_output = outputs[3].float() + elif outputs[2] is not None: + pooled_output = outputs[2].float() + return z.float(), pooled_output + + +class SDXLClipG(SDClipModel): + """Wraps the CLIP-G model into the SD-CLIP-Model interface""" + def __init__(self, config, device="cuda", layer="penultimate", layer_idx=None, dtype=None): + if layer == "penultimate": + layer="hidden" + layer_idx=-2 + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0}, layer_norm_hidden_state=False) + + +class T5XXLModel(SDClipModel): + """Wraps the T5-XXL model into the SD-CLIP-Model interface for convenience""" + def __init__(self, config, device="cuda", layer="last", layer_idx=None, dtype=None): + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=T5) + + +################################################################################################# +### T5 implementation, for the T5-XXL text encoder portion, largely pulled from upstream impl +################################################################################################# + + +class T5XXLTokenizer(SDTokenizer): + """Wraps the T5 Tokenizer from HF into the SDTokenizer interface""" + def __init__(self): + super().__init__(pad_with_end=False, tokenizer=T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl"), has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=77) + + +class T5LayerNorm(torch.nn.Module): + def __init__(self, hidden_size, eps=1e-6, dtype=None, device=None): + super().__init__() + self.weight = torch.nn.Parameter(torch.ones(hidden_size, dtype=dtype, device=device)) + self.variance_epsilon = 
eps + + def forward(self, x): + variance = x.pow(2).mean(-1, keepdim=True) + x = x * torch.rsqrt(variance + self.variance_epsilon) + return self.weight.to(device=x.device, dtype=x.dtype) * x + + +class T5DenseGatedActDense(torch.nn.Module): + def __init__(self, model_dim, ff_dim, dtype, device): + super().__init__() + self.wi_0 = nn.Linear(model_dim, ff_dim, bias=False, dtype=dtype, device=device) + self.wi_1 = nn.Linear(model_dim, ff_dim, bias=False, dtype=dtype, device=device) + self.wo = nn.Linear(ff_dim, model_dim, bias=False, dtype=dtype, device=device) + + def forward(self, x): + hidden_gelu = torch.nn.functional.gelu(self.wi_0(x), approximate="tanh") + hidden_linear = self.wi_1(x) + x = hidden_gelu * hidden_linear + x = self.wo(x) + return x + + +class T5LayerFF(torch.nn.Module): + def __init__(self, model_dim, ff_dim, dtype, device): + super().__init__() + self.DenseReluDense = T5DenseGatedActDense(model_dim, ff_dim, dtype, device) + self.layer_norm = T5LayerNorm(model_dim, dtype=dtype, device=device) + + def forward(self, x): + forwarded_states = self.layer_norm(x) + forwarded_states = self.DenseReluDense(forwarded_states) + x += forwarded_states + return x + + +class T5Attention(torch.nn.Module): + def __init__(self, model_dim, inner_dim, num_heads, relative_attention_bias, dtype, device): + super().__init__() + # Mesh TensorFlow initialization to avoid scaling before softmax + self.q = nn.Linear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.k = nn.Linear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.v = nn.Linear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.o = nn.Linear(inner_dim, model_dim, bias=False, dtype=dtype, device=device) + self.num_heads = num_heads + self.relative_attention_bias = None + if relative_attention_bias: + self.relative_attention_num_buckets = 32 + self.relative_attention_max_distance = 128 + self.relative_attention_bias = torch.nn.Embedding(self.relative_attention_num_buckets, self.num_heads, device=device, dtype=dtype) + + @staticmethod + def _relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128): + """ + Adapted from Mesh Tensorflow: + https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593 + + Translate relative position to a bucket number for relative attention. The relative position is defined as + memory_position - query_position, i.e. the distance in tokens from the attending position to the attended-to + position. If bidirectional=False, then positive relative positions are invalid. We use smaller buckets for + small absolute relative_position and larger buckets for larger absolute relative_positions. All relative + positions >=max_distance map to the same bucket. All relative positions <=-max_distance map to the same bucket. 
+ This should allow for more graceful generalization to longer sequences than the model has been trained on + + Args: + relative_position: an int32 Tensor + bidirectional: a boolean - whether the attention is bidirectional + num_buckets: an integer + max_distance: an integer + + Returns: + a Tensor with the same shape as relative_position, containing int32 values in the range [0, num_buckets) + """ + relative_buckets = 0 + if bidirectional: + num_buckets //= 2 + relative_buckets += (relative_position > 0).to(torch.long) * num_buckets + relative_position = torch.abs(relative_position) + else: + relative_position = -torch.min(relative_position, torch.zeros_like(relative_position)) + # now relative_position is in the range [0, inf) + # half of the buckets are for exact increments in positions + max_exact = num_buckets // 2 + is_small = relative_position < max_exact + # The other half of the buckets are for logarithmically bigger bins in positions up to max_distance + relative_position_if_large = max_exact + ( + torch.log(relative_position.float() / max_exact) + / math.log(max_distance / max_exact) + * (num_buckets - max_exact) + ).to(torch.long) + relative_position_if_large = torch.min(relative_position_if_large, torch.full_like(relative_position_if_large, num_buckets - 1)) + relative_buckets += torch.where(is_small, relative_position, relative_position_if_large) + return relative_buckets + + def compute_bias(self, query_length, key_length, device): + """Compute binned relative position bias""" + context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None] + memory_position = torch.arange(key_length, dtype=torch.long, device=device)[None, :] + relative_position = memory_position - context_position # shape (query_length, key_length) + relative_position_bucket = self._relative_position_bucket( + relative_position, # shape (query_length, key_length) + bidirectional=True, + num_buckets=self.relative_attention_num_buckets, + max_distance=self.relative_attention_max_distance, + ) + values = self.relative_attention_bias(relative_position_bucket) # shape (query_length, key_length, num_heads) + values = values.permute([2, 0, 1]).unsqueeze(0) # shape (1, num_heads, query_length, key_length) + return values + + def forward(self, x, past_bias=None): + q = self.q(x) + k = self.k(x) + v = self.v(x) + if self.relative_attention_bias is not None: + past_bias = self.compute_bias(x.shape[1], x.shape[1], x.device) + if past_bias is not None: + mask = past_bias + out = attention(q, k * ((k.shape[-1] / self.num_heads) ** 0.5), v, self.num_heads, mask) + return self.o(out), past_bias + + +class T5LayerSelfAttention(torch.nn.Module): + def __init__(self, model_dim, inner_dim, ff_dim, num_heads, relative_attention_bias, dtype, device): + super().__init__() + self.SelfAttention = T5Attention(model_dim, inner_dim, num_heads, relative_attention_bias, dtype, device) + self.layer_norm = T5LayerNorm(model_dim, dtype=dtype, device=device) + + def forward(self, x, past_bias=None): + output, past_bias = self.SelfAttention(self.layer_norm(x), past_bias=past_bias) + x += output + return x, past_bias + + +class T5Block(torch.nn.Module): + def __init__(self, model_dim, inner_dim, ff_dim, num_heads, relative_attention_bias, dtype, device): + super().__init__() + self.layer = torch.nn.ModuleList() + self.layer.append(T5LayerSelfAttention(model_dim, inner_dim, ff_dim, num_heads, relative_attention_bias, dtype, device)) + self.layer.append(T5LayerFF(model_dim, ff_dim, dtype, device)) + + def forward(self, 
x, past_bias=None): + x, past_bias = self.layer[0](x, past_bias) + x = self.layer[-1](x) + return x, past_bias + + +class T5Stack(torch.nn.Module): + def __init__(self, num_layers, model_dim, inner_dim, ff_dim, num_heads, vocab_size, dtype, device): + super().__init__() + self.embed_tokens = torch.nn.Embedding(vocab_size, model_dim, device=device, dtype=dtype) + self.block = torch.nn.ModuleList([T5Block(model_dim, inner_dim, ff_dim, num_heads, relative_attention_bias=(i == 0), dtype=dtype, device=device) for i in range(num_layers)]) + self.final_layer_norm = T5LayerNorm(model_dim, dtype=dtype, device=device) + + def forward(self, input_ids, intermediate_output=None, final_layer_norm_intermediate=True): + intermediate = None + x = self.embed_tokens(input_ids) + past_bias = None + for i, l in enumerate(self.block): + x, past_bias = l(x, past_bias) + if i == intermediate_output: + intermediate = x.clone() + x = self.final_layer_norm(x) + if intermediate is not None and final_layer_norm_intermediate: + intermediate = self.final_layer_norm(intermediate) + return x, intermediate + + +class T5(torch.nn.Module): + def __init__(self, config_dict, dtype, device): + super().__init__() + self.num_layers = config_dict["num_layers"] + self.encoder = T5Stack(self.num_layers, config_dict["d_model"], config_dict["d_model"], config_dict["d_ff"], config_dict["num_heads"], config_dict["vocab_size"], dtype, device) + self.dtype = dtype + + def get_input_embeddings(self): + return self.encoder.embed_tokens + + def set_input_embeddings(self, embeddings): + self.encoder.embed_tokens = embeddings + + def forward(self, *args, **kwargs): + return self.encoder(*args, **kwargs) diff --git a/demo/Diffusion/utils_sd3/sd3_impls.py b/demo/Diffusion/utils_sd3/sd3_impls.py new file mode 100644 index 00000000..d3ea7dc9 --- /dev/null +++ b/demo/Diffusion/utils_sd3/sd3_impls.py @@ -0,0 +1,388 @@ +# MIT License + +# Copyright (c) 2024 Stability AI + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
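+
+# Worked example for the ModelSamplingDiscreteFlow schedule defined below
+# (illustrative only; shift=3.0 is an assumed value, not a default set here):
+#
+#     sigma(t) = shift * (t/1000) / (1 + (shift - 1) * (t/1000))
+#
+# e.g. for shift=3.0 and t=500: sigma = 3.0 * 0.5 / (1 + 2.0 * 0.5) = 0.75,
+# while timestep(sigma) = 1000 * sigma inverts the unshifted (shift=1.0) case.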
+ +import torch, math, einops +from utils_sd3.mmdit import MMDiT +from PIL import Image + + +################################################################################################# +### MMDiT Model Wrapping +################################################################################################# + + +class ModelSamplingDiscreteFlow(torch.nn.Module): + """Helper for sampler scheduling (ie timestep/sigma calculations) for Discrete Flow models""" + def __init__(self, shift=1.0): + super().__init__() + self.shift = shift + timesteps = 1000 + ts = self.sigma(torch.arange(1, timesteps + 1, 1)) + self.register_buffer('sigmas', ts) + + @property + def sigma_min(self): + return self.sigmas[0] + + @property + def sigma_max(self): + return self.sigmas[-1] + + def timestep(self, sigma): + return sigma * 1000 + + def sigma(self, timestep: torch.Tensor): + timestep = timestep / 1000.0 + if self.shift == 1.0: + return timestep + return self.shift * timestep / (1 + (self.shift - 1) * timestep) + + def calculate_denoised(self, sigma, model_output, model_input): + sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) + return model_input - model_output * sigma + + def noise_scaling(self, sigma, noise, latent_image, max_denoise=False): + return sigma * noise + (1.0 - sigma) * latent_image + + +class BaseModel(torch.nn.Module): + """Wrapper around the core MM-DiT model""" + def __init__(self, shift=1.0, device=None, dtype=torch.float32, file=None, prefix=""): + super().__init__() + # Important configuration values can be quickly determined by checking shapes in the source file + # Some of these will vary between models (eg 2B vs 8B primarily differ in their depth, but also other details change) + patch_size = file.get_tensor(f"{prefix}x_embedder.proj.weight").shape[2] + depth = file.get_tensor(f"{prefix}x_embedder.proj.weight").shape[0] // 64 + num_patches = file.get_tensor(f"{prefix}pos_embed").shape[1] + pos_embed_max_size = round(math.sqrt(num_patches)) + adm_in_channels = file.get_tensor(f"{prefix}y_embedder.mlp.0.weight").shape[1] + context_shape = file.get_tensor(f"{prefix}context_embedder.weight").shape + context_embedder_config = { + "target": "torch.nn.Linear", + "params": { + "in_features": context_shape[1], + "out_features": context_shape[0] + } + } + self.diffusion_model = MMDiT(input_size=None, pos_embed_scaling_factor=None, pos_embed_offset=None, pos_embed_max_size=pos_embed_max_size, patch_size=patch_size, in_channels=16, depth=depth, num_patches=num_patches, adm_in_channels=adm_in_channels, context_embedder_config=context_embedder_config, device=device, dtype=dtype) + self.model_sampling = ModelSamplingDiscreteFlow(shift=shift) + + def forward(self, x, sigma, c_crossattn=None, y=None): + dtype = self.get_dtype() + timestep = self.model_sampling.timestep(sigma).float() + model_output = self.diffusion_model(x.to(dtype), timestep, context=c_crossattn.to(dtype), y=y.to(dtype)).float() + return self.model_sampling.calculate_denoised(sigma, model_output, x) + + def get_dtype(self): + return self.diffusion_model.dtype + + +class CFGDenoiser(torch.nn.Module): + """Helper for applying CFG Scaling to diffusion outputs""" + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, x, timestep, cond, uncond, cond_scale): + # Run cond and uncond in a batch together + batched = self.model(torch.cat([x, x]), torch.cat([timestep, timestep]), c_crossattn=torch.cat([cond["c_crossattn"], uncond["c_crossattn"]]), y=torch.cat([cond["y"], 
uncond["y"]])) + # Then split and apply CFG Scaling + pos_out, neg_out = batched.chunk(2) + scaled = neg_out + (pos_out - neg_out) * cond_scale + return scaled + + +class SD3LatentFormat: + """Latents are slightly shifted from center - this class must be called after VAE Decode to correct for the shift""" + def __init__(self): + self.scale_factor = 1.5305 + self.shift_factor = 0.0609 + + def process_in(self, latent): + return (latent - self.shift_factor) * self.scale_factor + + def process_out(self, latent): + return (latent / self.scale_factor) + self.shift_factor + + def decode_latent_to_preview(self, x0): + """Quick RGB approximate preview of sd3 latents""" + factors = torch.tensor([ + [-0.0645, 0.0177, 0.1052], [ 0.0028, 0.0312, 0.0650], + [ 0.1848, 0.0762, 0.0360], [ 0.0944, 0.0360, 0.0889], + [ 0.0897, 0.0506, -0.0364], [-0.0020, 0.1203, 0.0284], + [ 0.0855, 0.0118, 0.0283], [-0.0539, 0.0658, 0.1047], + [-0.0057, 0.0116, 0.0700], [-0.0412, 0.0281, -0.0039], + [ 0.1106, 0.1171, 0.1220], [-0.0248, 0.0682, -0.0481], + [ 0.0815, 0.0846, 0.1207], [-0.0120, -0.0055, -0.0867], + [-0.0749, -0.0634, -0.0456], [-0.1418, -0.1457, -0.1259] + ], device="cuda") + latent_image = x0[0].permute(1, 2, 0).cuda() @ factors + + latents_ubyte = (((latent_image + 1) / 2) + .clamp(0, 1) # change scale from -1..1 to 0..1 + .mul(0xFF) # to 0..255 + .byte()).cuda() + + return Image.fromarray(latents_ubyte.numpy()) + + +################################################################################################# +### K-Diffusion Sampling +################################################################################################# + + +def append_dims(x, target_dims): + """Appends dimensions to the end of a tensor until it has target_dims dimensions.""" + dims_to_append = target_dims - x.ndim + return x[(...,) + (None,) * dims_to_append] + + +def to_d(x, sigma, denoised): + """Converts a denoiser output to a Karras ODE derivative.""" + return (x - denoised) / append_dims(sigma, x.ndim) + + +@torch.no_grad() +@torch.autocast("cuda", dtype=torch.float16) +def sample_euler(func, x, sigmas, extra_args=None): + """Implements Algorithm 2 (Euler steps) from Karras et al. 
(2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in range(len(sigmas) - 1): + sigma_hat = sigmas[i] + denoised = func(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + dt = sigmas[i + 1] - sigma_hat + # Euler method + x = x + d * dt + return x + + +################################################################################################# +### VAE +################################################################################################# + + +def Normalize(in_channels, num_groups=32, dtype=torch.float32, device=None): + return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True, dtype=dtype, device=device) + + +class ResnetBlock(torch.nn.Module): + def __init__(self, *, in_channels, out_channels=None, dtype=torch.float32, device=None): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + + self.norm1 = Normalize(in_channels, dtype=dtype, device=device) + self.conv1 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + self.norm2 = Normalize(out_channels, dtype=dtype, device=device) + self.conv2 = torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + if self.in_channels != self.out_channels: + self.nin_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + else: + self.nin_shortcut = None + self.swish = torch.nn.SiLU(inplace=True) + + def forward(self, x): + hidden = x + hidden = self.norm1(hidden) + hidden = self.swish(hidden) + hidden = self.conv1(hidden) + hidden = self.norm2(hidden) + hidden = self.swish(hidden) + hidden = self.conv2(hidden) + if self.in_channels != self.out_channels: + x = self.nin_shortcut(x) + return x + hidden + + +class AttnBlock(torch.nn.Module): + def __init__(self, in_channels, dtype=torch.float32, device=None): + super().__init__() + self.norm = Normalize(in_channels, dtype=dtype, device=device) + self.q = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + self.k = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + self.v = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + self.proj_out = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + + def forward(self, x): + hidden = self.norm(x) + q = self.q(hidden) + k = self.k(hidden) + v = self.v(hidden) + b, c, h, w = q.shape + q, k, v = map(lambda x: einops.rearrange(x, "b c h w -> b 1 (h w) c").contiguous(), (q, k, v)) + hidden = torch.nn.functional.scaled_dot_product_attention(q, k, v) # scale is dim ** -0.5 per default + hidden = einops.rearrange(hidden, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b) + hidden = self.proj_out(hidden) + return x + hidden + + +class Downsample(torch.nn.Module): + def __init__(self, in_channels, dtype=torch.float32, device=None): + super().__init__() + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0, dtype=dtype, device=device) + + def forward(self, x): + pad = (0,1,0,1) + x = torch.nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + return x + + +class Upsample(torch.nn.Module): + def 
__init__(self, in_channels, dtype=torch.float32, device=None): + super().__init__() + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + + def forward(self, x): + x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") + x = self.conv(x) + return x + + +class VAEEncoder(torch.nn.Module): + def __init__(self, ch=128, ch_mult=(1,2,4,4), num_res_blocks=2, in_channels=3, z_channels=16, dtype=torch.float32, device=None): + super().__init__() + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + # downsampling + self.conv_in = torch.nn.Conv2d(in_channels, ch, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + in_ch_mult = (1,) + tuple(ch_mult) + self.in_ch_mult = in_ch_mult + self.down = torch.nn.ModuleList() + for i_level in range(self.num_resolutions): + block = torch.nn.ModuleList() + attn = torch.nn.ModuleList() + block_in = ch*in_ch_mult[i_level] + block_out = ch*ch_mult[i_level] + for i_block in range(num_res_blocks): + block.append(ResnetBlock(in_channels=block_in, out_channels=block_out, dtype=dtype, device=device)) + block_in = block_out + down = torch.nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in, dtype=dtype, device=device) + self.down.append(down) + # middle + self.mid = torch.nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in, dtype=dtype, device=device) + self.mid.attn_1 = AttnBlock(block_in, dtype=dtype, device=device) + self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in, dtype=dtype, device=device) + # end + self.norm_out = Normalize(block_in, dtype=dtype, device=device) + self.conv_out = torch.nn.Conv2d(block_in, 2 * z_channels, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + self.swish = torch.nn.SiLU(inplace=True) + + def forward(self, x): + # downsampling + hs = [self.conv_in(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1]) + hs.append(h) + if i_level != self.num_resolutions-1: + hs.append(self.down[i_level].downsample(hs[-1])) + # middle + h = hs[-1] + h = self.mid.block_1(h) + h = self.mid.attn_1(h) + h = self.mid.block_2(h) + # end + h = self.norm_out(h) + h = self.swish(h) + h = self.conv_out(h) + return h + + +class VAEDecoder(torch.nn.Module): + def __init__(self, ch=128, out_ch=3, ch_mult=(1, 2, 4, 4), num_res_blocks=2, resolution=256, z_channels=16, dtype=torch.float32, device=None): + super().__init__() + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + block_in = ch * ch_mult[self.num_resolutions - 1] + curr_res = resolution // 2 ** (self.num_resolutions - 1) + # z to block_in + self.conv_in = torch.nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + # middle + self.mid = torch.nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in, dtype=dtype, device=device) + self.mid.attn_1 = AttnBlock(block_in, dtype=dtype, device=device) + self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in, dtype=dtype, device=device) + # upsampling + self.up = torch.nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = torch.nn.ModuleList() + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + 
block.append(ResnetBlock(in_channels=block_in, out_channels=block_out, dtype=dtype, device=device)) + block_in = block_out + up = torch.nn.Module() + up.block = block + if i_level != 0: + up.upsample = Upsample(block_in, dtype=dtype, device=device) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + # end + self.norm_out = Normalize(block_in, dtype=dtype, device=device) + self.conv_out = torch.nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + self.swish = torch.nn.SiLU(inplace=True) + + def forward(self, z): + # z to block_in + hidden = self.conv_in(z) + # middle + hidden = self.mid.block_1(hidden) + hidden = self.mid.attn_1(hidden) + hidden = self.mid.block_2(hidden) + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + hidden = self.up[i_level].block[i_block](hidden) + if i_level != 0: + hidden = self.up[i_level].upsample(hidden) + # end + hidden = self.norm_out(hidden) + hidden = self.swish(hidden) + hidden = self.conv_out(hidden) + return hidden + + +class SDVAE(torch.nn.Module): + def __init__(self, dtype=torch.float32, device=None): + super().__init__() + self.encoder = VAEEncoder(dtype=dtype, device=device) + self.decoder = VAEDecoder(dtype=dtype, device=device) + + @torch.autocast("cuda", dtype=torch.float16) + def decode(self, latent): + return self.decoder(latent) + + @torch.autocast("cuda", dtype=torch.float16) + def encode(self, image): + hidden = self.encoder(image) + mean, logvar = torch.chunk(hidden, 2, dim=1) + logvar = torch.clamp(logvar, -30.0, 20.0) + std = torch.exp(0.5 * logvar) + return mean + std * torch.randn_like(mean) diff --git a/docker/rockylinux8.Dockerfile b/docker/rockylinux8.Dockerfile index dca7208c..707c419b 100644 --- a/docker/rockylinux8.Dockerfile +++ b/docker/rockylinux8.Dockerfile @@ -25,7 +25,7 @@ ENV NV_CUDNN_VERSION 8.9.6.50-1 ENV NV_CUDNN_PACKAGE libcudnn8-${NV_CUDNN_VERSION}.cuda12.2 ENV NV_CUDNN_PACKAGE_DEV libcudnn8-devel-${NV_CUDNN_VERSION}.cuda12.2 -ENV TRT_VERSION 10.0.1.6 +ENV TRT_VERSION 10.1.0.27 SHELL ["/bin/bash", "-c"] RUN dnf install -y \ @@ -62,15 +62,15 @@ RUN dnf install -y python38 python38-devel &&\ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib64 \ - && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp38-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.1.0.27/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.1.0.27/python/tensorrt-10.1.0-cp38-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib64 \ - && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp38-none-linux_x86_64.whl ;\ + wget 
https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && tar -xf TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && cp -a TensorRT-10.1.0.27/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.1.0.27/python/tensorrt-10.1.0-cp38-none-linux_x86_64.whl ;\ else \ echo "Invalid CUDA_VERSION"; \ exit 1; \ diff --git a/docker/rockylinux9.Dockerfile b/docker/rockylinux9.Dockerfile index ff00512a..c62aa214 100644 --- a/docker/rockylinux9.Dockerfile +++ b/docker/rockylinux9.Dockerfile @@ -25,7 +25,7 @@ ENV NV_CUDNN_VERSION 8.9.6.50-1 ENV NV_CUDNN_PACKAGE libcudnn8-${NV_CUDNN_VERSION}.cuda12.2 ENV NV_CUDNN_PACKAGE_DEV libcudnn8-devel-${NV_CUDNN_VERSION}.cuda12.2 -ENV TRT_VERSION 10.0.1.6 +ENV TRT_VERSION 10.1.0.27 SHELL ["/bin/bash", "-c"] RUN dnf install -y \ @@ -67,15 +67,15 @@ RUN dnf -y install \ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib64 \ - && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp39-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.1.0.27/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.1.0.27/python/tensorrt-10.1.0-cp39-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib64 \ - && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp39-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && tar -xf TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && cp -a TensorRT-10.1.0.27/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.1.0.27/python/tensorrt-10.1.0-cp39-none-linux_x86_64.whl ;\ else \ echo "Invalid CUDA_VERSION"; \ exit 1; \ diff --git a/docker/ubuntu-20.04.Dockerfile b/docker/ubuntu-20.04.Dockerfile index 7498c124..da587d25 100644 --- a/docker/ubuntu-20.04.Dockerfile +++ b/docker/ubuntu-20.04.Dockerfile @@ -28,7 +28,7 @@ ENV CUDA_VERSION_MAJOR_MINOR=12.2 ENV NV_CUDNN_PACKAGE "libcudnn8=$NV_CUDNN_VERSION-1+cuda${CUDA_VERSION_MAJOR_MINOR}" ENV NV_CUDNN_PACKAGE_DEV "libcudnn8-dev=$NV_CUDNN_VERSION-1+cuda${CUDA_VERSION_MAJOR_MINOR}" -ENV TRT_VERSION 10.0.1.6 +ENV TRT_VERSION 10.1.0.27 SHELL ["/bin/bash", "-c"] RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -84,15 +84,15 @@ RUN apt-get install -y --no-install-recommends \ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp38-none-linux_x86_64.whl ;\ + wget 
https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.1.0.27/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.1.0.27/python/tensorrt-10.1.0-cp38-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp38-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && tar -xf TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz \ + && cp -a TensorRT-10.1.0.27/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.1.0.27/python/tensorrt-10.1.0-cp38-none-linux_x86_64.whl ;\ else \ echo "Invalid CUDA_VERSION"; \ exit 1; \ diff --git a/docker/ubuntu-22.04-aarch64.Dockerfile b/docker/ubuntu-22.04-aarch64.Dockerfile index ebac9297..cd09108d 100644 --- a/docker/ubuntu-22.04-aarch64.Dockerfile +++ b/docker/ubuntu-22.04-aarch64.Dockerfile @@ -20,7 +20,7 @@ ARG CUDA_VERSION=12.4.0 # Multi-arch container support available in non-cudnn containers. FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 -ENV TRT_VERSION 10.0.1.6 +ENV TRT_VERSION 10.1.0.27 SHELL ["/bin/bash", "-c"] # Setup user account diff --git a/docker/ubuntu-22.04.Dockerfile b/docker/ubuntu-22.04.Dockerfile index a7e0d6a1..28686c6f 100644 --- a/docker/ubuntu-22.04.Dockerfile +++ b/docker/ubuntu-22.04.Dockerfile @@ -28,7 +28,7 @@ ENV CUDA_VERSION_MAJOR_MINOR=12.2 ENV NV_CUDNN_PACKAGE "libcudnn8=$NV_CUDNN_VERSION-1+cuda${CUDA_VERSION_MAJOR_MINOR}" ENV NV_CUDNN_PACKAGE_DEV "libcudnn8-dev=$NV_CUDNN_VERSION-1+cuda${CUDA_VERSION_MAJOR_MINOR}" -ENV TRT_VERSION 10.0.1.6 +ENV TRT_VERSION 10.1.0.27 SHELL ["/bin/bash", "-c"] RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -84,15 +84,15 @@ RUN apt-get install -y --no-install-recommends \ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp310-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.1.0.27/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.1.0.27/python/tensorrt-10.1.0-cp310-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && tar -xf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz \ - && cp -a TensorRT-10.0.1.6/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.0.1.6/python/tensorrt-10.0.1-cp310-none-linux_x86_64.whl ;\ + wget 
https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.1.0/tars/TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz \
+    && tar -xf TensorRT-10.1.0.27.Linux.x86_64-gnu.cuda-12.4.tar.gz \
+    && cp -a TensorRT-10.1.0.27/lib/*.so* /usr/lib/x86_64-linux-gnu \
+    && pip install TensorRT-10.1.0.27/python/tensorrt-10.1.0-cp310-none-linux_x86_64.whl ;\
 else \
     echo "Invalid CUDA_VERSION"; \
     exit 1; \
diff --git a/docker/ubuntu-cross-aarch64.Dockerfile b/docker/ubuntu-cross-aarch64.Dockerfile
index eb2e100b..ae07ab92 100644
--- a/docker/ubuntu-cross-aarch64.Dockerfile
+++ b/docker/ubuntu-cross-aarch64.Dockerfile
@@ -21,7 +21,7 @@ ARG OS_VERSION=22.04
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION}
 LABEL maintainer="NVIDIA CORPORATION"
-ENV TRT_VERSION 10.0.1.6
+ENV TRT_VERSION 10.1.0.27
 ENV DEBIAN_FRONTEND=noninteractive
 ARG uid=1000
diff --git a/include/NvInfer.h b/include/NvInfer.h
index c921ede0..a0420973 100644
--- a/include/NvInfer.h
+++ b/include/NvInfer.h
@@ -281,7 +281,9 @@ class ITensor : public INoCopy
     //!
     //! Requires that min and max be finite, and min <= max.
     //!
-    bool setDynamicRange(float min, float max) noexcept
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED bool setDynamicRange(float min, float max) noexcept
     {
         return mImpl->setDynamicRange(min, max);
     }
@@ -369,6 +371,8 @@ class ITensor : public INoCopy
     //!
     //! \return True if dynamic range is set, false otherwise.
     //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
     bool dynamicRangeIsSet() const noexcept
     {
         return mImpl->dynamicRangeIsSet();
@@ -403,14 +407,16 @@ class ITensor : public INoCopy
     }
     //!
-    //! \brief Set allowed formats for this tensor. By default all formats are allowed.
+    //! \brief Set allowed formats for an input or output tensor. By default all formats are allowed.
     //!        Shape tensors (for which isShapeTensor() returns true) may only have row-major linear format.
     //!
     //! When running network on DLA and the build option kGPU_FALLBACK is not specified, if DLA format(kCHW4 with Int8,
-    //! kCHW4 with FP16, kCHW16 with FP16, kCHW32 with Int8) is set, the input format is treated as native DLA format with
-    //! line stride requirement. Input/output binding with these format should have correct layout during
+    //! kCHW4 with FP16, kCHW16 with FP16, kCHW32 with Int8) is set, the input format is treated as native DLA format
+    //! with line stride requirement. Input/output bindings with these formats should have the correct layout during
     //! inference.
     //!
+    //! Tensor formats are determined at build time by TensorRT for tensors not marked as input or output.
+    //!
     //! \param formats A bitmask of TensorFormat values that are supported for this tensor.
     //!
     //! \see ITensor::getAllowedFormats()
@@ -653,8 +659,7 @@ class ILayer : public INoCopy
     //! otherwise it must be either the input or output type.
     //!
     //! Strongly-typed networks reject calls to method setPrecision. In strongly-typed networks, the computation
-    //! precision is typically controlled by casting the input tensors to the desired type. The exception is
-    //! INormalizationLayer, which has a method setComputePrecision().
+    //! precision is typically controlled by casting the input tensors to the desired type.
     //!
     //! \param dataType the computational precision.
     //!
@@ -2775,6 +2780,7 @@ enum class UnaryOperation : int32_t
     kSIGN = 21,  //!< Sign. If input > 0, output 1; if input < 0, output -1; if input == 0, output 0.
     kROUND = 22,  //!< Round to nearest even for floating-point data type.
     kISINF = 23,  //!< Return true if input value equals +/- infinity for floating-point data type.
+    kISNAN = 24,  //!< Return true if input value is a NaN for floating-point data type.
 };
 //!
@@ -2785,7 +2791,7 @@ enum class UnaryOperation : int32_t
 template <>
 constexpr inline int32_t EnumMax<UnaryOperation>() noexcept
 {
-    return 24;
+    return 25;
 }
 //!
@@ -3031,7 +3037,7 @@ struct Permutation
 //! This layer shuffles data by applying in sequence: a transpose operation, a reshape operation
 //! and a second transpose operation. The dimension types of the output are those of the reshape dimension.
 //!
-//! The layer has an optional second input. If present, it must be a 1D Int32 shape tensor, 
+//! The layer has an optional second input. If present, it must be a 1D Int32 shape tensor,
 //! and the reshape dimensions are taken from it.
 //!
 //! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
@@ -3080,6 +3086,9 @@ class IShuffleLayer : public ILayer
     //! Value -1 infers that particular dimension by looking at input and rest
     //! of the reshape dimensions. Note that only a maximum of one dimension is
     //! permitted to be specified as -1.
+    //! Avoid using -1 if the input can have zero volume and any of the other
+    //! reshape dimensions can be zero (after resolving special treatment of 0),
+    //! because the solution for the -1 becomes indeterminate and TensorRT will report an error.
     //!
     //! The product of the new dimensions must be equal to the product of the old.
     //!
@@ -4572,7 +4581,7 @@ class IRecurrenceLayer : public ILoopBoundaryLayer
 //! The second input tensor, if present, must be defined outside the loop.
 //!
 //! If getLoopOutput() is kLAST_VALUE, a single input must be provided,
-//! and that input must from a IRecurrenceLayer in the same loop.
+//! and that input must be from an IRecurrenceLayer in the same loop.
 //!
 //! If getLoopOutput() is kCONCATENATE or kREVERSE, a second input must be provided.
 //! The second input must be a 0D shape tensor, defined before the loop commences,
@@ -6157,9 +6166,9 @@ class INormalizationLayer : public ILayer
     //!
     //! \param type The datatype used for the compute precision of this layer.
     //!
-    //! By default, to avoid overflow errors, TensorRT will run the normalization computation in DataType::kFLOAT32
-    //! even in mixed precision mode regardless of builder flags. To override this default, use this method
-    //! to set the desired compute precision.
+    //! This method controls the compute precision of the normalization computation in mixed
+    //! precision mode, which helps avoid overflow errors. The compute precision defaults to
+    //! DataType::kFLOAT32. To override this default, use this method to set the desired compute precision.
     //!
     //! For a weakly typed network:
     //!
@@ -6168,7 +6177,8 @@ class INormalizationLayer : public ILayer
     //! * Method setPrecision() can still be called. The input data is cast to that precision before
     //!   being cast to the compute precision.
     //!
-    //! Neither of these two methods are allowed for a strongly typed network.
+    //! A strongly typed network rejects calls to this method, since the compute precision is typically
+    //! controlled by casting the input tensors to the desired type.
     //!
     //! Only DataType::kFLOAT32 and DataType::kHALF are valid types for \p type.
     //!
@@ -7538,6 +7548,8 @@ class INetworkDefinition : public INoCopy
 //!
 //! \brief Version of calibration algorithm to use.
 //!
+//! \deprecated Deprecated in TensorRT 10.1.
Superseded by explicit quantization. +//! enum class CalibrationAlgoType : int32_t { kLEGACY_CALIBRATION = 0, //!< Legacy calibration @@ -7568,7 +7580,9 @@ constexpr inline int32_t EnumMax() noexcept //! the distribution of activations. It may optionally implement a method for caching the calibration result for reuse //! on subsequent runs. //! -class IInt8Calibrator : public IVersionedInterface +//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. +//! +class TRT_DEPRECATED IInt8Calibrator : public IVersionedInterface { public: //! @@ -7634,7 +7648,7 @@ class IInt8Calibrator : public IVersionedInterface namespace v_1_0 { -class IInt8EntropyCalibrator : public IInt8Calibrator +class TRT_DEPRECATED IInt8EntropyCalibrator : public IInt8Calibrator { public: //! @@ -7668,11 +7682,13 @@ class IInt8EntropyCalibrator : public IInt8Calibrator //! \note To ensure compatibility of source code with future versions of TensorRT, use IEntropyCalibrator, not //! v_1_0::IEntropyCalibrator //! +//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. +//! using IInt8EntropyCalibrator = v_1_0::IInt8EntropyCalibrator; namespace v_1_0 { -class IInt8EntropyCalibrator2 : public IInt8Calibrator +class TRT_DEPRECATED IInt8EntropyCalibrator2 : public IInt8Calibrator { public: //! @@ -7706,11 +7722,13 @@ class IInt8EntropyCalibrator2 : public IInt8Calibrator //! \note To ensure compatibility of source code with future versions of TensorRT, use IEntropyCalibrator2, not //! v_1_0::IEntropyCalibrator2 //! +//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. +//! using IInt8EntropyCalibrator2 = v_1_0::IInt8EntropyCalibrator2; namespace v_1_0 { -class IInt8MinMaxCalibrator : public IInt8Calibrator +class TRT_DEPRECATED IInt8MinMaxCalibrator : public IInt8Calibrator { public: //! @@ -7743,11 +7761,13 @@ class IInt8MinMaxCalibrator : public IInt8Calibrator //! \note To ensure compatibility of source code with future versions of TensorRT, use IMinMaxCalibrator>, not //! v_1_0::IMinMaxCalibrator //! +//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. +//! using IInt8MinMaxCalibrator = v_1_0::IInt8MinMaxCalibrator; namespace v_1_0 { -class IInt8LegacyCalibrator : public IInt8Calibrator +class TRT_DEPRECATED IInt8LegacyCalibrator : public IInt8Calibrator { public: //! @@ -7821,6 +7841,8 @@ class IInt8LegacyCalibrator : public IInt8Calibrator //! \note To ensure compatibility of source code with future versions of TensorRT, use ILegacyCalibrator, not //! v_1_0::ILegacyCalibrator //! +//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. +//! using IInt8LegacyCalibrator = v_1_0::IInt8LegacyCalibrator; //! @@ -8102,6 +8124,8 @@ using QuantizationFlags = uint32_t; //! //! \see IBuilderConfig::setQuantizationFlag(), IBuilderConfig::getQuantizationFlag() //! +//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. +//! enum class QuantizationFlag : int32_t { //! Run int8 calibration pass before layer fusion. Only valid for IInt8LegacyCalibrator and @@ -8200,7 +8224,7 @@ enum class BuilderFlag : int32_t //! If BuilderFlag::kVERSION_COMPATIBLE is not set then the value of this flag will be ignored. kEXCLUDE_LEAN_RUNTIME = 14, - //! Enable FP8 layer selection, with FP32 fallback. + //! Enable plugins with FP8 input/output. //! //! This flag is not supported with hardware-compatibility mode. //! @@ -8261,6 +8285,10 @@ enum class BuilderFlag : int32_t //! 
ICudaEngine::setWeightStreamingBudget //! kWEIGHT_STREAMING = 21, + + //! Enable plugins with INT4 input/output. + kINT4 = 22, + }; //! @@ -8271,7 +8299,7 @@ enum class BuilderFlag : int32_t template <> constexpr inline int32_t EnumMax() noexcept { - return 22; + return 23; } //! @@ -8280,7 +8308,10 @@ constexpr inline int32_t EnumMax() noexcept //! \brief Class to handle tactic timing info collected from builder. //! //! The timing cache is created or initialized by IBuilderConfig. It can be shared across builder instances -//! to accelerate the builder wallclock time. +//! to reduce the builder wallclock time. +//! +//! \warning It is a known issue that the same timing cache doesn't guarantee stable engine build reproducibility +//! at optimization level 4 and higher. This issue will be fixed by 2024. //! //! \see IBuilderConfig //! @@ -8643,7 +8674,9 @@ class IBuilderConfig : public INoCopy //! //! The calibrator is to minimize the information loss during the INT8 quantization process. //! - void setInt8Calibrator(IInt8Calibrator* calibrator) noexcept + //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. + //! + TRT_DEPRECATED void setInt8Calibrator(IInt8Calibrator* calibrator) noexcept { mImpl->setInt8Calibrator(calibrator); } @@ -8651,7 +8684,9 @@ class IBuilderConfig : public INoCopy //! //! \brief Get Int8 Calibration interface. //! - IInt8Calibrator* getInt8Calibrator() const noexcept + //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. + //! + TRT_DEPRECATED IInt8Calibrator* getInt8Calibrator() const noexcept { return mImpl->getInt8Calibrator(); } @@ -8944,7 +8979,9 @@ class IBuilderConfig : public INoCopy //! //! \return True if the calibration profile was set correctly. //! - bool setCalibrationProfile(IOptimizationProfile const* profile) noexcept + //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. + //! + TRT_DEPRECATED bool setCalibrationProfile(IOptimizationProfile const* profile) noexcept { return mImpl->setCalibrationProfile(profile); } @@ -8954,7 +8991,9 @@ class IBuilderConfig : public INoCopy //! //! \return A pointer to the current calibration profile or nullptr if calibration profile is unset. //! - IOptimizationProfile const* getCalibrationProfile() noexcept + //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. + //! 
+    TRT_DEPRECATED IOptimizationProfile const* getCalibrationProfile() noexcept
     {
         return mImpl->getCalibrationProfile();
     }
diff --git a/include/NvInferImpl.h b/include/NvInferImpl.h
index 1c2dbff8..bb66ecd6 100644
--- a/include/NvInferImpl.h
+++ b/include/NvInferImpl.h
@@ -358,6 +358,14 @@ class VCudaEngine : public VRoot
     virtual int64_t getStreamableWeightsSize() const noexcept = 0;
     virtual bool isDebugTensor(char const* name) const noexcept = 0;
+
+    // Added in TensorRT 10.1
+    virtual bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept = 0;
+    virtual int64_t getWeightStreamingBudgetV2() const noexcept = 0;
+    virtual int64_t getWeightStreamingAutomaticBudget() const noexcept = 0;
+    virtual int64_t getWeightStreamingScratchMemorySize() const noexcept = 0;
+    virtual int64_t getDeviceMemorySizeV2() const noexcept = 0;
+    virtual int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept = 0;
 };
 class VExecutionContext : public VRoot
@@ -410,6 +418,9 @@ class VExecutionContext : public VRoot
     virtual bool getDebugState(char const* name) const noexcept = 0;
     virtual bool setAllTensorsDebugState(bool flag) noexcept = 0;
     virtual size_t updateDeviceMemorySizeForShapes() noexcept = 0;
+
+    // Added in TensorRT 10.1
+    virtual void setDeviceMemoryV2(void* memory, int64_t size) noexcept = 0;
 };
 class VEngineInspector : public VRoot
diff --git a/include/NvInferRuntime.h b/include/NvInferRuntime.h
index 04434931..81cb7ba1 100644
--- a/include/NvInferRuntime.h
+++ b/include/NvInferRuntime.h
@@ -922,8 +922,24 @@ class IPluginV3OneBuild : public IPluginCapability
     //!
     //! \brief Query for any custom tactics that the plugin intends to use
     //!
-    //! For each format combination supported by the plugin (up to a maximum indicated by getFormatCombinationLimit()),
-    //! the plugin will be timed for each tactic advertised through this method.
+    //! This method queries for the set of tactics T(f) supported by the plugin for the format combination f indicated
+    //! by the immediately preceding call to configurePlugin(). It is guaranteed to be called after configurePlugin().
+    //!
+    //! For each format combination provided through configurePlugin(), up to a maximum of getFormatCombinationLimit(),
+    //! the plugin will be timed for each tactic advertised through this method for that format combination. i.e. The
+    //! plugin will be timed \f$N = \sum_{i=0}^{i < n} |T(f_i)|\f$ times, where \f$f_i\f$ is the i-th format
+    //! combination and \f$n\f$ is the number of format combinations timed. In pseudocode:
+    //!
+    //!     for each format combination f_i:
+    //!         if i > getFormatCombinationLimit()
+    //!             goto done
+    //!         configurePlugin(...)
+    //!         for each tactic in getValidTactics(...)
+    //!             time tactic
+    //!     done:
+    //!
     //!
     //! \param tactics Pre-allocated buffer to which the tactic values should be written
     //! \param nbTactics The number of tactics advertised through getNbTactics()
@@ -1699,7 +1715,9 @@ class IRefitter : public INoCopy
     //!
     //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
     //!
-    bool setDynamicRange(char const* tensorName, float min, float max) noexcept
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED bool setDynamicRange(char const* tensorName, float min, float max) noexcept
     {
         return mImpl->setDynamicRange(tensorName, min, max);
     }
@@ -1713,7 +1731,9 @@ class IRefitter : public INoCopy
     //!
     //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
     //!
-    float getDynamicRangeMin(char const* tensorName) const noexcept
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
@@ -1699,7 +1715,9 @@ class IRefitter : public INoCopy //! //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. //! - bool setDynamicRange(char const* tensorName, float min, float max) noexcept + //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. + //! + TRT_DEPRECATED bool setDynamicRange(char const* tensorName, float min, float max) noexcept { return mImpl->setDynamicRange(tensorName, min, max); } @@ -1713,7 +1731,9 @@ //! //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. //! - float getDynamicRangeMin(char const* tensorName) const noexcept + //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. + //! + TRT_DEPRECATED float getDynamicRangeMin(char const* tensorName) const noexcept { return mImpl->getDynamicRangeMin(tensorName); } @@ -1727,7 +1747,9 @@ //! //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. //! - float getDynamicRangeMax(char const* tensorName) const noexcept + //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. + //! + TRT_DEPRECATED float getDynamicRangeMax(char const* tensorName) const noexcept { return mImpl->getDynamicRangeMax(tensorName); } @@ -1743,7 +1765,9 @@ //! If tensorNames!=nullptr, each written pointer points to a string owned by //! the engine being refit, and becomes invalid when the engine is destroyed. //! - int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept + //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization. + //! + TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept { return mImpl->getTensorsWithDynamicRange(size, tensorNames); } @@ -2263,7 +2287,7 @@ enum class TacticSource : int32_t //! \deprecated Deprecated in TensorRT 10.0. kCUBLAS TRT_DEPRECATED_ENUM = 0, - //! cuBLAS LT tactics. Enabled by default. + //! cuBLAS LT tactics. Disabled by default. //! \deprecated Deprecated in TensorRT 9.0. kCUBLAS_LT TRT_DEPRECATED_ENUM = 1, @@ -2610,9 +2634,11 @@ class ICudaEngine : public INoCopy //! //! \brief Return the maximum device memory required by the context over all profiles. //! + //! \deprecated Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeV2(). + //! //! \see IExecutionContext::setDeviceMemory() //! - size_t getDeviceMemorySize() const noexcept + TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept { return mImpl->getDeviceMemorySize(); } @@ -2620,13 +2646,47 @@ //! //! \brief Return the maximum device memory required by the context for a profile. //! - //! \see IExecutionContext::setDeviceMemory() + //! \deprecated Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeForProfileV2(int32_t). //! - size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept + //! \see IExecutionContext::setDeviceMemoryV2() + //! + TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept { return mImpl->getDeviceMemorySizeForProfile(profileIndex); } + //! + //! \brief Return the maximum device memory required by the context over all profiles. + //! + //! This API is stateful: the value it returns can change depending on prior calls to the following APIs: + //! * setWeightStreamingBudget() + //! * setWeightStreamingBudgetV2() + //! + //! \see IExecutionContext::setDeviceMemoryV2() + //! \see setWeightStreamingBudget() + //! \see setWeightStreamingBudgetV2() + //! + int64_t getDeviceMemorySizeV2() const noexcept + { + return mImpl->getDeviceMemorySizeV2(); + } + + //! + //! \brief Return the maximum device memory required by the context for a profile. + //! + //! This API is stateful: the value it returns can change depending on prior calls to the following APIs: + //! * setWeightStreamingBudget() + //! * setWeightStreamingBudgetV2() + //! + //! \see IExecutionContext::setDeviceMemoryV2() + //! \see setWeightStreamingBudget() + //! \see setWeightStreamingBudgetV2() + //!
+ int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept + { + return mImpl->getDeviceMemorySizeForProfileV2(profileIndex); + } + //! //! \brief Return true if an engine can be refit. //! @@ -2638,10 +2698,10 @@ } //! - //! \brief Return the number of bytes per component of an element, or -1 if the provided name does not map to an - //! input or output tensor. + //! \brief Return the number of bytes per component of an element, or -1 if the + //! tensor is not vectorized or the provided name does not map to an input or output tensor. //! - //! The vector component size is returned if getTensorVectorizedDim() != -1. + //! The vector component size is returned if getTensorVectorizedDim(tensorName) != -1. //! //! \param tensorName The name of an input or output tensor. //! @@ -2659,8 +2719,8 @@ } //! - //! \brief Return the number of bytes per component of an element of given profile, or -1 if the provided name does - //! not map to an input or output tensor. + //! \brief Return the number of bytes per component of an element of a given profile, or -1 if the tensor is not + //! vectorized or the provided name does not map to an input or output tensor. //! //! The vector component size is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1. //! @@ -2677,10 +2737,10 @@ } //! - //! \brief Return the number of components included in one element, or -1 if the provided name does not map to an - //! input or output tensor. + //! \brief Return the number of components included in one element, or -1 if the tensor is + //! not vectorized or if the provided name does not map to an input or output tensor. //! - //! The number of elements in the vectors is returned if getTensorVectorizedDim() != -1. + //! The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName) != -1. //! //! \param tensorName The name of an input or output tensor. //! @@ -2698,8 +2758,8 @@ } //! - //! \brief Return the number of components included in one element of given profile, or -1 if the provided name does - //! not map to an input or output tensor. + //! \brief Return the number of components included in one element of a given profile, or -1 if the tensor is not + //! vectorized or the provided name does not map to an input or output tensor. //! //! The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1. //! @@ -2881,8 +2941,9 @@ //! //! \param select Whether to query the minimum, optimum, or maximum values for this input tensor. //! - //! \return The minimum / optimum / maximum values for an input tensor in this profile. - //! If the profileIndex is invalid or the provided name does not map to an input tensor, return nullptr. + //! \return The minimum / optimum / maximum values for an input tensor in this profile. If the profileIndex is + //! invalid or the provided name does not map to an input tensor, or the tensor is not a shape binding, return + //! nullptr. //! //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. //!
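A small illustrative sketch of the vectorization queries documented above (assumes a deserialized `engine` and a valid tensor name; not part of this patch):

```cpp
// Sketch: interpreting the per-component queries on an ICudaEngine.
#include <cstdio>
#include "NvInferRuntime.h"

void describeLayout(nvinfer1::ICudaEngine const& engine, char const* name)
{
    int32_t const vecDim = engine.getTensorVectorizedDim(name);
    if (vecDim == -1)
    {
        // Not vectorized: the two queries below would both return -1.
        std::printf("%s: not vectorized\n", name);
        return;
    }
    std::printf("%s: dim %d vectorized, %d components/element, %d bytes/component\n", name, vecDim,
        engine.getTensorComponentsPerElement(name), engine.getTensorBytesPerComponent(name));
}
```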
@@ -3062,6 +3123,8 @@ class ICudaEngine : public INoCopy //! \return An IHostMemory object that contains the serialized engine. //! //! The network may be deserialized with IRuntime::deserializeCudaEngine(). + //! Serializing a plan file with SerializationFlag::kEXCLUDE_WEIGHTS requires building the engine with kREFIT or + //! kREFIT_IDENTICAL. //! //! \see IRuntime::deserializeCudaEngine() //! @@ -3076,16 +3139,17 @@ //! //! \param gpuMemoryBudget This parameter may take on 3 types of values: //! -1: Allows TensorRT to choose the budget according to the streamable weights size. - //! Free CUDA memory will be queried at ::createExecutionContext and accordingly: + //! Free CUDA memory will be queried at createExecutionContext() and accordingly: //! * If streamable weights all fit: weight streaming is not required and disabled. //! * Otherwise: Budget is set to getMinimumWeightStreamingBudget //! 0: (default) Disables weight streaming. The execution may fail if the network is too large for GPU memory. //! >0: The maximum bytes of GPU memory that weights can occupy. It must be bounded by - //! [getMinimumWeightStreamingBudget, min(getStreamableWeightsSize - 1, free GPU memory)]. + //! [getMinimumWeightStreamingBudget, free GPU memory). //! //! By setting a weight limit, users can expect a GPU memory usage reduction - //! of |network weights| - gpuMemoryBudget bytes. Maximum memory savings occur - //! when gpuMemoryBudget is set to getMinimumWeightStreamingBudget. + //! of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur + //! when gpuMemoryBudget is set to getMinimumWeightStreamingBudget(). Creating additional + //! IExecutionContexts will increase memory usage by O(getMinimumWeightStreamingBudget()). //! //! Streaming larger amounts of memory will likely result in lower performance //! except in some boundary cases where streaming weights allows the user to //! run larger batch sizes. The higher throughput offsets the increased //! latency in these cases. Tuning the value of the memory limit is //! recommended for best performance. //! - //! \warning If weight streaming is active, then multiple concurrent IExecutionContexts will forced to run serially. - //! - //! \warning GPU memory for the weights is allocated upon the first IExecutionContext's creation - //! and deallocated upon the last one's destruction. + //! \warning GPU memory for the weights is allocated in this call and will be deallocated by enabling weight + //! streaming or destroying the ICudaEngine. //! //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building. //! - //! \return true if the memory limit is valid and the call was successful - //! otherwise false. + //! \warning The weight streaming budget cannot be modified while there are active IExecutionContexts. //! - //! \see BuilderFlag::kWEIGHT_STREAMING, - //! ICudaEngine::getWeightStreamingBudget - //! ICudaEngine::getMinimumWeightStreamingBudget, - //! ICudaEngine::getStreamableWeightsSize + //! \return true if the memory limit is valid and the call was successful, false otherwise. + //! + //! \deprecated Deprecated in TensorRT 10.1. Superseded by setWeightStreamingBudgetV2(). //! - bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept + //! \see BuilderFlag::kWEIGHT_STREAMING + //! \see getWeightStreamingBudget() + //! \see getMinimumWeightStreamingBudget() + //! \see getStreamableWeightsSize() + //! + TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept { return mImpl->setWeightStreamingBudget(gpuMemoryBudget); }
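For reference, the legacy V1 budget flow being deprecated here looks roughly like this (illustrative sketch only; new code should prefer setWeightStreamingBudgetV2(), shown further below):

```cpp
// Sketch: legacy (V1) weight streaming setup, clamped to the documented bounds.
// Assumes `engine` was built with BuilderFlag::kWEIGHT_STREAMING.
#include <algorithm>
#include "NvInferRuntime.h"

bool enableLegacyStreaming(nvinfer1::ICudaEngine& engine, int64_t freeGpuBytes)
{
    int64_t const minBudget = engine.getMinimumWeightStreamingBudget();
    int64_t const weightsSize = engine.getStreamableWeightsSize();
    // Keep the budget inside [minBudget, free GPU memory) and below the full weights size.
    int64_t const budget = std::max(minBudget, std::min(freeGpuBytes, weightsSize - 1));
    return engine.setWeightStreamingBudget(budget); // before any IExecutionContext exists
}
```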
@@ -3118,15 +3183,17 @@ //! //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building. //! - //! \returns The weight streaming budget in bytes. Please see ::setWeightStreamingBudget for the possible + //! \returns The weight streaming budget in bytes. Please see setWeightStreamingBudget() for the possible //! values. //! + //! \deprecated Deprecated in TensorRT 10.1. Superseded by getWeightStreamingBudgetV2(). + //! //! \see BuilderFlag::kWEIGHT_STREAMING, - //! ICudaEngine::setWeightStreamingBudget, - //! ICudaEngine::getMinimumWeightStreamingBudget, - //! ICudaEngine::getStreamableWeightsSize + //! \see setWeightStreamingBudget() + //! \see getMinimumWeightStreamingBudget() + //! \see getStreamableWeightsSize() //! - int64_t getWeightStreamingBudget() const noexcept + TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept { return mImpl->getWeightStreamingBudget(); } @@ -3143,12 +3210,13 @@ //! //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building. //! - //! //! \returns The minimum number of bytes of GPU memory required for streaming. //! - //! \see ICudaEngine::setWeightStreamingBudget + //! \deprecated Deprecated in TensorRT 10.1. The minimum budget is 0 in the V2 APIs. + //! + //! \see setWeightStreamingBudget() //! - int64_t getMinimumWeightStreamingBudget() const noexcept + TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept { return mImpl->getMinimumWeightStreamingBudget(); } @@ -3159,18 +3227,124 @@ //! The set of streamable weights is a subset of all network weights. The //! total size may exceed free GPU memory. //! - //! Returns 0 if BuilderFlag::kWEIGHT_STREAMING is unset during engine building. - //! - //! //! \returns The total size in bytes of all streamable weights. + //! Returns 0 if BuilderFlag::kWEIGHT_STREAMING is unset during engine building. //! - //! \see ICudaEngine::setWeightStreamingBudget + //! \see setWeightStreamingBudget() //! int64_t getStreamableWeightsSize() const noexcept { return mImpl->getStreamableWeightsSize(); } + //! + //! \brief Limit the maximum amount of GPU memory usable for network weights in bytes. + //! + //! \param gpuMemoryBudget This parameter must be a non-negative value. + //! 0: Only small amounts of scratch memory will be required to run the model. + //! >= getStreamableWeightsSize (default): Disables weight streaming. + //! The execution may fail if the network is too large for GPU memory. + //! + //! By setting a weight limit, users can expect a GPU memory usage reduction on the order + //! of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur + //! when gpuMemoryBudget is set to 0. Each IExecutionContext will require getWeightStreamingScratchMemorySize() + //! bytes of additional device memory if the engine is streaming its weights (budget < getStreamableWeightsSize()). + //! + //! Streaming larger amounts of memory will likely result in lower performance + //! except in some boundary cases where streaming weights allows the user to + //! run larger batch sizes. The higher throughput offsets the increased + //! latency in these cases. Tuning the value of the memory limit is + //! recommended for best performance. + //! + //! \warning GPU memory for the weights is allocated in this call and will be deallocated by enabling weight + //! streaming or destroying the ICudaEngine. + //! + //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building. + //! + //! \warning The weight streaming budget cannot be modified while there are active IExecutionContexts.
+ //! + //! \warning Using the V2 weight streaming APIs with V1 APIs (setWeightStreamingBudget(), + //! getWeightStreamingBudget(), getMinimumWeightStreamingBudget()) leads to undefined behavior. + //! + //! \return true if the memory limit is valid and the call was successful, false otherwise. + //! + //! \see BuilderFlag::kWEIGHT_STREAMING + //! \see getWeightStreamingBudgetV2() + //! \see getWeightStreamingScratchMemorySize() + //! \see getWeightStreamingAutomaticBudget() + //! \see getStreamableWeightsSize() + //! + bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept + { + return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget); + } + + //! + //! \brief Returns the current weight streaming device memory budget in bytes. + //! + //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building. + //! + //! \returns The weight streaming budget in bytes. Please see setWeightStreamingBudgetV2() for the possible + //! return values. Returns getStreamableWeightsSize() if weight streaming is disabled. + //! + //! \see BuilderFlag::kWEIGHT_STREAMING + //! \see setWeightStreamingBudget() + //! \see getMinimumWeightStreamingBudget() + //! \see getStreamableWeightsSize() + //! + int64_t getWeightStreamingBudgetV2() const noexcept + { + return mImpl->getWeightStreamingBudgetV2(); + } + + //! + //! \brief TensorRT automatically determines an ideal budget for the model to run. + //! + //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building. + //! + //! \warning The return value may change between TensorRT minor versions. + //! + //! \warning Setting the returned budget with V1 APIs (setWeightStreamingBudget()) will lead to undefined behavior. + //! Please use V2 APIs. + //! + //! \returns The weight streaming budget in bytes. Please set with setWeightStreamingBudgetV2(). + //! + //! \see BuilderFlag::kWEIGHT_STREAMING + //! \see setWeightStreamingBudgetV2() + //! + int64_t getWeightStreamingAutomaticBudget() const noexcept + { + return mImpl->getWeightStreamingAutomaticBudget(); + } + + //! + //! \brief Returns the size of the scratch memory required by the current weight streaming budget. + //! + //! Weight streaming requires small amounts of scratch memory on the GPU to stage CPU weights right before + //! execution. This value is typically much smaller than the total streamable weights size. Each IExecutionContext + //! will then allocate this additional memory, or the user can provide the additional memory through + //! getDeviceMemorySizeV2() and IExecutionContext::setDeviceMemoryV2(). + //! + //! The return value of this call depends on + //! 1. setWeightStreamingBudget() + //! 2. setWeightStreamingBudgetV2() + //! + //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building. + //! + //! \returns The weight streaming scratch memory in bytes. Returns 0 if weight streaming is disabled. + //! + //! \see BuilderFlag::kWEIGHT_STREAMING + //! \see setWeightStreamingBudgetV2() + //! \see getStreamableWeightsSize() + //! \see getDeviceMemorySizeV2() + //! \see getDeviceMemorySizeForProfileV2() + //! \see IExecutionContext::setDeviceMemoryV2() + //! + int64_t getWeightStreamingScratchMemorySize() const noexcept + { + return mImpl->getWeightStreamingScratchMemorySize(); + }
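The V2 APIs above compose as follows (illustrative sketch only; `engine` is assumed to have been built with BuilderFlag::kWEIGHT_STREAMING):

```cpp
// Sketch: V2 weight streaming - pick an automatic budget, apply it, and report
// the per-context scratch cost.
#include <cstdio>
#include "NvInferRuntime.h"

bool enableStreamingV2(nvinfer1::ICudaEngine& engine)
{
    // Any budget in [0, getStreamableWeightsSize()] is valid in the V2 scheme.
    int64_t const budget = engine.getWeightStreamingAutomaticBudget();
    if (!engine.setWeightStreamingBudgetV2(budget)) // no contexts may be alive here
    {
        return false;
    }
    std::printf("budget=%lld scratch/context=%lld streamable=%lld\n",
        static_cast<long long>(engine.getWeightStreamingBudgetV2()),
        static_cast<long long>(engine.getWeightStreamingScratchMemorySize()),
        static_cast<long long>(engine.getStreamableWeightsSize()));
    return true;
}
```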
+ //! //! \brief Check if a tensor is marked as a debug tensor. //! @@ -3214,6 +3388,7 @@ class IOutputAllocator : public IVersionedInterface //! //! \return A pointer to memory to use for the output tensor or nullptr. //! + //! //! To preallocate memory and have the engine fail if the preallocation is not big enough, //! use IExecutionContext::setTensorAddress to set a pointer to the preallocated memory, //! and have reallocateOutput return nullptr if that memory is not big enough. @@ -3416,7 +3591,13 @@ class IExecutionContext : public INoCopy //! getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if the //! reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of //! enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns. - //! Releasing or otherwise using the memory for other purposes during this time will result in undefined behavior. + //! Releasing or otherwise using the memory for other purposes, including using it in another execution context + //! running in parallel, during this time will result in undefined behavior. + //! + //! \deprecated Deprecated in TensorRT 10.1. Superseded by setDeviceMemoryV2(). + //! + //! \warning Weight-streaming related scratch memory will be allocated by TensorRT if the memory is set by this API. + //! Please use setDeviceMemoryV2() instead. //! //! \see ICudaEngine::getDeviceMemorySize() //! \see ICudaEngine::getDeviceMemorySizeForProfile() @@ -3429,6 +3610,28 @@ mImpl->setDeviceMemory(memory); } + //! + //! \brief Set the device memory and its corresponding size for use by this execution context. + //! + //! The memory must be aligned to the CUDA memory alignment property (as reported by cudaGetDeviceProperties()), and its size + //! must be large enough for performing inference with the given network inputs. getDeviceMemorySizeV2() and + //! getDeviceMemorySizeForProfileV2() report upper bounds of the size. Setting memory to nullptr is acceptable if the + //! reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of + //! enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns. + //! Releasing or otherwise using the memory for other purposes, including using it in another execution context + //! running in parallel, during this time will result in undefined behavior. + //! + //! \see ICudaEngine::getDeviceMemorySizeV2() + //! \see ICudaEngine::getDeviceMemorySizeForProfileV2() + //! \see ExecutionContextAllocationStrategy + //! \see ICudaEngine::createExecutionContext() + //! \see ICudaEngine::createExecutionContextWithoutDeviceMemory() + //! + void setDeviceMemoryV2(void* memory, int64_t size) noexcept + { + return mImpl->setDeviceMemoryV2(memory, size); + } + //! //! \brief Return the strides of the buffer for the given tensor name. //! diff --git a/include/NvInferRuntimeBase.h b/include/NvInferRuntimeBase.h index 3624706c..984ce869 100644 --- a/include/NvInferRuntimeBase.h +++ b/include/NvInferRuntimeBase.h @@ -70,7 +70,7 @@ //! * NvInferConsistency.h (for consistency checker) //! * NvInferPluginUtils.h (for plugin utilities) //! -#if !defined(NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE) && !defined(TRT_VCAST_SAFE) +#if !defined(NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE) static_assert(false, "Do not directly include this file. Include NvInferRuntime.h or NvInferSafeRuntime.h or NvInferConsistency.h or NvInferPluginUtils.h"); #endif @@ -174,6 +174,7 @@ enum class DataType : int32_t //! Signed 4-bit integer type.
kINT4 = 9, + }; namespace impl @@ -182,7 +183,7 @@ namespace impl template <> struct EnumMaxImpl<DataType> { - //! Declaration of kVALUE that represents the maximum number of elements in the DataType enum. +//! Declaration of kVALUE that represents the maximum number of elements in the DataType enum. static constexpr int32_t kVALUE = 10; }; } // namespace impl @@ -898,7 +899,7 @@ class IErrorRecorder : public IVersionedInterface //! If the upper bound of errors that can be stored is exceeded, the upper bound value must //! be returned. //! - //! For example, if the error recorder can store up to 16 error descriptions but recordError() has + //! For example, if the error recorder can store up to 16 error descriptions but reportError() has //! been called 20 times, getNbErrors() must return 16. //! //! \see clear(), hasOverflowed() diff --git a/include/NvInferRuntimePlugin.h b/include/NvInferRuntimePlugin.h index 5f97f4a5..dffdd901 100644 --- a/include/NvInferRuntimePlugin.h +++ b/include/NvInferRuntimePlugin.h @@ -855,6 +855,8 @@ enum class PluginFieldType : int32_t kINT64 = 10, //! FP8 field type. kFP8 = 11, + //! INT4 field type. + kINT4 = 12, }; //! diff --git a/include/NvInferVersion.h b/include/NvInferVersion.h index 13861a12..3a33d493 100644 --- a/include/NvInferVersion.h +++ b/include/NvInferVersion.h @@ -24,9 +24,9 @@ #define NV_INFER_VERSION_H #define NV_TENSORRT_MAJOR 10 //!< TensorRT major version. -#define NV_TENSORRT_MINOR 0 //!< TensorRT minor version. -#define NV_TENSORRT_PATCH 1 //!< TensorRT patch version. -#define NV_TENSORRT_BUILD 6 //!< TensorRT build number. +#define NV_TENSORRT_MINOR 1 //!< TensorRT minor version. +#define NV_TENSORRT_PATCH 0 //!< TensorRT patch version. +#define NV_TENSORRT_BUILD 27 //!< TensorRT build number. #define NV_TENSORRT_LWS_MAJOR 0 //!< TensorRT LWS major version. #define NV_TENSORRT_LWS_MINOR 0 //!< TensorRT LWS minor version.
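Returning to the V2 device-memory APIs documented earlier (ICudaEngine::getDeviceMemorySizeV2() and IExecutionContext::setDeviceMemoryV2()), a hedged usage sketch with user-managed allocation (helper name and error handling are assumptions, not part of this patch):

```cpp
// Sketch: user-managed activation memory via the V2 APIs added in this release.
// CUDA error handling is elided; the caller frees `*outMem` after destroying the context.
#include <cuda_runtime_api.h>
#include "NvInferRuntime.h"

nvinfer1::IExecutionContext* makeContextWithUserMemory(nvinfer1::ICudaEngine& engine, void** outMem)
{
    // Upper bound over all profiles; reflects the current weight streaming scratch needs.
    int64_t const size = engine.getDeviceMemorySizeV2();
    void* deviceMem{nullptr};
    cudaMalloc(&deviceMem, static_cast<size_t>(size)); // cudaMalloc meets TensorRT's alignment requirement

    auto* context = engine.createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED);
    if (context != nullptr && size > 0)
    {
        context->setDeviceMemoryV2(deviceMem, size);
    }
    *outMem = deviceMem;
    return context;
}
```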
diff --git a/parsers/onnx b/parsers/onnx index eb43908b..96e78110 160000 --- a/parsers/onnx +++ b/parsers/onnx @@ -1 +1 @@ -Subproject commit eb43908b02a296ea0594432f06e9d3fac288d672 +Subproject commit 96e781103cfc4adf4a6bb557e94bac8e693f6f4c diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index 2007b7ed..112d45f7 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -24,9 +24,7 @@ set(VFC_SHARED_TARGET ${VFC_TARGET_NAME}) set(TARGET_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(PLUGIN_EXPORT_MAP ${TARGET_DIR}/exports.map) -set(PLUGIN_EXPORT_DEF ${TARGET_DIR}/exports.def) set(VFC_PLUGIN_EXPORT_MAP ${TARGET_DIR}/exports-vfc_plugin.map) -set(VFC_PLUGIN_EXPORT_DEF ${TARGET_DIR}/exports-vfc_plugin.def) if(${CMAKE_BUILD_TYPE} MATCHES "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") diff --git a/plugin/README.md b/plugin/README.md index b8d96607..6f619095 100644 --- a/plugin/README.md +++ b/plugin/README.md @@ -43,7 +43,8 @@ | [pyramidROIAlignPlugin](pyramidROIAlignPlugin) | PyramidROIAlign_TRT | 1 | | [regionPlugin](regionPlugin) | Region_TRT | 1 | | [reorgPlugin](reorgPlugin) | Reorg_TRT | 2 | -| [roiAlignPlugin](roiAlignPlugin) | ROIAlign_TRT | 1 | +| [roiAlignPlugin](roiAlignPlugin) [DEPRECATED] | ROIAlign_TRT | 1 | +| [roiAlignPlugin](roiAlignPlugin) | ROIAlign_TRT | 2 | | [resizeNearestPlugin](resizeNearestPlugin) | ResizeNearest_TRT | 1 | | [scatterElementsPlugin](scatterElementsPlugin) | ScatterElements | 1 | | [scatterPlugin](scatterPlugin) | ScatterND | 1 | diff --git a/plugin/api/inferPlugin.cpp b/plugin/api/inferPlugin.cpp index 452f61b6..1aea8cee 100644 --- a/plugin/api/inferPlugin.cpp +++ b/plugin/api/inferPlugin.cpp @@ -49,6 +49,7 @@ #include "reorgPlugin/reorgPlugin.h" #include "resizeNearestPlugin/resizeNearestPlugin.h" #include "roiAlignPlugin/roiAlignPlugin.h" +#include "roiAlignPlugin/roiAlignPluginLegacy.h" #include "scatterElementsPlugin/scatterElementsPlugin.h" #include "scatterPlugin/scatterPlugin.h" #include "specialSlicePlugin/specialSlicePlugin.h" @@ -157,7 +158,7 @@ class PluginCreatorRegistry PluginCreatorRegistry() {} std::mutex mRegistryLock; - std::stack<std::unique_ptr<IPluginCreator>> mRegistry; + std::stack<std::unique_ptr<IPluginCreatorInterface>> mRegistry; std::unordered_set<std::string> mRegistryList; public: @@ -216,6 +217,7 @@ extern "C" initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); + initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); diff --git a/plugin/clipPlugin/clip.cu b/plugin/clipPlugin/clip.cu index 44bc1f73..39ccb7c7 100644 --- a/plugin/clipPlugin/clip.cu +++ b/plugin/clipPlugin/clip.cu @@ -99,10 +99,7 @@ int clipInference( case nvinfer1::DataType::kBF16: case nvinfer1::DataType::kINT64: case nvinfer1::DataType::kINT4: - { - PLUGIN_FAIL("Unsupported datatype"); - break; - } + PLUGIN_FAIL("Unsupported data type"); } return 0; diff --git a/plugin/common/bertCommon.h b/plugin/common/bertCommon.h index 4cb33551..16c3e54a 100644 --- a/plugin/common/bertCommon.h +++ b/plugin/common/bertCommon.h @@ -153,7 +153,8 @@ inline uint32_t getElementSize(nvinfer1::DataType t) noexcept case nvinfer1::DataType::kUINT8: case nvinfer1::DataType::kINT8: case nvinfer1::DataType::kFP8: return 1; - case nvinfer1::DataType::kINT4: PLUGIN_FAIL("Element size is not implemented for sub-byte data-types (INT4)"); + case nvinfer1::DataType::kINT4: + PLUGIN_FAIL("Element size is not implemented for sub-byte data-types"); } return 0; }
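The registry changes above are exercised through initLibNvInferPlugins(), which now also registers the version-2 ROIAlign_TRT creator; a minimal illustrative sketch (the logger class is an assumption, not part of this patch):

```cpp
// Sketch: registering all bundled plugin creators with the global registry.
#include <cstdio>
#include "NvInferPlugin.h"

class StderrLogger : public nvinfer1::ILogger
{
    void log(Severity severity, char const* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
        {
            std::fprintf(stderr, "[TRT] %s\n", msg);
        }
    }
};

bool loadPlugins()
{
    static StderrLogger logger;
    return initLibNvInferPlugins(&logger, ""); // empty string = default namespace
}
```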
diff --git a/plugin/common/common.cuh b/plugin/common/common.cuh index 5a3819c2..58d6e31c 100644 --- a/plugin/common/common.cuh +++ b/plugin/common/common.cuh @@ -20,6 +20,7 @@ #include "common/cublasWrapper.h" #include +#include #define HDI inline __host__ __device__ diff --git a/plugin/common/kernels/maskRCNNKernels.h b/plugin/common/kernels/maskRCNNKernels.h index 433d7ca2..19dce37d 100644 --- a/plugin/common/kernels/maskRCNNKernels.h +++ b/plugin/common/kernels/maskRCNNKernels.h @@ -56,8 +56,9 @@ inline size_t typeSize(const nvinfer1::DataType type) case nvinfer1::DataType::kINT64: return sizeof(int64_t); case nvinfer1::DataType::kBOOL: return sizeof(bool); case nvinfer1::DataType::kUINT8: return sizeof(uint8_t); - case nvinfer1::DataType::kFP8: PLUGIN_FAIL("FP8 is not supported"); break; - case nvinfer1::DataType::kINT4: PLUGIN_FAIL("INT4 is not supported"); break; + case nvinfer1::DataType::kFP8: + case nvinfer1::DataType::kINT4: + PLUGIN_FAIL("Unsupported data type"); } return 0; } diff --git a/plugin/common/vfcCommon.cpp b/plugin/common/vfcCommon.cpp index 8664ab56..b1610ef8 100644 --- a/plugin/common/vfcCommon.cpp +++ b/plugin/common/vfcCommon.cpp @@ -22,7 +22,7 @@ #include using namespace nvinfer1; -using nvinfer1::plugin::ROIAlignPluginCreator; +using nvinfer1::plugin::ROIAlignV3PluginCreator; namespace nvinfer1 { @@ -70,29 +70,14 @@ ILogger* getPluginLogger() } // namespace plugin } // namespace nvinfer1 -IPluginCreatorInterface* const* getCreatorsHelper(int32_t& nbAllCreators, int32_t& nbIPluginCreators) +extern "C" TENSORRTAPI IPluginCreatorInterface* const* getCreators(int32_t& nbCreators) { - nbAllCreators = 1; - nbIPluginCreators = 1; - static ROIAlignPluginCreator sRoiAlignCreator; + nbCreators = 1; + static ROIAlignV3PluginCreator sRoiAlignCreator; static IPluginCreatorInterface* const kPLUGIN_CREATOR_LIST[] = {&sRoiAlignCreator}; return kPLUGIN_CREATOR_LIST; } -extern "C" TENSORRTAPI IPluginCreator* const* getPluginCreators(int32_t& nbCreators) -{ - int32_t nbAllCreators; - auto creators = getCreatorsHelper(nbAllCreators, nbCreators); - - return reinterpret_cast<IPluginCreator* const*>(creators + (nbAllCreators - nbCreators)); -} - -extern "C" TENSORRTAPI IPluginCreatorInterface* const* getCreators(int32_t& nbCreators) -{ - int32_t nbIPluginCreators; - return getCreatorsHelper(nbCreators, nbIPluginCreators); -} - extern "C" TENSORRTAPI void setLoggerFinder(nvinfer1::ILoggerFinder* finder) { nvinfer1::plugin::gLoggerFinder.setLoggerFinder(finder); diff --git a/plugin/common/vfcCommon.h b/plugin/common/vfcCommon.h index 7b7db007..0f23c43f 100644 --- a/plugin/common/vfcCommon.h +++ b/plugin/common/vfcCommon.h @@ -35,7 +35,5 @@ ILogger* getPluginLogger(); extern "C" TENSORRTAPI void setLoggerFinder(nvinfer1::ILoggerFinder* finder); -extern "C" TENSORRTAPI IPluginCreator* const* getPluginCreators(int32_t& nbCreators); - extern "C" TENSORRTAPI IPluginCreatorInterface* const* getCreators(int32_t& nbCreators); #endif // TRT_PLUGIN_VFC_COMMON_H diff --git a/plugin/coordConvACPlugin/coordConvACPluginKernels.cu b/plugin/coordConvACPlugin/coordConvACPluginKernels.cu index 8f32aa87..5c3667dd 100644 --- a/plugin/coordConvACPlugin/coordConvACPluginKernels.cu +++ b/plugin/coordConvACPlugin/coordConvACPluginKernels.cu @@ -99,7 +99,8 @@ int CoordConvACPlugin::enqueue( case DataType::kFP8: case DataType::kBF16: case DataType::kINT64: - case DataType::kINT4: PLUGIN_FAIL("Unsupported data type"); + case DataType::kINT4: + PLUGIN_FAIL("Unsupported data type"); } return 1; } diff --git a/plugin/exports-vfc_plugin.def
b/plugin/exports-vfc_plugin.def deleted file mode 100644 index 28a79242..00000000 --- a/plugin/exports-vfc_plugin.def +++ /dev/null @@ -1,20 +0,0 @@ -; SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -; SPDX-License-Identifier: Apache-2.0 -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -LIBRARY nvinfer_vc_plugin_10 -EXPORTS -setLoggerFinder -getPluginCreators -getCreators diff --git a/plugin/exports-vfc_plugin.map b/plugin/exports-vfc_plugin.map index 7171544b..6e27d9ee 100644 --- a/plugin/exports-vfc_plugin.map +++ b/plugin/exports-vfc_plugin.map @@ -18,7 +18,6 @@ /* Hides all symbols except those specified in the global section */ { global: - getPluginCreators; getCreators; setLoggerFinder; local: *; diff --git a/plugin/exports.def b/plugin/exports.def deleted file mode 100644 index 20503473..00000000 --- a/plugin/exports.def +++ /dev/null @@ -1,20 +0,0 @@ -; SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -; SPDX-License-Identifier: Apache-2.0 -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. - -LIBRARY nvinfer_plugin_10 -EXPORTS -getInferLibVersion -getPluginRegistry -initLibNvInferPlugins diff --git a/plugin/exports.map b/plugin/exports.map index b68b1d16..545daee5 100644 --- a/plugin/exports.map +++ b/plugin/exports.map @@ -18,15 +18,6 @@ /* Hides all symbols except those specified in the global section */ { global: - getInferLibVersion; - getPluginRegistry; initLibNvInferPlugins; - extern "C++" { - nvinfer1::IPluginCreator::*; - nvinfer1::IPluginV2Ext::*; - nvinfer1::IPluginV2IOExt::*; - nvinfer1::PluginRegistrar*; - nvinfer1::plugin::*; - }; local: *; }; diff --git a/plugin/roiAlignPlugin/README.md b/plugin/roiAlignPlugin/README.md index f4f2f82e..3f0cf532 100644 --- a/plugin/roiAlignPlugin/README.md +++ b/plugin/roiAlignPlugin/README.md @@ -54,6 +54,8 @@ documentation. ## Changelog +April 2024: Implements `ROIAlignV3` which uses the `IPluginV3` interface. + June 2022: This is the first release of this `README.md` file. 
## Known issues diff --git a/plugin/roiAlignPlugin/ROIAlign_PluginConfig.yaml b/plugin/roiAlignPlugin/ROIAlign_PluginConfig.yaml index edafe3af..a39daae0 100644 --- a/plugin/roiAlignPlugin/ROIAlign_PluginConfig.yaml +++ b/plugin/roiAlignPlugin/ROIAlign_PluginConfig.yaml @@ -1,8 +1,8 @@ --- name: ROIAlign_TRT -interface: "IPluginV2DynamicExt" +interface: "IPluginV3" versions: - "1": + "2": inputs: - X - rois diff --git a/plugin/roiAlignPlugin/roiAlignPlugin.cpp b/plugin/roiAlignPlugin/roiAlignPlugin.cpp index 5681eff5..1dd4951f 100644 --- a/plugin/roiAlignPlugin/roiAlignPlugin.cpp +++ b/plugin/roiAlignPlugin/roiAlignPlugin.cpp @@ -21,23 +21,22 @@ using namespace nvinfer1; using namespace plugin; -using nvinfer1::plugin::ROIAlign; -using nvinfer1::plugin::ROIAlignPluginCreator; +using nvinfer1::plugin::ROIAlignV3; +using nvinfer1::plugin::ROIAlignV3PluginCreator; namespace { -char const* kROIALIGN_PLUGIN_VERSION{"1"}; -char const* kROIALIGN_PLUGIN_NAME{"ROIAlign_TRT"}; -size_t constexpr kSERIALIZATION_SIZE{sizeof(int32_t) * 5 + sizeof(float) + sizeof(int32_t) * 4}; +char const* gRoialignPluginVersion{"2"}; +char const* gRoialignPluginName{"ROIAlign_TRT"}; } // namespace -PluginFieldCollection ROIAlignPluginCreator::mFC{}; -std::vector<PluginField> ROIAlignPluginCreator::mPluginAttributes; +PluginFieldCollection ROIAlignV3PluginCreator::mFC{}; +std::vector<PluginField> ROIAlignV3PluginCreator::mPluginAttributes; -ROIAlignPluginCreator::ROIAlignPluginCreator() +ROIAlignV3PluginCreator::ROIAlignV3PluginCreator() { - static std::mutex mutex; - std::lock_guard<std::mutex> guard(mutex); + static std::mutex sMutex; + std::lock_guard<std::mutex> guard(sMutex); mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("coordinate_transformation_mode", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("mode", nullptr, PluginFieldType::kINT32, 1)); @@ -50,22 +49,23 @@ ROIAlignPluginCreator::ROIAlignPluginCreator() mFC.fields = mPluginAttributes.data(); } -char const* ROIAlignPluginCreator::getPluginName() const noexcept +char const* ROIAlignV3PluginCreator::getPluginName() const noexcept { - return kROIALIGN_PLUGIN_NAME; + return gRoialignPluginName; } -char const* ROIAlignPluginCreator::getPluginVersion() const noexcept +char const* ROIAlignV3PluginCreator::getPluginVersion() const noexcept { - return kROIALIGN_PLUGIN_VERSION; + return gRoialignPluginVersion; } -PluginFieldCollection const* ROIAlignPluginCreator::getFieldNames() noexcept +PluginFieldCollection const* ROIAlignV3PluginCreator::getFieldNames() noexcept { return &mFC; } -IPluginV2DynamicExt* ROIAlignPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV3* ROIAlignV3PluginCreator::createPlugin( + char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept { try { @@ -114,7 +114,7 @@ IPluginV2DynamicExt* ROIAlignPluginCreator::createPlugin(char const* name, Plugi aligned = static_cast<int32_t>(*(static_cast<int32_t const*>(fields[i].data))); } } - return new ROIAlign(outputHeight, outputWidth, samplingRatio, mode, spatialScale, aligned); + return new ROIAlignV3(outputHeight, outputWidth, samplingRatio, mode, spatialScale, aligned); } catch (std::exception const& e) { @@ -123,13 +123,54 @@ IPluginV2DynamicExt* ROIAlignPluginCreator::createPlugi return nullptr; } -IPluginV2DynamicExt* ROIAlignPluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept +void ROIAlignV3PluginCreator::setPluginNamespace(char const* libNamespace) noexcept +{ + mNamespace = libNamespace; +} + +char const* ROIAlignV3PluginCreator::getPluginNamespace() const noexcept +{ + return mNamespace.c_str(); +}
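A hedged sketch of how client code can obtain the new version-2 creator from the registry and instantiate the plugin with the same fields parsed above (field values are placeholders; unspecified fields fall back to the defaults in createPlugin()):

```cpp
// Sketch: creating the IPluginV3-based ROIAlign_TRT (version "2") from its creator.
#include <vector>
#include "NvInferRuntime.h"

nvinfer1::IPluginV3* makeRoiAlignV3()
{
    // getCreator() returns an IPluginCreatorInterface*; version "2" is the V3-style creator.
    auto* creator = static_cast<nvinfer1::IPluginCreatorV3One*>(
        getPluginRegistry()->getCreator("ROIAlign_TRT", "2", ""));
    if (creator == nullptr)
    {
        return nullptr;
    }

    int32_t outputHeight{7};
    int32_t outputWidth{7};
    float spatialScale{0.25F};
    std::vector<nvinfer1::PluginField> fields{
        {"output_height", &outputHeight, nvinfer1::PluginFieldType::kINT32, 1},
        {"output_width", &outputWidth, nvinfer1::PluginFieldType::kINT32, 1},
        {"spatial_scale", &spatialScale, nvinfer1::PluginFieldType::kFLOAT32, 1},
    };
    nvinfer1::PluginFieldCollection fc{static_cast<int32_t>(fields.size()), fields.data()};
    return creator->createPlugin("roi_align", &fc, nvinfer1::TensorRTPhase::kBUILD);
}
```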
+ROIAlignV3::ROIAlignV3( + int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, int32_t aligned) + : mOutputHeight(outputHeight) + , mOutputWidth(outputWidth) + , mSamplingRatio(samplingRatio) + , mSpatialScale(spatialScale) + , mMode(mode) + , mAligned(aligned) +{ + PLUGIN_VALIDATE(outputHeight > 0); + PLUGIN_VALIDATE(outputWidth > 0); + PLUGIN_VALIDATE(samplingRatio >= 0); + PLUGIN_VALIDATE(mode == 0 || mode == 1); + PLUGIN_VALIDATE(spatialScale > 0.0F); + PLUGIN_VALIDATE(aligned == 0 || aligned == 1); + + int32_t device; + PLUGIN_CUASSERT(cudaGetDevice(&device)); + cudaDeviceProp props; + PLUGIN_CUASSERT(cudaGetDeviceProperties(&props, device)); + + mMaxThreadsPerBlock = props.maxThreadsPerBlock; +} + +IPluginCapability* ROIAlignV3::getCapabilityInterface(PluginCapabilityType type) noexcept { try { - PLUGIN_VALIDATE(data != nullptr); - return new ROIAlign(data, length); + if (type == PluginCapabilityType::kBUILD) + { + return static_cast<IPluginV3OneBuild*>(this); + } + if (type == PluginCapabilityType::kRUNTIME) + { + return static_cast<IPluginV3OneRuntime*>(this); + } + PLUGIN_ASSERT(type == PluginCapabilityType::kCORE); + return static_cast<IPluginV3OneCore*>(this); } catch (std::exception const& e) { @@ -138,45 +179,55 @@ IPluginV2DynamicExt* ROIAlignPluginCreator::deserializePlugin( return nullptr; } -int32_t ROIAlign::getNbOutputs() const noexcept +IPluginV3* ROIAlignV3::clone() noexcept { - return 1; + try + { + auto plugin = std::make_unique<ROIAlignV3>(*this); + return plugin.release(); + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; } -int32_t ROIAlign::initialize() noexcept +char const* ROIAlignV3::getPluginName() const noexcept { - int32_t device; - PLUGIN_CHECK_CUDA(cudaGetDevice(&device)); - cudaDeviceProp props; - PLUGIN_CHECK_CUDA(cudaGetDeviceProperties(&props, device)); - - mMaxThreadsPerBlock = props.maxThreadsPerBlock; + return gRoialignPluginName; } - return 0; +char const* ROIAlignV3::getPluginVersion() const noexcept +{ + return gRoialignPluginVersion; } -void ROIAlign::terminate() noexcept {} +char const* ROIAlignV3::getPluginNamespace() const noexcept +{ + return mNameSpace.c_str(); +} -void ROIAlign::destroy() noexcept +int32_t ROIAlignV3::getNbOutputs() const noexcept { - delete this; + return 1; } -size_t ROIAlign::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +int32_t ROIAlignV3::configurePlugin( + DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept { return 0; } -bool ROIAlign::supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept +bool ROIAlignV3::supportsFormatCombination( + int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept { PLUGIN_ASSERT(inOut != nullptr); PLUGIN_ASSERT(pos >= 0 && pos <= 3); PLUGIN_ASSERT(nbInputs == 3); PLUGIN_ASSERT(nbOutputs == 1); - PluginTensorDesc const& desc = inOut[pos]; + PluginTensorDesc const& desc = inOut[pos].desc; if (desc.format != TensorFormat::kLINEAR) { return false; } // first input should be float16 or float32 if (pos == 0) { - return (inOut[pos].type == nvinfer1::DataType::kFLOAT || inOut[pos].type == nvinfer1::DataType::kHALF); + return (desc.type ==
nvinfer1::DataType::kFLOAT || desc.type == nvinfer1::DataType::kHALF); } // batch_indices always has to be int32 if (pos == 2) { - return (inOut[pos].type == nvinfer1::DataType::kINT32); + return (desc.type == nvinfer1::DataType::kINT32); } // rois and the output should have the same type as the first input - return (inOut[pos].type == inOut[0].type); -} - -char const* ROIAlign::getPluginType() const noexcept -{ - return kROIALIGN_PLUGIN_NAME; -} - -char const* ROIAlign::getPluginVersion() const noexcept -{ - return kROIALIGN_PLUGIN_VERSION; -} - -IPluginV2DynamicExt* ROIAlign::clone() const noexcept -{ - try - { - auto plugin = new ROIAlign(*this); - plugin->setPluginNamespace(mNameSpace.c_str()); - return plugin; - } - catch (std::exception const& e) - { - caughtError(e); - } - return nullptr; -} - -void ROIAlign::setPluginNamespace(char const* libNamespace) noexcept -{ - try - { - PLUGIN_ASSERT(libNamespace != nullptr); - mNameSpace = libNamespace; - } - catch (std::exception const& e) - { - gLogError << e.what() << std::endl; - } -} - -char const* ROIAlign::getPluginNamespace() const noexcept -{ - return mNameSpace.c_str(); -} - -void ROIAlign::checkValidInputs(nvinfer1::DynamicPluginTensorDesc const* inputs, int32_t nbInputDims) -{ - PLUGIN_ASSERT(inputs != nullptr); - PLUGIN_ASSERT(nbInputDims == 3); - - nvinfer1::Dims rois = inputs[1].desc.dims; - nvinfer1::Dims batchIndices = inputs[2].desc.dims; - - PLUGIN_ASSERT(rois.nbDims == 2); - PLUGIN_ASSERT(rois.d[1] == 4); - - PLUGIN_ASSERT(batchIndices.nbDims == 1); - // Check batch_indices matches rois in length - PLUGIN_ASSERT(rois.d[0] == batchIndices.d[0]); + return (desc.type == inOut[0].desc.type); } -void ROIAlign::validateAttributes( - int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, int32_t aligned) +int32_t ROIAlignV3::getOutputDataTypes( + DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept { - PLUGIN_VALIDATE(outputHeight > 0); - PLUGIN_VALIDATE(outputWidth > 0); - PLUGIN_VALIDATE(samplingRatio >= 0); - PLUGIN_VALIDATE(mode == 0 || mode == 1); - PLUGIN_VALIDATE(spatialScale > 0.0F); - PLUGIN_VALIDATE(aligned == 0 || aligned == 1); + PLUGIN_ASSERT(inputTypes != nullptr); + PLUGIN_ASSERT(nbInputs == 3); + PLUGIN_ASSERT(nbOutputs == 1); + outputTypes[0] = inputTypes[0]; + return 0; } -DimsExprs ROIAlign::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept +int32_t ROIAlignV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, + int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { PLUGIN_ASSERT(inputs != nullptr); PLUGIN_ASSERT(nbInputs == 3); - PLUGIN_ASSERT(outputIndex == 0); // there is only one output + PLUGIN_ASSERT(nbOutputs == 1); - nvinfer1::DimsExprs result; - result.nbDims = 4; + outputs[0].nbDims = 4; // mROICount - result.d[0] = inputs[1].d[0]; + outputs[0].d[0] = inputs[1].d[0]; // mFeatureLength - result.d[1] = inputs[0].d[1]; + outputs[0].d[1] = inputs[0].d[1]; // height auto const* height = exprBuilder.constant(mOutputHeight); PLUGIN_ASSERT(height != nullptr); - result.d[2] = height; + outputs[0].d[2] = height; // width auto const* width = exprBuilder.constant(mOutputWidth); PLUGIN_ASSERT(width != nullptr); - result.d[3] = width; + outputs[0].d[3] = width; - return result; + return 0; } -int32_t ROIAlign::enqueue(PluginTensorDesc const* inputDesc, 
PluginTensorDesc const* /* outputDesc */, - void const* const* inputs, void* const* outputs, void* /* workspace */, cudaStream_t stream) noexcept +int32_t ROIAlignV3::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, + void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); @@ -341,88 +331,66 @@ int32_t ROIAlign::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc co return 0; } -size_t ROIAlign::getSerializationSize() const noexcept +int32_t ROIAlignV3::onShapeChange( + PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept { - return kSERIALIZATION_SIZE; -} + PLUGIN_ASSERT(in != nullptr); + PLUGIN_ASSERT(out != nullptr); + PLUGIN_ASSERT(nbOutputs == 1); + PLUGIN_ASSERT(nbInputs == 3); -void ROIAlign::serialize(void* buffer) const noexcept -{ - PLUGIN_VALIDATE(buffer != nullptr); - char* d = static_cast<char*>(buffer); - char* a = d; - write(d, mAligned); // int32_t - write(d, mMode); // int32_t - write(d, mOutputHeight); // int32_t - write(d, mOutputWidth); // int32_t - write(d, mSamplingRatio); // int32_t - write(d, mSpatialScale); // float - - write(d, mROICount); // int32_t - write(d, mFeatureLength); // int32_t - write(d, mHeight); // int32_t - write(d, mWidth); // int32_t - PLUGIN_ASSERT(d == a + getSerializationSize()); + nvinfer1::Dims rois = in[1].dims; + nvinfer1::Dims batchIndices = in[2].dims; + + PLUGIN_ASSERT(rois.nbDims == 2); + PLUGIN_ASSERT(rois.d[1] == 4); + + PLUGIN_ASSERT(batchIndices.nbDims == 1); + // Check batch_indices matches rois in length + PLUGIN_ASSERT(rois.d[0] == batchIndices.d[0]); + + mFeatureLength = in[0].dims.d[1]; + mHeight = in[0].dims.d[2]; + mWidth = in[0].dims.d[3]; + + mROICount = in[1].dims.d[0]; + return 0; } -ROIAlign::ROIAlign( - int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, int32_t aligned) - : mOutputHeight(outputHeight) - , mOutputWidth(outputWidth) - , mSamplingRatio(samplingRatio) - , mSpatialScale(spatialScale) - , mMode(mode) - , mAligned(aligned) +IPluginV3* ROIAlignV3::attachToContext(IPluginResourceContext* context) noexcept { - validateAttributes(mOutputHeight, mOutputWidth, mSamplingRatio, mMode, mSpatialScale, mAligned); + return clone(); } -ROIAlign::ROIAlign(void const* data, size_t length) +PluginFieldCollection const* ROIAlignV3::getFieldsToSerialize() noexcept { - PLUGIN_VALIDATE(data != nullptr); - PLUGIN_VALIDATE(length == kSERIALIZATION_SIZE); - - char const* d = static_cast<char const*>(data); - char const* a = d; - - mAligned = read<int32_t>(d); - mMode = read<int32_t>(d); - mOutputHeight = read<int32_t>(d); - mOutputWidth = read<int32_t>(d); - mSamplingRatio = read<int32_t>(d); - mSpatialScale = read<float>(d); - - mROICount = read<int32_t>(d); - mFeatureLength = read<int32_t>(d); - mHeight = read<int32_t>(d); - mWidth = read<int32_t>(d); - - PLUGIN_VALIDATE(d == a + length); - validateAttributes(mOutputHeight, mOutputWidth, mSamplingRatio, mMode, mSpatialScale, mAligned); + mDataToSerialize.clear(); + mDataToSerialize.emplace_back("coordinate_transformation_mode", &mAligned, PluginFieldType::kINT32, 1); + mDataToSerialize.emplace_back("mode", &mMode, PluginFieldType::kINT32, 1); + mDataToSerialize.emplace_back("output_height", &mOutputHeight, PluginFieldType::kINT32, 1); + mDataToSerialize.emplace_back("output_width", &mOutputWidth, PluginFieldType::kINT32, 1); + mDataToSerialize.emplace_back("sampling_ratio", &mSamplingRatio, PluginFieldType::kINT32, 1); +
mDataToSerialize.emplace_back("spatial_scale", &mSpatialScale, PluginFieldType::kFLOAT32, 1); + mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.fields = mDataToSerialize.data(); + return &mFCToSerialize; } -DataType ROIAlign::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept +size_t ROIAlignV3::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, + DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept { - PLUGIN_ASSERT(inputTypes != nullptr); - PLUGIN_ASSERT(nbInputs == 3); - PLUGIN_ASSERT(index == 0); - return inputTypes[0]; + return 0; } -void ROIAlign::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void ROIAlignV3::setPluginNamespace(char const* libNamespace) noexcept { - PLUGIN_ASSERT(in != nullptr); - PLUGIN_ASSERT(out != nullptr); - PLUGIN_ASSERT(nbOutputs == 1); - PLUGIN_ASSERT(nbInputs == 3); - - checkValidInputs(in, nbInputs); - - mFeatureLength = in[0].desc.dims.d[1]; - mHeight = in[0].desc.dims.d[2]; - mWidth = in[0].desc.dims.d[3]; - - mROICount = in[1].desc.dims.d[0]; + try + { + PLUGIN_ASSERT(libNamespace != nullptr); + mNameSpace = libNamespace; + } + catch (std::exception const& e) + { + caughtError(e); + } } diff --git a/plugin/roiAlignPlugin/roiAlignPlugin.h b/plugin/roiAlignPlugin/roiAlignPlugin.h index e22d2571..ce4ab924 100644 --- a/plugin/roiAlignPlugin/roiAlignPlugin.h +++ b/plugin/roiAlignPlugin/roiAlignPlugin.h @@ -30,52 +30,79 @@ namespace nvinfer1 namespace plugin { -class ROIAlign : public IPluginV2DynamicExt +class ROIAlignV3PluginCreator : public nvinfer1::IPluginCreatorV3One { public: - ROIAlign(int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, + ROIAlignV3PluginCreator(); + + ~ROIAlignV3PluginCreator() override = default; + + char const* getPluginName() const noexcept override; + + char const* getPluginVersion() const noexcept override; + + PluginFieldCollection const* getFieldNames() noexcept override; + + IPluginV3* createPlugin(char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept override; + + void setPluginNamespace(char const* libNamespace) noexcept; + + char const* getPluginNamespace() const noexcept override; + +private: + static PluginFieldCollection mFC; + static std::vector mPluginAttributes; + std::string mNamespace; +}; + +class ROIAlignV3 : public IPluginV3, public IPluginV3OneCore, public IPluginV3OneBuild, public IPluginV3OneRuntime +{ +public: + ROIAlignV3(int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, int32_t aligned); - ROIAlign(void const* data, size_t length); - ROIAlign() = default; - ~ROIAlign() override = default; + ROIAlignV3(ROIAlignV3 const&) = default; + ~ROIAlignV3() override = default; + + IPluginCapability* getCapabilityInterface(PluginCapabilityType type) noexcept override; + + IPluginV3* clone() noexcept override; + + char const* getPluginName() const noexcept override; - // IPluginV2 methods - char const* getPluginType() const noexcept override; char const* getPluginVersion() const noexcept override; - int32_t getNbOutputs() const noexcept override; - int32_t initialize() noexcept override; - void terminate() noexcept override; - size_t getSerializationSize() const noexcept override; - void serialize(void* buffer) const noexcept override; - void destroy() noexcept override; - void 
setPluginNamespace(char const* libNamespace) noexcept override; + char const* getPluginNamespace() const noexcept override; - void setClipParam(bool clip) noexcept; - void setScoreBits(int32_t scoreBits) noexcept; - void setCaffeSemantics(bool caffeSemantics) noexcept; - - // IPluginV2Ext methods - nvinfer1::DataType getOutputDataType( - int32_t index, nvinfer1::DataType const* inputType, int32_t nbInputs) const noexcept override; - - // IPluginV2DynamicExt methods - IPluginV2DynamicExt* clone() const noexcept override; - DimsExprs getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept override; - bool supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; - void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, + + int32_t getNbOutputs() const noexcept override; + + int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; - size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, - int32_t nbOutputs) const noexcept override; + + bool supportsFormatCombination( + int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; + + int32_t getOutputDataTypes( + DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept override; + + int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, + int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept override; + int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; -private: - void checkValidInputs(nvinfer1::DynamicPluginTensorDesc const* inputs, int32_t nbInputDims); - void validateAttributes(int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, - float spatialScale, int32_t aligned); + int32_t onShapeChange( + PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept override; + IPluginV3* attachToContext(IPluginResourceContext* context) noexcept override; + + PluginFieldCollection const* getFieldsToSerialize() noexcept override; + + size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, + DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; + + void setPluginNamespace(char const* libNamespace) noexcept; + +private: int32_t mOutputHeight{}; int32_t mOutputWidth{}; int32_t mSamplingRatio{}; @@ -91,29 +118,9 @@ class ROIAlign : public IPluginV2DynamicExt int32_t mMaxThreadsPerBlock{}; std::string mNameSpace{}; -}; - -class ROIAlignPluginCreator : public nvinfer1::pluginInternal::BaseCreator -{ -public: - ROIAlignPluginCreator(); - - ~ROIAlignPluginCreator() override = default; - - char const* getPluginName() const noexcept override; - - char const* getPluginVersion() const noexcept override; - PluginFieldCollection const* getFieldNames() noexcept override; - - IPluginV2DynamicExt* createPlugin(char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept override; - - IPluginV2DynamicExt* deserializePlugin( - char const* name, void const* serialData, size_t serialLength) 
noexcept override; - -private: - static PluginFieldCollection mFC; - static std::vector<PluginField> mPluginAttributes; + std::vector<nvinfer1::PluginField> mDataToSerialize; + nvinfer1::PluginFieldCollection mFCToSerialize; }; } // namespace plugin diff --git a/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp b/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp new file mode 100644 index 00000000..7ba8f7d1 --- /dev/null +++ b/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp @@ -0,0 +1,428 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "roiAlignPluginLegacy.h" +#include "roiAlignKernel.h" +#include +#include + +using namespace nvinfer1; +using namespace plugin; +using nvinfer1::plugin::ROIAlign; +using nvinfer1::plugin::ROIAlignPluginCreator; + +namespace +{ +char const* gRoialignPluginVersion{"1"}; +char const* gRoialignPluginName{"ROIAlign_TRT"}; +size_t constexpr kSERIALIZATION_SIZE{sizeof(int32_t) * 5 + sizeof(float) + sizeof(int32_t) * 4}; +} // namespace + +PluginFieldCollection ROIAlignPluginCreator::mFC{}; +std::vector<PluginField> ROIAlignPluginCreator::mPluginAttributes; + +ROIAlignPluginCreator::ROIAlignPluginCreator() +{ + static std::mutex sMutex; + std::lock_guard<std::mutex> guard(sMutex); + mPluginAttributes.clear(); + mPluginAttributes.emplace_back(PluginField("coordinate_transformation_mode", nullptr, PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back(PluginField("mode", nullptr, PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back(PluginField("output_height", nullptr, PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back(PluginField("output_width", nullptr, PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back(PluginField("sampling_ratio", nullptr, PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back(PluginField("spatial_scale", nullptr, PluginFieldType::kFLOAT32, 1)); + + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +char const* ROIAlignPluginCreator::getPluginName() const noexcept +{ + return gRoialignPluginName; +} + +char const* ROIAlignPluginCreator::getPluginVersion() const noexcept +{ + return gRoialignPluginVersion; +} + +PluginFieldCollection const* ROIAlignPluginCreator::getFieldNames() noexcept +{ + return &mFC; +}
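Unlike the V3 plugin above, which round-trips its state through getFieldsToSerialize(), this legacy creator restores plugins from the raw byte blob written by ROIAlign::serialize() (see deserializePlugin() just below). A short illustrative sketch of that path; TensorRT normally invokes it internally during engine deserialization, and the helper name here is an assumption:

```cpp
// Sketch: the V1 deserialization path kept alive by this legacy creator.
// `creator` would be the registered ROIAlign_TRT version-"1" creator.
nvinfer1::IPluginV2DynamicExt* restoreLegacyRoiAlign(
    nvinfer1::IPluginCreator& creator, void const* blob, size_t len)
{
    // The blob layout matches ROIAlign::serialize(): nine int32_t fields plus one float.
    return static_cast<nvinfer1::IPluginV2DynamicExt*>(
        creator.deserializePlugin("ROIAlign_TRT", blob, len));
}
```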
if (!strcmp(attrName, "output_width")) + { + PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kINT32); + outputWidth = static_cast(*(static_cast(fields[i].data))); + } + else if (!strcmp(attrName, "sampling_ratio")) + { + PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kINT32); + samplingRatio = static_cast(*(static_cast(fields[i].data))); + } + else if (!strcmp(attrName, "mode")) + { + PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kINT32); + mode = static_cast(*(static_cast(fields[i].data))); + } + else if (!strcmp(attrName, "spatial_scale")) + { + PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kFLOAT32); + spatialScale = static_cast(*(static_cast(fields[i].data))); + } + else if (!strcmp(attrName, "coordinate_transformation_mode")) + { + PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kINT32); + aligned = static_cast(*(static_cast(fields[i].data))); + } + } + return new ROIAlign(outputHeight, outputWidth, samplingRatio, mode, spatialScale, aligned); + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +IPluginV2DynamicExt* ROIAlignPluginCreator::deserializePlugin( + char const* name, void const* data, size_t length) noexcept +{ + try + { + PLUGIN_VALIDATE(data != nullptr); + return new ROIAlign(data, length); + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +int32_t ROIAlign::getNbOutputs() const noexcept +{ + return 1; +} + +int32_t ROIAlign::initialize() noexcept +{ + int32_t device; + PLUGIN_CHECK_CUDA(cudaGetDevice(&device)); + cudaDeviceProp props; + PLUGIN_CHECK_CUDA(cudaGetDeviceProperties(&props, device)); + + mMaxThreadsPerBlock = props.maxThreadsPerBlock; + + return 0; +} + +void ROIAlign::terminate() noexcept {} + +void ROIAlign::destroy() noexcept +{ + delete this; +} + +size_t ROIAlign::getWorkspaceSize( + PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +{ + return 0; +} + +bool ROIAlign::supportsFormatCombination( + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept +{ + PLUGIN_ASSERT(inOut != nullptr); + PLUGIN_ASSERT(pos >= 0 && pos <= 3); + PLUGIN_ASSERT(nbInputs == 3); + PLUGIN_ASSERT(nbOutputs == 1); + + PluginTensorDesc const& desc = inOut[pos]; + if (desc.format != TensorFormat::kLINEAR) + { + return false; + } + + // first input should be float16 or float32 + if (pos == 0) + { + return (inOut[pos].type == nvinfer1::DataType::kFLOAT || inOut[pos].type == nvinfer1::DataType::kHALF); + } + + // batch_indices always has to be int32 + if (pos == 2) + { + return (inOut[pos].type == nvinfer1::DataType::kINT32); + } + + // rois and the output should have the same type as the first input + return (inOut[pos].type == inOut[0].type); +} + +char const* ROIAlign::getPluginType() const noexcept +{ + return gRoialignPluginName; +} + +char const* ROIAlign::getPluginVersion() const noexcept +{ + return gRoialignPluginVersion; +} + +IPluginV2DynamicExt* ROIAlign::clone() const noexcept +{ + try + { + auto plugin = new ROIAlign(*this); + plugin->setPluginNamespace(mNameSpace.c_str()); + return plugin; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +void ROIAlign::setPluginNamespace(char const* libNamespace) noexcept +{ + try + { + PLUGIN_ASSERT(libNamespace != nullptr); + mNameSpace = libNamespace; + } + catch (std::exception const& e) + { + gLogError << e.what() << std::endl; + } +} + +char const* ROIAlign::getPluginNamespace() const noexcept +{ + 
+char const* ROIAlign::getPluginNamespace() const noexcept +{ + return mNameSpace.c_str(); +} + +void ROIAlign::checkValidInputs(nvinfer1::DynamicPluginTensorDesc const* inputs, int32_t nbInputDims) +{ + PLUGIN_ASSERT(inputs != nullptr); + PLUGIN_ASSERT(nbInputDims == 3); + + nvinfer1::Dims rois = inputs[1].desc.dims; + nvinfer1::Dims batchIndices = inputs[2].desc.dims; + + PLUGIN_ASSERT(rois.nbDims == 2); + PLUGIN_ASSERT(rois.d[1] == 4); + + PLUGIN_ASSERT(batchIndices.nbDims == 1); + // Check batch_indices matches rois in length + PLUGIN_ASSERT(rois.d[0] == batchIndices.d[0]); +} + +void ROIAlign::validateAttributes( + int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, int32_t aligned) +{ + PLUGIN_VALIDATE(outputHeight > 0); + PLUGIN_VALIDATE(outputWidth > 0); + PLUGIN_VALIDATE(samplingRatio >= 0); + PLUGIN_VALIDATE(mode == 0 || mode == 1); + PLUGIN_VALIDATE(spatialScale > 0.0F); + PLUGIN_VALIDATE(aligned == 0 || aligned == 1); +} + +DimsExprs ROIAlign::getOutputDimensions( + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept +{ + PLUGIN_ASSERT(inputs != nullptr); + PLUGIN_ASSERT(nbInputs == 3); + PLUGIN_ASSERT(outputIndex == 0); // there is only one output + + nvinfer1::DimsExprs result; + result.nbDims = 4; + + // mROICount + result.d[0] = inputs[1].d[0]; + // mFeatureLength + result.d[1] = inputs[0].d[1]; + // height + auto const* height = exprBuilder.constant(mOutputHeight); + PLUGIN_ASSERT(height != nullptr); + result.d[2] = height; + // width + auto const* width = exprBuilder.constant(mOutputWidth); + PLUGIN_ASSERT(width != nullptr); + result.d[3] = width; + + return result; +} + +int32_t ROIAlign::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* /* outputDesc */, + void const* const* inputs, void* const* outputs, void* /* workspace */, cudaStream_t stream) noexcept +{ + PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); + + // No-op pass-through for empty ROIs + if (mROICount == 0) + { + return 0; + } + + auto type = inputDesc[0].type; + + PLUGIN_ASSERT(type == nvinfer1::DataType::kHALF || type == nvinfer1::DataType::kFLOAT); + + switch (type) + { + case nvinfer1::DataType::kFLOAT: + { + auto bottomData = static_cast<float const*>(inputs[0]); + auto bottomRois = static_cast<float const*>(inputs[1]); + auto batchIndicesPtr = static_cast<int32_t const*>(inputs[2]); + auto topData = static_cast<float*>(outputs[0]); + + return RoiAlignImpl<float>(stream, mMaxThreadsPerBlock, bottomData, mSpatialScale, mROICount, mFeatureLength, + mHeight, mWidth, mOutputHeight, mOutputWidth, mSamplingRatio, bottomRois, topData, mMode, batchIndicesPtr, + mAligned); + } + break; + case nvinfer1::DataType::kHALF: + { + auto bottomData = static_cast<__half const*>(inputs[0]); + auto bottomRois = static_cast<__half const*>(inputs[1]); + auto batchIndicesPtr = static_cast<int32_t const*>(inputs[2]); + auto topData = static_cast<__half*>(outputs[0]); + + return RoiAlignImpl<__half>(stream, mMaxThreadsPerBlock, bottomData, mSpatialScale, mROICount, mFeatureLength, + mHeight, mWidth, mOutputHeight, mOutputWidth, mSamplingRatio, bottomRois, topData, mMode, batchIndicesPtr, + mAligned); + } + break; + default: return -1; + } + + return 0; +} + +size_t ROIAlign::getSerializationSize() const noexcept +{ + return kSERIALIZATION_SIZE; +} + +void ROIAlign::serialize(void* buffer) const noexcept +{ + PLUGIN_VALIDATE(buffer != nullptr); + char* d = static_cast<char*>(buffer); + char* a = d; + write(d, mAligned); // int32_t + write(d, mMode); // int32_t + write(d, mOutputHeight); // int32_t + write(d, mOutputWidth); // 
int32_t + write(d, mSamplingRatio); // int32_t + write(d, mSpatialScale); // float + + write(d, mROICount); // int32_t + write(d, mFeatureLength); // int32_t + write(d, mHeight); // int32_t + write(d, mWidth); // int32_t + PLUGIN_ASSERT(d == a + getSerializationSize()); +} + +ROIAlign::ROIAlign( + int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, int32_t aligned) + : mOutputHeight(outputHeight) + , mOutputWidth(outputWidth) + , mSamplingRatio(samplingRatio) + , mSpatialScale(spatialScale) + , mMode(mode) + , mAligned(aligned) +{ + validateAttributes(mOutputHeight, mOutputWidth, mSamplingRatio, mMode, mSpatialScale, mAligned); +} + +ROIAlign::ROIAlign(void const* data, size_t length) +{ + PLUGIN_VALIDATE(data != nullptr); + PLUGIN_VALIDATE(length == kSERIALIZATION_SIZE); + + char const* d = static_cast<char const*>(data); + char const* a = d; + + mAligned = read<int32_t>(d); + mMode = read<int32_t>(d); + mOutputHeight = read<int32_t>(d); + mOutputWidth = read<int32_t>(d); + mSamplingRatio = read<int32_t>(d); + mSpatialScale = read<float>(d); + + mROICount = read<int32_t>(d); + mFeatureLength = read<int32_t>(d); + mHeight = read<int32_t>(d); + mWidth = read<int32_t>(d); + + PLUGIN_VALIDATE(d == a + length); + validateAttributes(mOutputHeight, mOutputWidth, mSamplingRatio, mMode, mSpatialScale, mAligned); +} + +DataType ROIAlign::getOutputDataType( + int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept +{ + PLUGIN_ASSERT(inputTypes != nullptr); + PLUGIN_ASSERT(nbInputs == 3); + PLUGIN_ASSERT(index == 0); + return inputTypes[0]; +} + +void ROIAlign::configurePlugin( + DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +{ + PLUGIN_ASSERT(in != nullptr); + PLUGIN_ASSERT(out != nullptr); + PLUGIN_ASSERT(nbOutputs == 1); + PLUGIN_ASSERT(nbInputs == 3); + + checkValidInputs(in, nbInputs); + + mFeatureLength = in[0].desc.dims.d[1]; + mHeight = in[0].desc.dims.d[2]; + mWidth = in[0].desc.dims.d[3]; + + mROICount = in[1].desc.dims.d[0]; +}
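The blob written by serialize() and consumed by the deserializing constructor above is ten 4-byte fields in a fixed order. A minimal cross-check sketch (the struct is illustrative, not part of the patch; only the field order and sizes come from the code above):

```cpp
#include <cstdint>
#include <cstring>

// Field order mirrors the write()/read() sequence in ROIAlign::serialize().
struct RoiAlignBlob
{
    int32_t aligned;
    int32_t mode;
    int32_t outputHeight;
    int32_t outputWidth;
    int32_t samplingRatio;
    float spatialScale;
    int32_t roiCount;
    int32_t featureLength;
    int32_t height;
    int32_t width;
};

// All members are 4 bytes and 4-byte aligned, so there is no padding and the struct
// size equals kSERIALIZATION_SIZE = sizeof(int32_t) * 5 + sizeof(float) + sizeof(int32_t) * 4.
static_assert(sizeof(RoiAlignBlob) == 10 * 4, "serialization layout drift");

// Decoding works because write()/read() advance a char cursor field by field.
RoiAlignBlob decodeRoiAlignBlob(void const* data)
{
    RoiAlignBlob blob{};
    std::memcpy(&blob, data, sizeof(blob));
    return blob;
}
```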
diff --git a/plugin/roiAlignPlugin/roiAlignPluginLegacy.h b/plugin/roiAlignPlugin/roiAlignPluginLegacy.h new file mode 100644 index 00000000..1ce69f55 --- /dev/null +++ b/plugin/roiAlignPlugin/roiAlignPluginLegacy.h @@ -0,0 +1,121 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef TRT_ROIALIGN_PLUGIN_LEGACY_H +#define TRT_ROIALIGN_PLUGIN_LEGACY_H + +#include "common/plugin.h" +#include <cstdint> +#include <string> +#include <vector> + +#include "NvInfer.h" +#include "NvInferPlugin.h" + +namespace nvinfer1 +{ +namespace plugin +{ + +class ROIAlign : public IPluginV2DynamicExt +{ +public: + ROIAlign(int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, + int32_t aligned); + ROIAlign(void const* data, size_t length); + ROIAlign() = default; + ~ROIAlign() override = default; + + // IPluginV2 methods + char const* getPluginType() const noexcept override; + char const* getPluginVersion() const noexcept override; + int32_t getNbOutputs() const noexcept override; + int32_t initialize() noexcept override; + void terminate() noexcept override; + size_t getSerializationSize() const noexcept override; + void serialize(void* buffer) const noexcept override; + void destroy() noexcept override; + void setPluginNamespace(char const* libNamespace) noexcept override; + char const* getPluginNamespace() const noexcept override; + void setClipParam(bool clip) noexcept; + void setScoreBits(int32_t scoreBits) noexcept; + void setCaffeSemantics(bool caffeSemantics) noexcept; + + // IPluginV2Ext methods + nvinfer1::DataType getOutputDataType( + int32_t index, nvinfer1::DataType const* inputType, int32_t nbInputs) const noexcept override; + + // IPluginV2DynamicExt methods + IPluginV2DynamicExt* clone() const noexcept override; + DimsExprs getOutputDimensions( + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept override; + bool supportsFormatCombination( + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; + void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, + int32_t nbOutputs) noexcept override; + size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, + int32_t nbOutputs) const noexcept override; + int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, void const* const* inputs, + void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; + +private: + void checkValidInputs(nvinfer1::DynamicPluginTensorDesc const* inputs, int32_t nbInputDims); + void validateAttributes(int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, + float spatialScale, int32_t aligned); + + int32_t mOutputHeight{}; + int32_t mOutputWidth{}; + int32_t mSamplingRatio{}; + float mSpatialScale{}; + int32_t mMode{}; + int32_t mAligned{}; + + int32_t mROICount{}; + int32_t mFeatureLength{}; // number of channels + int32_t mHeight{}; + int32_t mWidth{}; + + int32_t mMaxThreadsPerBlock{}; + + std::string mNameSpace{}; +}; + +class ROIAlignPluginCreator : public nvinfer1::pluginInternal::BaseCreator +{ +public: + ROIAlignPluginCreator(); + + ~ROIAlignPluginCreator() override = default; + + char const* getPluginName() const noexcept override; + + char const* getPluginVersion() const noexcept override; + + PluginFieldCollection const* getFieldNames() noexcept override; + + IPluginV2DynamicExt* createPlugin(char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept override; + + IPluginV2DynamicExt* deserializePlugin( + char const* name, void const* serialData, size_t serialLength) noexcept override; + +private: + static PluginFieldCollection mFC; + static std::vector<PluginField> mPluginAttributes; +}; + +} // namespace plugin +} // namespace nvinfer1 +#endif // TRT_ROIALIGN_PLUGIN_LEGACY_H
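Putting the header together with the creator, a serialize/deserialize round trip looks like the following hedged sketch; it is essentially what the runtime does when rebuilding the plugin from an engine, with error handling elided:

```cpp
#include "roiAlignPluginLegacy.h"
#include <vector>

// Sketch only: serialize an existing legacy plugin and rebuild it via the creator.
nvinfer1::IPluginV2DynamicExt* roundTrip(
    nvinfer1::plugin::ROIAlign const& plugin, nvinfer1::plugin::ROIAlignPluginCreator& creator)
{
    std::vector<char> blob(plugin.getSerializationSize()); // kSERIALIZATION_SIZE bytes
    plugin.serialize(blob.data());
    // deserializePlugin() forwards to the ROIAlign(void const*, size_t) constructor,
    // which validates the length and attribute ranges before accepting the blob.
    return creator.deserializePlugin("roi_align", blob.data(), blob.size());
}
```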
diff --git a/plugin/scatterElementsPlugin/scatterElementsPluginKernel.cu b/plugin/scatterElementsPlugin/scatterElementsPluginKernel.cu index b09db5ae..f34ffaba 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPluginKernel.cu +++ b/plugin/scatterElementsPlugin/scatterElementsPluginKernel.cu @@ -66,7 +66,7 @@ bool hasBfloat16AtomicAdd() cudaGetDeviceProperties(&deviceProp, deviceId); return deviceProp.major >= 8; } - + inline uint32_t getElementSize(nvinfer1::DataType t) noexcept { switch (t) @@ -79,8 +79,9 @@ inline uint32_t getElementSize(nvinfer1::DataType t) noexcept case nvinfer1::DataType::kBOOL: case nvinfer1::DataType::kUINT8: case nvinfer1::DataType::kINT8: - case nvinfer1::DataType::kINT4: case nvinfer1::DataType::kFP8: return 1; + case nvinfer1::DataType::kINT4: + PLUGIN_FAIL("Unsupported data type"); } return 0; } diff --git a/plugin/scatterPlugin/scatterPlugin.cpp b/plugin/scatterPlugin/scatterPlugin.cpp index a19e555c..d6e5a9c0 100644 --- a/plugin/scatterPlugin/scatterPlugin.cpp +++ b/plugin/scatterPlugin/scatterPlugin.cpp @@ -159,7 +159,8 @@ int32_t ScatterND::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc c case DataType::kFP8: case DataType::kBF16: case DataType::kINT64: - case DataType::kINT4: PLUGIN_FAIL("Unsupported data type"); + case DataType::kINT4: + PLUGIN_FAIL("Unsupported data type"); } for (int32_t i = indexRank; i < dataDims.nbDims; i++) diff --git a/plugin/splitPlugin/split.cu b/plugin/splitPlugin/split.cu index 0afec432..7a636318 100644 --- a/plugin/splitPlugin/split.cu +++ b/plugin/splitPlugin/split.cu @@ -17,9 +17,22 @@ #include #include +#include <thrust/device_vector.h> #include "split.h" +namespace nvinfer1 +{ +namespace plugin +{ +struct SplitPluginDeviceVectors +{ + thrust::device_vector<int> _d_segment_offsets; + thrust::device_vector<float*> _d_output_ptrs; +}; +} // namespace plugin +} // namespace nvinfer1 + using namespace nvinfer1; using nvinfer1::plugin::SplitPlugin; @@ -101,12 +114,13 @@ void SplitPlugin::terminate() noexcept void SplitPlugin::configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) noexcept { + deviceVectors.reset(new SplitPluginDeviceVectors); std::vector<int> segment_offsets(1, 0); for( int i = 0; i < nbOutputs; ++i ) { segment_offsets.push_back(segment_offsets.back() + _output_lengths[i]); } - _d_segment_offsets = segment_offsets; + deviceVectors->_d_segment_offsets = segment_offsets; for (int i = 0; i < nbInputs; i++) { @@ -129,7 +143,7 @@ void SplitPlugin::configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, i { _nz *= dims.d[i]; } - _d_output_ptrs.resize(nbOutputs, nullptr); + deviceVectors->_d_output_ptrs.resize(nbOutputs, nullptr); } int SplitPlugin::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* /* outputDesc */, @@ -138,13 +152,13 @@ int SplitPlugin::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvin PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); int const* d_segment_offsets_ptr = - thrust::raw_pointer_cast(&_d_segment_offsets[0]); + thrust::raw_pointer_cast(&(deviceVectors->_d_segment_offsets)[0]); float const* idata = reinterpret_cast<float const*>(inputs[0]); float* const* h_odatas = reinterpret_cast<float* const*>(outputs); - float** odatas = thrust::raw_pointer_cast(&_d_output_ptrs[0]); + float** odatas = thrust::raw_pointer_cast(&(deviceVectors->_d_output_ptrs)[0]); cudaError_t cuda_status = cudaMemcpyAsync(odatas, h_odatas, - _d_output_ptrs.size() * sizeof(float*), + (deviceVectors->_d_output_ptrs).size() * sizeof(float*), cudaMemcpyHostToDevice, stream); if( cuda_status != cudaSuccess ) {
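The split.cu change above is worth a note: the thrust::device_vector members move out of the class and behind a shared_ptr to an opaque struct, so the public header (see the split.h hunk in the next chunk) no longer drags in thrust, and device allocations happen lazily when configurePlugin() runs. A generic sketch of the pattern, with illustrative names rather than TensorRT API:

```cpp
// --- header side: no thrust include needed, only a forward declaration ---
#include <memory>
struct DeviceState; // defined only in the .cu file

class Holder
{
public:
    void configure(int n); // allocates lazily, like SplitPlugin::configurePlugin()
private:
    // shared_ptr works with an incomplete type because its deleter is type-erased
    // and captured where DeviceState is complete (the .cu file); copies of the
    // plugin made by clone() then share one set of device buffers.
    std::shared_ptr<DeviceState> mState;
};

// --- translation unit compiled by nvcc ---
#include <thrust/device_vector.h>
struct DeviceState
{
    thrust::device_vector<int> offsets;
};

void Holder::configure(int n)
{
    mState.reset(new DeviceState); // first device allocation happens here, not at plugin construction
    mState->offsets.resize(n);
}
```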
@@ -158,13 +172,13 @@ int SplitPlugin::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvin if (inputDesc[0].type==nvinfer1::DataType::kFLOAT) { split_kernel<<>> - (_d_segment_offsets.size(), d_segment_offsets_ptr, idata, odatas, + ((deviceVectors->_d_segment_offsets).size(), d_segment_offsets_ptr, idata, odatas, _nx, _ny, nz); } else { split_kernel<<>> - (_d_segment_offsets.size(), d_segment_offsets_ptr, (__half const*)idata, (__half**)odatas, + ((deviceVectors->_d_segment_offsets).size(), d_segment_offsets_ptr, (__half const*)idata, (__half**)odatas, _nx, _ny, nz); } return cudaGetLastError() != cudaSuccess; diff --git a/plugin/splitPlugin/split.h b/plugin/splitPlugin/split.h index 2d7a9bd5..4553b28a 100644 --- a/plugin/splitPlugin/split.h +++ b/plugin/splitPlugin/split.h @@ -23,8 +23,8 @@ #include "common/serialize.hpp" #include +#include <memory> #include -#include <thrust/device_vector.h> namespace { @@ -36,14 +36,15 @@ namespace nvinfer1 { namespace plugin { +struct SplitPluginDeviceVectors; + class TRT_DEPRECATED SplitPlugin final : public nvinfer1::IPluginV2DynamicExt { int32_t _axis; std::vector<int> _output_lengths; int32_t _nx, _ny, _nz; int32_t _x_stride, _y_stride, _z_stride; - thrust::device_vector<int> _d_segment_offsets; - thrust::device_vector<float*> _d_output_ptrs; + std::shared_ptr<SplitPluginDeviceVectors> deviceVectors; using IPluginV2::getOutputDimensions; using IPluginV2::getWorkspaceSize; diff --git a/python/docstrings/infer/pyCoreDoc.h b/python/docstrings/infer/pyCoreDoc.h index d59d6ac0..eb604771 100644 --- a/python/docstrings/infer/pyCoreDoc.h +++ b/python/docstrings/infer/pyCoreDoc.h @@ -397,7 +397,7 @@ constexpr char const* descr = R"trtdoc( :ivar profiler: :class:`IProfiler` The profiler in use by this :class:`IExecutionContext` . :ivar engine: :class:`ICudaEngine` The associated :class:`ICudaEngine` . :ivar name: :class:`str` The name of the :class:`IExecutionContext` . - :ivar device_memory: :class:`capsule` The device memory for use by this execution context. The memory must be aligned on a 256-byte boundary, and its size must be at least :attr:`engine.device_memory_size`. If using :func:`execute_v2()`, it is in use until :func:`execute_v2()` returns. Releasing or otherwise using the memory for other purposes during this time will result in undefined behavior. + :ivar device_memory: :class:`capsule` The device memory for use by this execution context. The memory must be aligned with the CUDA memory alignment property (queried via :func:`cuda.cudart.cudaGetDeviceProperties()`), and its size must be large enough for performing inference with the given network inputs. :func:`engine.device_memory_size` and :func:`engine.get_device_memory_size_for_profile` report upper bounds of the size. Setting memory to nullptr is acceptable if the reported size is 0. If using :func:`execute_async_v3()` to run the network, the memory is in use from the invocation of :func:`execute_async_v3()` until network execution is complete. If using :func:`execute_v2()`, it is in use until :func:`execute_v2()` returns. Releasing or otherwise using the memory for other purposes, including using it in another execution context running in parallel, during this time will result in undefined behavior. :ivar active_optimization_profile: :class:`int` The active optimization profile for the context. 
The selected profile will be used in subsequent calls to :func:`execute_v2()`. Profile 0 is selected by default. This is a readonly property and the active optimization profile can be changed with :func:`set_optimization_profile_async()`. Changing this value will invalidate all dynamic bindings for the current execution context, so that they have to be set again using :func:`set_input_shape` before calling :func:`execute_v2()`. :ivar all_binding_shapes_specified: :class:`bool` Whether all dynamic dimensions of input tensors have been specified by calling :func:`set_input_shape` . Trivially true if network has no dynamically shaped input tensors. Does not work with name-based interfaces, e.g. :func:`set_input_shape()`. Use :func:`infer_shapes()` instead. :ivar all_shape_inputs_specified: :class:`bool` Whether values for all input shape tensors have been specified by calling :func:`set_shape_input` . Trivially true if network has no input shape bindings. Does not work with name-based interfaces, e.g. :func:`set_input_shape()`. Use :func:`infer_shapes()` instead. @@ -406,9 +406,6 @@ constexpr char const* descr = R"trtdoc( :ivar persistent_cache_limit: The maximum size of persistent L2 cache that this execution context may use for activation caching. Activation caching is not supported on all architectures - see "How TensorRT uses Memory" in the developer guide for details. The default is 0 Bytes. :ivar nvtx_verbosity: The NVTX verbosity of the execution context. Building with DETAILED verbosity will generally increase latency in enqueueV3(). Call this method to select NVTX verbosity in this execution context at runtime. The default is the verbosity with which the engine was built, and the verbosity may not be raised above that level. This function does not affect how IEngineInspector interacts with the engine. :ivar temporary_allocator: :class:`IGpuAllocator` The GPU allocator used for internal temporary storage. - :ivar weight_streaming_budget: Set and get the current weight streaming budget for inference. The budget may be set to -1 disabling weight streaming at runtime, 0 (default) enabling TRT to choose to weight stream or not, or a positive value in the inclusive range [minimum_weight_streaming_budget, streamable_weights_size - 1]. - :ivar minimum_weight_streaming_budget: Returns the minimum weight streaming budget in bytes required to run the network successfully. The engine must have been built with kWEIGHT_STREAMING. - :ivar streamable_weights_size: Returns the size of the streamable weights in the engine. This may not include all the weights. )trtdoc"; constexpr char const* execute_v2 = R"trtdoc( @@ -421,13 +418,14 @@ )trtdoc"; // TODO: Check if this makes sense to have. -constexpr char const* device_memory = R"trtdoc( +constexpr char const* set_device_memory = R"trtdoc( The device memory for use by this :class:`IExecutionContext` . - The memory must be aligned on a 256-byte boundary, and its size must be at least that - returned by getDeviceMemorySize(). If using :func:`execute_v2()`, - it is in use until :func:`execute_v2()` returns. Releasing or otherwise using the memory for other - purposes during this time will result in undefined behavior. + :arg memory: 256-byte aligned device memory. + :arg size: Size of the provided memory. This must be at least as large as CudaEngine.get_device_memory_size_v2. + + If using :func:`enqueue_v3()`, it is in use until :func:`enqueue_v3()` returns. Releasing or otherwise using the memory for other + purposes during this time will result in undefined behavior. This includes using the same memory for a parallel execution context. )trtdoc";
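On the C++ side these docstrings correspond to ICudaEngine::getDeviceMemorySizeV2 and IExecutionContext::setDeviceMemoryV2, both visible in the bindings later in this patch. A hedged usage sketch (assumes a built engine and context; error checking elided):

```cpp
#include "NvInfer.h"
#include <cuda_runtime_api.h>

// Sketch: supply execution scratch memory sized by the V2 query, which also
// covers weight-streaming scratch when that feature is active.
void* provideDeviceMemory(nvinfer1::ICudaEngine& engine, nvinfer1::IExecutionContext& context)
{
    int64_t const size = engine.getDeviceMemorySizeV2(); // upper bound across profiles
    void* memory{nullptr};
    if (size > 0)
    {
        cudaMalloc(&memory, static_cast<size_t>(size)); // cudaMalloc returns suitably aligned memory
    }
    context.setDeviceMemoryV2(memory, size);
    // The caller must keep `memory` alive, and unused by anything else (including
    // other execution contexts), while this context is running; then cudaFree it.
    return memory;
}
```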
constexpr char const* set_optimization_profile_async = R"trtdoc( @@ -700,6 +698,7 @@ constexpr char const* descr = R"trtdoc( :ivar num_layers: :class:`int` The number of layers in the network. The number of layers in the network is not necessarily the number in the original :class:`INetworkDefinition`, as layers may be combined or eliminated as the :class:`ICudaEngine` is optimized. This value can be useful when building per-layer tables, such as when aggregating profiling data over a number of executions. :ivar max_workspace_size: :class:`int` The amount of workspace the :class:`ICudaEngine` uses. The workspace size will be no greater than the value provided to the :class:`Builder` when the :class:`ICudaEngine` was built, and will typically be smaller. Workspace will be allocated for each :class:`IExecutionContext` . :ivar device_memory_size: :class:`int` The amount of device memory required by an :class:`IExecutionContext` . + :ivar device_memory_size_v2: :class:`int` The amount of device memory required by an :class:`IExecutionContext`. The return value depends on the weight streaming budget if enabled. :ivar refittable: :class:`bool` Whether the engine can be refit. :ivar name: :class:`str` The name of the network associated with the engine. The name is set during network creation and is retrieved after building or deserialization. :ivar num_optimization_profiles: :class:`int` The number of optimization profiles defined for this engine. This is always at least 1. @@ -708,7 +707,13 @@ :ivar tactic_sources: :class:`int` The tactic sources required by this engine. :ivar profiling_verbosity: The profiling verbosity the builder config was set to when the engine was built. :ivar hardware_compatibility_level: The hardware compatibility level of the engine. - :ivar num_aux_streams: Read-only. The number of auxiliary streams used by this engine, which will be less than or equal to the maximum allowed number of auxiliary streams by setting builder_config.max_aux_streams when the engine is built.)trtdoc" + :ivar num_aux_streams: Read-only. The number of auxiliary streams used by this engine, which will be less than or equal to the maximum allowed number of auxiliary streams by setting builder_config.max_aux_streams when the engine is built. + :ivar weight_streaming_budget: [DEPRECATED] Deprecated in TensorRT 10.1, superseded by weight_streaming_budget_v2. Set and get the current weight streaming budget for inference. The budget may be set to -1 disabling weight streaming at runtime, 0 (default) enabling TRT to choose to weight stream or not, or a positive value in the inclusive range [minimum_weight_streaming_budget, streamable_weights_size - 1]. + :ivar minimum_weight_streaming_budget: [DEPRECATED] Deprecated in TensorRT 10.1, superseded by weight_streaming_budget_v2. Returns the minimum weight streaming budget in bytes required to run the network successfully. The engine must have been built with kWEIGHT_STREAMING. + :ivar streamable_weights_size: Returns the size of the streamable weights in the engine. This may not include all the weights. + :ivar weight_streaming_budget_v2: Set and get the current weight streaming budget for inference. The budget may be set to any non-negative value. A value of 0 streams the most weights. Values equal to streamable_weights_size (default) or larger will disable weight streaming. + :ivar weight_streaming_scratch_memory_size: The amount of scratch memory required by a TensorRT ExecutionContext to perform inference. This value may change based on the current weight streaming budget. Please use the V2 memory APIs, engine.device_memory_size_v2 and ExecutionContext.set_device_memory(), to provide memory which includes the current weight streaming scratch memory. Not using these APIs, or using the V1 APIs, will not include this memory, so TensorRT will allocate it itself. + )trtdoc" ;
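A hedged sketch of the V2 weight-streaming flow these ivars describe, using the C++ members referenced by the bindings later in this patch (setWeightStreamingBudgetV2, getWeightStreamingAutomaticBudget, getWeightStreamingScratchMemorySize); it assumes an engine built with BuilderFlag::kWEIGHT_STREAMING, and return types are left to `auto` rather than asserted:

```cpp
#include "NvInfer.h"

// Sketch: pick a budget first, then size scratch memory, since scratch depends on the budget.
void configureWeightStreaming(nvinfer1::ICudaEngine& engine)
{
    auto const streamable = engine.getStreamableWeightsSize();
    // Heuristic budget derived from available device memory; may change between releases.
    auto const budget = engine.getWeightStreamingAutomaticBudget();
    // Any value >= streamable (the default) disables streaming; 0 streams the most weights.
    engine.setWeightStreamingBudgetV2(budget);
    // Query sizes only after setting the budget:
    auto const scratch = engine.getWeightStreamingScratchMemorySize();
    auto const total = engine.getDeviceMemorySizeV2(); // already includes the scratch
    static_cast<void>(streamable + scratch + total);   // used for allocation in real code
}
```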
// Documentation bug with parameters on these three functions because they are overloaded. @@ -841,6 +846,15 @@ constexpr char const* get_device_memory_size_for_profile = R"trtdoc( Return the device memory size required for a certain profile. :arg profile_index: The index of the profile. )trtdoc"; +constexpr char const* get_device_memory_size_for_profile_v2 = R"trtdoc( + Return the device memory size required for a certain profile. + + The return value will change depending on the following API calls: + 1. setWeightStreamingBudgetV2 + + :arg profile_index: The index of the profile. +)trtdoc"; + constexpr char const* create_serialization_config = R"trtdoc( Create a serialization configuration object. )trtdoc"; @@ -849,6 +863,11 @@ constexpr char const* serialize_with_config = R"trtdoc( Serialize the network to a stream. )trtdoc"; +constexpr char const* get_weight_streaming_automatic_budget = R"trtdoc( + Get an automatic weight streaming budget based on available device memory. This value may change between TensorRT major and minor versions. + Please use CudaEngine.weight_streaming_budget_v2 to set the returned budget. +)trtdoc"; + constexpr char const* is_debug_tensor = R"trtdoc( Determine whether the given name corresponds to a debug tensor. @@ -975,7 +994,7 @@ constexpr char const* REJECT_EMPTY_ALGORITHMS constexpr char const* VERSION_COMPATIBLE = R"trtdoc(Restrict to lean runtime operators to provide version forward compatibility for the plan files.)trtdoc"; constexpr char const* EXCLUDE_LEAN_RUNTIME = R"trtdoc(Exclude lean runtime from the plan.)trtdoc"; -constexpr char const* FP8 = R"trtdoc(Enable FP8 layer selection)trtdoc"; +constexpr char const* FP8 = R"trtdoc(Enable plugins with FP8 input/output)trtdoc"; constexpr char const* ERROR_ON_TIMING_CACHE_MISS = R"trtdoc(Emit error when a tactic being timed is not present in the timing cache.)trtdoc"; constexpr char const* DISABLE_COMPILATION_CACHE @@ -989,6 +1008,7 @@ constexpr char const* REFIT_IDENTICAL = R"trtdoc(Create a refittable engine using identical weights. Different weights during refits yield unpredictable behavior.)trtdoc"; constexpr char const* WEIGHT_STREAMING = R"trtdoc(Enable building with the ability to stream varying amounts of weights during Runtime. This decreases GPU memory of TRT at the expense of performance.)trtdoc"; +constexpr char const* INT4 = R"trtdoc(Enable plugins with INT4 input/output)trtdoc"; } // namespace BuilderFlagDoc namespace MemoryPoolTypeDoc @@ -1153,7 +1173,7 @@ constexpr char const* CUBLAS = R"trtdoc( **NOTE:** Disabling CUBLAS tactic source will cause the cuBLAS handle passed to plugins in attachToContext to be null. )trtdoc"; constexpr char const* CUBLAS_LT = R"trtdoc( - Enables CUBLAS_LT tactics. Disabled by default. + Enables cuBLAS LT tactics. Disabled by default. [DEPRECATED] Deprecated in TensorRT 9.0. 
)trtdoc"; constexpr char const* CUDNN = R"trtdoc( @@ -1240,7 +1260,7 @@ namespace IBuilderConfigDoc constexpr char const* descr = R"trtdoc( :ivar avg_timing_iterations: :class:`int` The number of averaging iterations used when timing layers. When timing layers, the builder minimizes over a set of average times for layer execution. This parameter controls the number of iterations used in averaging. By default the number of averaging iterations is 1. - :ivar int8_calibrator: :class:`IInt8Calibrator` Int8 Calibration interface. The calibrator is to minimize the information loss during the INT8 quantization process. + :ivar int8_calibrator: :class:`IInt8Calibrator` [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. Int8 Calibration interface. The calibrator is to minimize the information loss during the INT8 quantization process. :ivar flags: :class:`int` The build mode flags to turn on builder options for this network. The flags are listed in the BuilderFlags enum. The flags set configuration options to build the network. This should be in integer consisting of one or more :class:`BuilderFlag` s, combined via binary OR. For example, ``1 << BuilderFlag.FP16 | 1 << BuilderFlag.DEBUG``. :ivar profile_stream: :class:`int` The handle for the CUDA stream that is used to profile this network. :ivar num_optimization_profiles: :class:`int` The number of optimization profiles. @@ -1353,6 +1373,8 @@ constexpr char const* add_optimization_profile = R"trtdoc( )trtdoc"; constexpr char const* set_calibration_profile = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Set a calibration profile. Calibration optimization profile must be set if int8 calibration is used to set scales for a network with runtime dimensions. @@ -1363,6 +1385,8 @@ constexpr char const* set_calibration_profile = R"trtdoc( )trtdoc"; constexpr char const* get_calibration_profile = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Get the current calibration profile. :returns: The current calibration profile or None if calibrartion profile is unset. @@ -1866,6 +1890,8 @@ constexpr char const* get_all_weights = R"trtdoc( )trtdoc"; constexpr char const* get_dynamic_range = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Gets the dynamic range of a tensor. If the dynamic range was never set, returns the range computed during calibration. :arg tensor_name: The name of the tensor whose dynamic range to retrieve. @@ -1874,6 +1900,8 @@ constexpr char const* get_dynamic_range = R"trtdoc( )trtdoc"; constexpr char const* set_dynamic_range = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Update dynamic range for a tensor. :arg tensor_name: The name of the tensor whose dynamic range to update. @@ -1885,6 +1913,8 @@ constexpr char const* set_dynamic_range = R"trtdoc( )trtdoc"; constexpr char const* get_tensors_with_dynamic_range = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Get names of all tensors that have refittable dynamic ranges. :returns: The names of tensors with refittable dynamic ranges. 
diff --git a/python/docstrings/infer/pyFoundationalTypesDoc.h b/python/docstrings/infer/pyFoundationalTypesDoc.h index 39ffd53f..eaf742dd 100644 --- a/python/docstrings/infer/pyFoundationalTypesDoc.h +++ b/python/docstrings/infer/pyFoundationalTypesDoc.h @@ -52,9 +52,6 @@ constexpr char const* uint8 = R"trtdoc( constexpr char const* fp8 = R"trtdoc( Signed 8-bit floating point with 1 sign bit, 4 exponent bits, 3 mantissa bits, and exponent-bias 7. - - .. warning:: - fp8 is not supported yet and will result in an error or undefined behavior. )trtdoc"; constexpr char const* int4 = R"trtdoc(Signed 4-bit integer representing a quantized floating-point value.)trtdoc"; diff --git a/python/docstrings/infer/pyGraphDoc.h b/python/docstrings/infer/pyGraphDoc.h index e9913210..19a0df9f 100644 --- a/python/docstrings/infer/pyGraphDoc.h +++ b/python/docstrings/infer/pyGraphDoc.h @@ -195,12 +195,13 @@ constexpr const char* descr = R"trtdoc( :ivar location: :class:`TensorLocation` The storage location of a tensor. :ivar is_network_input: :class:`bool` Whether the tensor is a network input. :ivar is_network_output: :class:`bool` Whether the tensor is a network output. - :ivar dynamic_range: :class:`Tuple[float, float]` A tuple containing the [minimum, maximum] of the dynamic range, or :class:`None` if the range was not set. + :ivar dynamic_range: :class:`Tuple[float, float]` [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. A tuple containing the [minimum, maximum] of the dynamic range, or :class:`None` if the range was not set. :ivar is_shape: :class:`bool` Whether the tensor is a shape tensor. :ivar allowed_formats: :class:`int32` The allowed set of TensorFormat candidates. This should be an integer consisting of one or more :class:`TensorFormat` s, combined via bitwise OR after bit shifting. For example, ``1 << int(TensorFormat.CHW4) | 1 << int(TensorFormat.CHW32)``. )trtdoc" ; constexpr const char* set_dynamic_range = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. Set dynamic range for the tensor. NOTE: It is suggested to use ``tensor.dynamic_range = (min, max)`` instead. @@ -210,6 +211,7 @@ constexpr const char* set_dynamic_range = R"trtdoc( )trtdoc"; constexpr const char* get_dynamic_range = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. Get dynamic range for the tensor. NOTE: It is suggested to use ``tensor.dynamic_range`` instead, which is a tuple including both the minimum and maximum of the dynamic range. @@ -217,6 +219,7 @@ constexpr const char* get_dynamic_range = R"trtdoc( )trtdoc"; constexpr const char* reset_dynamic_range = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. Undo the effect of setting the dynamic range. )trtdoc"; @@ -663,6 +666,8 @@ constexpr const char* SIGN constexpr const char* ROUND = R"trtdoc(Round to nearest even for floating-point data type.)trtdoc"; constexpr const char* ISINF = R"trtdoc(Return true if the input value equals +/- infinity for floating-point data type.)trtdoc"; +constexpr const char* ISNAN + = R"trtdoc(Return true if the input value equals NaN for floating-point data type.)trtdoc"; } // namespace UnaryOperationDoc namespace IUnaryLayerDoc @@ -848,7 +853,7 @@ constexpr const char* descr = R"trtdoc( This class sets the output to a one-dimensional tensor with the dimensions of the input tensor. 
For example, if the input is a four-dimensional tensor (of any type) with - dimensions [2,3,5,7], the output tensor is a one-dimensional :class:`int32` tensor + dimensions [2,3,5,7], the output tensor is a one-dimensional :class:`int64` tensor of length 4 containing the sequence 2, 3, 5, 7. )trtdoc"; diff --git a/python/docstrings/infer/pyInt8Doc.h b/python/docstrings/infer/pyInt8Doc.h index 013c6c75..be10b89c 100644 --- a/python/docstrings/infer/pyInt8Doc.h +++ b/python/docstrings/infer/pyInt8Doc.h @@ -31,6 +31,8 @@ constexpr const char* descr = R"trtdoc( namespace IInt8CalibratorDoc { constexpr const char* descr = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Application-implemented interface for calibration. Calibration is a step performed by the builder when deciding suitable scale factors for 8-bit inference. It must also provide a method for retrieving representative images which the calibration process can use to examine the distribution of activations. It may optionally implement a method for caching the calibration result for reuse on subsequent runs. To implement a custom calibrator, ensure that you explicitly instantiate the base class in :func:`__init__` : @@ -117,6 +119,8 @@ constexpr const char* write_calibration_cache = R"trtdoc( namespace IInt8LegacyCalibratorDoc { constexpr const char* descr = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Extends the :class:`IInt8Calibrator` class. This calibrator requires user parameterization, and is provided as a fallback option if the other calibrators yield poor results. @@ -160,6 +164,8 @@ constexpr const char* get_algorithm = R"trtdoc( namespace IInt8EntropyCalibratorDoc { constexpr const char* descr = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Extends the :class:`IInt8Calibrator` class. To implement a custom calibrator, ensure that you explicitly instantiate the base class in :func:`__init__` : @@ -183,6 +189,8 @@ constexpr const char* get_algorithm = R"trtdoc( namespace IInt8EntropyCalibrator2Doc { constexpr const char* descr = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Extends the :class:`IInt8Calibrator` class. To implement a custom calibrator, ensure that you explicitly instantiate the base class in :func:`__init__` : @@ -205,6 +213,8 @@ constexpr const char* get_algorithm = R"trtdoc( namespace IInt8MinMaxCalibratorDoc { constexpr const char* descr = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. Superseded by explicit quantization. + Extends the :class:`IInt8Calibrator` class. To implement a custom calibrator, ensure that you explicitly instantiate the base class in :func:`__init__` : diff --git a/python/docstrings/parsers/pyOnnxDoc.h b/python/docstrings/parsers/pyOnnxDoc.h index 17656d27..5493b942 100644 --- a/python/docstrings/parsers/pyOnnxDoc.h +++ b/python/docstrings/parsers/pyOnnxDoc.h @@ -59,6 +59,8 @@ constexpr const char* parse_from_file = R"trtdoc( )trtdoc"; constexpr const char* supports_model = R"trtdoc( + [DEPRECATED] Deprecated in TensorRT 10.1. See supports_model_v2. + Check whether TensorRT supports a particular ONNX model. :arg model: The serialized ONNX model. @@ -69,6 +71,38 @@ The second indicates subgraphs (by node index) in the model and whether they are supported. )trtdoc";
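supports_model is deprecated in favor of the supports_model_v2 flow documented next. A hedged C++ sketch of the same query sequence, assuming the IParser methods mirror these bindings (supportsModelV2, getNbSubgraphs, isSubgraphSupported, getSubgraphNodes); exact signatures are an assumption here:

```cpp
#include "NvOnnxParser.h"

// Sketch: query per-subgraph support after a single supportsModelV2() call.
bool reportSupport(nvonnxparser::IParser& parser, void const* model, size_t size)
{
    bool const fullySupported = parser.supportsModelV2(model, size);
    // The subgraph queries below are only meaningful after supportsModelV2().
    int64_t const nbSubgraphs = parser.getNbSubgraphs();
    for (int64_t i = 0; i < nbSubgraphs; ++i)
    {
        if (!parser.isSubgraphSupported(i))
        {
            int64_t nbNodes{0};
            auto const* nodes = parser.getSubgraphNodes(i, nbNodes); // node indices
            static_cast<void>(nodes); // report nodes[0 .. nbNodes) as unsupported
        }
    }
    return fullySupported;
}
```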
)trtdoc"; +constexpr const char* supports_model_v2 = R"trtdoc( + Check whether TensorRT supports a particular ONNX model. + Query each subgraph with num_subgraphs, is_subgraph_supported, get_subgraph_nodes. + + :arg model: The serialized ONNX model. + :arg path: The path to the model file. Only required if the model has externally stored weights. + :returns: true if the model is supported +)trtdoc"; + +constexpr const char* num_subgraphs = R"trtdoc( + Get the number of subgraphs. Calling before \p supportsModelV2 is an undefined behavior. Will return 0 by default. + + :returns: Number of subgraphs +)trtdoc"; + +constexpr const char* is_subgraph_supported = R"trtdoc( + Returns whether the subgraph is supported. Calling before \p supportsModelV2 is an undefined behavior. + Will return false by default. + + :arg index: Index of the subgraph to be checked. + :returns: true if subgraph is supported +)trtdoc"; + +constexpr const char* get_subgraph_nodes = R"trtdoc( + Get the nodes of the specified subgraph. Calling before \p supportsModelV2 is an undefined behavior. + Will return an empty list by default. + + :arg index: Index of the subgraph. + :returns: List[int] + A list of node indices in the subgraph. +)trtdoc"; + constexpr const char* supports_operator = R"trtdoc( Returns whether the specified operator may be supported by the parser. Note that a result of true does not guarantee that the operator will be supported in all cases (i.e., this function may return false-positives). diff --git a/python/packaging/bindings_wheel/tensorrt/__init__.py b/python/packaging/bindings_wheel/tensorrt/__init__.py index e82ee1ec..efcbcf64 100644 --- a/python/packaging/bindings_wheel/tensorrt/__init__.py +++ b/python/packaging/bindings_wheel/tensorrt/__init__.py @@ -20,6 +20,7 @@ import sys import warnings +ENABLE_LONG_TERM = bool(int(os.environ.get("NV_INTERNAL_ENABLE_LONG_TERM", "0"))) # For standalone wheels, attempt to import the wheel containing the libraries. _libs_wheel_imported = False @@ -196,6 +197,10 @@ def _itemsize(trt_type): fp8: 1, int4: 0.5, } + # $nv-internal-release begin + if ENABLE_LONG_TERM: + mapping[fp4] = 0.5 + # $nv-internal-release end if trt_type in mapping: return mapping[trt_type] diff --git a/python/packaging/frontend_sdist/setup.py b/python/packaging/frontend_sdist/setup.py index 8c050d20..ec711fc5 100644 --- a/python/packaging/frontend_sdist/setup.py +++ b/python/packaging/frontend_sdist/setup.py @@ -157,7 +157,7 @@ def parent_command_line(): ], packages=[tensorrt_package], install_requires=install_requires, - setup_requires=["wheel"], + setup_requires=["wheel", "pip"], python_requires=">=3.6", # ref https://pypi.nvidia.com/tensorrt-bindings/ cmdclass=cmdclass, extras_require={"numpy": "numpy"}, diff --git a/python/src/infer/pyCore.cpp b/python/src/infer/pyCore.cpp index 4d6f72e0..52d9cb75 100644 --- a/python/src/infer/pyCore.cpp +++ b/python/src/infer/pyCore.cpp @@ -336,6 +336,11 @@ void context_set_device_memory(IExecutionContext& self, size_t memory) self.setDeviceMemory(reinterpret_cast(memory)); } +void context_set_device_memory_v2(IExecutionContext& self, size_t memory, int64_t size) +{ + self.setDeviceMemoryV2(reinterpret_cast(memory), size); +} + void serialization_config_set_flags(ISerializationConfig& self, uint32_t flags) { if (!self.setFlags(flags)) @@ -1034,6 +1039,8 @@ void bindCore(py::module& m) "name", &IExecutionContext::getName, py::cpp_function(&IExecutionContext::setName, py::keep_alive<1, 2>{})) // For writeonly properties, we use a nullptr getter. 
.def_property("device_memory", nullptr, &lambdas::context_set_device_memory) + .def("set_device_memory", &lambdas::context_set_device_memory_v2, "memory"_a, "size"_a, + IExecutionContextDoc::set_device_memory) .def("update_device_memory_size_for_shapes", &IExecutionContext::updateDeviceMemorySizeForShapes, IExecutionContextDoc::update_device_memory_size_for_shapes) .def_property_readonly("active_optimization_profile", &IExecutionContext::getOptimizationProfile) @@ -1162,9 +1169,16 @@ void bindCore(py::module& m) utils::deprecateMember(&ICudaEngine::createExecutionContextWithoutDeviceMemory, "create_execution_context"), ICudaEngineDoc::create_execution_context_without_device_memory, py::keep_alive<0, 1>{}, py::call_guard{}) - .def("get_device_memory_size_for_profile", &ICudaEngine::getDeviceMemorySizeForProfile, "profile_index"_a, - ICudaEngineDoc::get_device_memory_size_for_profile) - .def_property_readonly("device_memory_size", &ICudaEngine::getDeviceMemorySize) + .def("get_device_memory_size_for_profile", + utils::deprecateMember(&ICudaEngine::getDeviceMemorySizeForProfile, + "Deprecated in TensorRT 10.1. Superseded by get_device_memory_size_for_profile_v2"), + "profile_index"_a, ICudaEngineDoc::get_device_memory_size_for_profile) + .def("get_device_memory_size_for_profile_v2", &ICudaEngine::getDeviceMemorySizeForProfile, "profile_index"_a, + ICudaEngineDoc::get_device_memory_size_for_profile_v2) + .def_property_readonly("device_memory_size", + utils::deprecateMember(&ICudaEngine::getDeviceMemorySize, + "Deprecated in TensorRT 10.1. Superseded by get_device_memory_size_v2")) + .def_property_readonly("device_memory_size_v2", &ICudaEngine::getDeviceMemorySizeV2) .def_property_readonly("refittable", &ICudaEngine::isRefittable) .def_property_readonly("name", &ICudaEngine::getName) .def_property_readonly("num_optimization_profiles", &ICudaEngine::getNbOptimizationProfiles) @@ -1258,10 +1272,26 @@ void bindCore(py::module& m) .def_property_readonly("hardware_compatibility_level", &ICudaEngine::getHardwareCompatibilityLevel) .def_property_readonly("num_aux_streams", &ICudaEngine::getNbAuxStreams) // Weight streaming APIs - .def_property( - "weight_streaming_budget", &ICudaEngine::getWeightStreamingBudget, &ICudaEngine::setWeightStreamingBudget) - .def_property_readonly("minimum_weight_streaming_budget", &ICudaEngine::getMinimumWeightStreamingBudget) + .def_property("weight_streaming_budget", + utils::deprecateMember(&ICudaEngine::getWeightStreamingBudget, + "Deprecated in TensorRT 10.1. Superseded by weight_streaming_budget_v2"), + utils::deprecateMember(&ICudaEngine::setWeightStreamingBudget, + "Deprecated in TensorRT 10.1. Superseded by weight_streaming_budget_v2")) + .def_property("weight_streaming_budget_v2", &ICudaEngine::getWeightStreamingBudgetV2, + &ICudaEngine::setWeightStreamingBudgetV2) + .def_property_readonly("minimum_weight_streaming_budget", + utils::deprecateMember( + &ICudaEngine::getMinimumWeightStreamingBudget, "Deprecated in TensorRT 10.1. Not required by V2 APIs.")) .def_property_readonly("streamable_weights_size", &ICudaEngine::getStreamableWeightsSize) + // We keep this as a method so that future TRT versions may overload if the automatic budgeting algorithm ever + // requires additional arguments. 
+ .def( + "get_weight_streaming_automatic_budget", + [](ICudaEngine& self) -> int32_t { return self.getWeightStreamingAutomaticBudget(); }, + ICudaEngineDoc::get_weight_streaming_automatic_budget) + .def_property_readonly( + "weight_streaming_scratch_memory_size", &ICudaEngine::getWeightStreamingScratchMemorySize) + // End weight streaming APIs .def("is_debug_tensor", &ICudaEngine::isDebugTensor, "name"_a, ICudaEngineDoc::is_debug_tensor) .def("__del__", &utils::doNothingDel); @@ -1332,7 +1362,9 @@ .value("WEIGHTLESS", BuilderFlag::kWEIGHTLESS, BuilderFlagDoc::WEIGHTLESS) .value("STRIP_PLAN", BuilderFlag::kSTRIP_PLAN, BuilderFlagDoc::STRIP_PLAN) .value("REFIT_IDENTICAL", BuilderFlag::kREFIT_IDENTICAL, BuilderFlagDoc::REFIT_IDENTICAL) - .value("WEIGHT_STREAMING", BuilderFlag::kWEIGHT_STREAMING, BuilderFlagDoc::WEIGHT_STREAMING); + .value("WEIGHT_STREAMING", BuilderFlag::kWEIGHT_STREAMING, BuilderFlagDoc::WEIGHT_STREAMING) + .value("INT4", BuilderFlag::kINT4, BuilderFlagDoc::INT4) + ; py::enum_<MemoryPoolType>(m, "MemoryPoolType", MemoryPoolTypeDoc::descr, py::module_local()) .value("WORKSPACE", MemoryPoolType::kWORKSPACE, MemoryPoolTypeDoc::WORKSPACE) @@ -1386,8 +1418,12 @@ py::class_<IBuilderConfig>(m, "IBuilderConfig", IBuilderConfigDoc::descr, py::module_local()) .def_property( "avg_timing_iterations", &IBuilderConfig::getAvgTimingIterations, &IBuilderConfig::setAvgTimingIterations) - .def_property("int8_calibrator", &IBuilderConfig::getInt8Calibrator, - py::cpp_function(&IBuilderConfig::setInt8Calibrator, py::keep_alive<1, 2>{})) + .def_property("int8_calibrator", + utils::deprecateMember(&IBuilderConfig::getInt8Calibrator, + "Deprecated in TensorRT 10.1. Superseded by explicit quantization."), + py::cpp_function(utils::deprecateMember(&IBuilderConfig::setInt8Calibrator, + "Deprecated in TensorRT 10.1. Superseded by explicit quantization."), + py::keep_alive<1, 2>{})) .def_property("engine_capability", &IBuilderConfig::getEngineCapability, &IBuilderConfig::setEngineCapability) .def("set_memory_pool_limit", &IBuilderConfig::setMemoryPoolLimit, "pool"_a, "pool_size"_a, IBuilderConfigDoc::set_memory_pool_limit) @@ -1412,9 +1448,13 @@ .def_property("profile_stream", lambdas::netconfig_get_profile_stream, lambdas::netconfig_set_profile_stream) .def("add_optimization_profile", &IBuilderConfig::addOptimizationProfile, "profile"_a, IBuilderConfigDoc::add_optimization_profile) - .def("set_calibration_profile", &IBuilderConfig::setCalibrationProfile, "profile"_a, - IBuilderConfigDoc::set_calibration_profile) - .def("get_calibration_profile", &IBuilderConfig::getCalibrationProfile, + .def("set_calibration_profile", + utils::deprecateMember(&IBuilderConfig::setCalibrationProfile, + "Deprecated in TensorRT 10.1. Superseded by explicit quantization."), + "profile"_a, IBuilderConfigDoc::set_calibration_profile) + .def("get_calibration_profile", + utils::deprecateMember(&IBuilderConfig::getCalibrationProfile, + "Deprecated in TensorRT 10.1. 
Superseded by explicit quantization."), IBuilderConfigDoc::get_calibration_profile) .def_property_readonly("num_optimization_profiles", &IBuilderConfig::getNbOptimizationProfiles) .def("set_device_type", &IBuilderConfig::setDeviceType, "layer"_a, "device_type"_a, @@ -1523,10 +1563,18 @@ void bindCore(py::module& m) .def("get_missing_weights", lambdas::refitter_get_missing_weights, RefitterDoc::get_missing_weights) .def("get_all", lambdas::refitter_get_all, RefitterDoc::get_all) .def("get_all_weights", lambdas::refitter_get_all_weights, RefitterDoc::get_all_weights) - .def("get_dynamic_range", lambdas::refitter_get_dynamic_range, "tensor_name"_a, RefitterDoc::get_dynamic_range) - .def("set_dynamic_range", lambdas::refitter_set_dynamic_range, "tensor_name"_a, "range"_a, - RefitterDoc::set_dynamic_range) - .def("get_tensors_with_dynamic_range", lambdas::refitter_get_tensors_with_dynamic_range, + // Using a plus sign converts the lambda function into a function pointer. + .def("get_dynamic_range", + utils::deprecate(+lambdas::refitter_get_dynamic_range, + "Deprecated in TensorRT 10.1. Superseded by explicit quantization."), + "tensor_name"_a, RefitterDoc::get_dynamic_range) + .def("set_dynamic_range", + utils::deprecate(+lambdas::refitter_set_dynamic_range, + "Deprecated in TensorRT 10.1. Superseded by explicit quantization."), + "tensor_name"_a, "range"_a, RefitterDoc::set_dynamic_range) + .def("get_tensors_with_dynamic_range", + utils::deprecate(+lambdas::refitter_get_tensors_with_dynamic_range, + "Deprecated in TensorRT 10.1. Superseded by explicit quantization."), RefitterDoc::get_tensors_with_dynamic_range) .def_property("error_recorder", &IRefitter::getErrorRecorder, py::cpp_function(&IRefitter::setErrorRecorder, py::keep_alive<1, 2>{})) diff --git a/python/src/infer/pyFoundationalTypes.cpp b/python/src/infer/pyFoundationalTypes.cpp index 6f64f7d4..9b9ea4ad 100644 --- a/python/src/infer/pyFoundationalTypes.cpp +++ b/python/src/infer/pyFoundationalTypes.cpp @@ -186,7 +186,8 @@ void bindFoundationalTypes(py::module& m) .value("BOOL", DataType::kBOOL, DataTypeDoc::boolean) .value("UINT8", DataType::kUINT8, DataTypeDoc::uint8) .value("FP8", DataType::kFP8, DataTypeDoc::fp8) - .value("INT4", DataType::kINT4, DataTypeDoc::int4); // DataType + .value("INT4", DataType::kINT4, DataTypeDoc::int4) + ; // DataType // Also create direct mappings (so we can call trt.float32, for example). m.attr("float32") = DataType::kFLOAT; diff --git a/python/src/infer/pyGraph.cpp b/python/src/infer/pyGraph.cpp index ddca1e9d..57bfc264 100644 --- a/python/src/infer/pyGraph.cpp +++ b/python/src/infer/pyGraph.cpp @@ -327,10 +327,11 @@ namespace tensorrt .def_property_readonly("is_network_output", &ITensor::isNetworkOutput) .def_property_readonly("is_shape_tensor", &ITensor::isShapeTensor) .def_property_readonly("is_execution_tensor", &ITensor::isExecutionTensor) - .def_property("dynamic_range", lambdas::get_dynamic_range, lambdas::set_dynamic_range) + // Using a plus sign converts the lambda function into a function pointer. + .def_property("dynamic_range", utils::deprecate(+lambdas::get_dynamic_range, "Deprecated in TensorRT 10.1. Superseded by explicit quantization."), utils::deprecate(+lambdas::set_dynamic_range, "Deprecated in TensorRT 10.1. 
Superseded by explicit quantization.")) .def_property("allowed_formats", &ITensor::getAllowedFormats, &ITensor::setAllowedFormats) - .def("set_dynamic_range", &ITensor::setDynamicRange, "min"_a, "max"_a, ITensorDoc::set_dynamic_range) - .def("reset_dynamic_range", &ITensor::resetDynamicRange, ITensorDoc::reset_dynamic_range) + .def("set_dynamic_range", utils::deprecateMember(&ITensor::setDynamicRange, "Deprecated in TensorRT 10.1. Superseded by explicit quantization."), "min"_a, "max"_a, ITensorDoc::set_dynamic_range) + .def("reset_dynamic_range", utils::deprecateMember(&ITensor::resetDynamicRange, "Deprecated in TensorRT 10.1. Superseded by explicit quantization."), ITensorDoc::reset_dynamic_range) .def("set_dimension_name", &ITensor::setDimensionName, "index"_a, "name"_a, ITensorDoc::set_dimension_name) .def("get_dimension_name", &ITensor::getDimensionName, "index"_a, ITensorDoc::get_dimension_name) ; @@ -549,6 +550,7 @@ namespace tensorrt .value("SIGN", UnaryOperation::kSIGN, UnaryOperationDoc::SIGN) .value("ROUND", UnaryOperation::kROUND, UnaryOperationDoc::ROUND) .value("ISINF", UnaryOperation::kISINF, UnaryOperationDoc::ISINF) + .value("ISNAN", UnaryOperation::kISNAN, UnaryOperationDoc::ISNAN) ; py::class_>(m, "IUnaryLayer", IUnaryLayerDoc::descr, py::module_local()) diff --git a/python/src/infer/pyPlugin.cpp b/python/src/infer/pyPlugin.cpp index 9fc1b901..671a14c3 100644 --- a/python/src/infer/pyPlugin.cpp +++ b/python/src/infer/pyPlugin.cpp @@ -975,6 +975,8 @@ class PyIPluginV3OneBuildImpl : public IPluginV3OneBuild py::function pyGetValidTactics = py::get_override(static_cast(this), "get_valid_tactics"); + mIsTacticsInitialized = true; + if (!pyGetValidTactics) { // if no implementation is provided for get_valid_tactics(), communicate that no custom tactics are @@ -983,8 +985,8 @@ class PyIPluginV3OneBuildImpl : public IPluginV3OneBuild } py::object pyResult = pyGetValidTactics(); - auto result = pyResult.cast>(); - return static_cast(result.size()); + mTactics = pyResult.cast>(); + return static_cast(mTactics.size()); } PLUGIN_API_CATCH_CAST("get_valid_tactics", "std::vector") catch (py::error_already_set& e) @@ -1004,19 +1006,28 @@ class PyIPluginV3OneBuildImpl : public IPluginV3OneBuild try { - py::function pyGetValidTactics - = py::get_override(static_cast(this), "get_valid_tactics"); - - if (!pyGetValidTactics) + // getValidTactics() must immediately follow getNbTactics() + // because it is impossible to call getValidTactics() without knowing the + // correct number of tactics. So check that mIsTacticsInitialized is true. + // Otherwise, something has gone wrong. + if (mIsTacticsInitialized) { - // if no implementation is provided for get_valid_tactics() nothing further to do + // Unset to catch any subsequent violations + mIsTacticsInitialized = false; + if (nbTactics != static_cast(mTactics.size())) + { + utils::throwPyError( + PyExc_RuntimeError, "number of tactics does not match cached number of tactics"); + } + std::copy(mTactics.begin(), mTactics.end(), tactics); return 0; } - - py::object pyResult = pyGetValidTactics(); - auto result = pyResult.cast>(); - std::copy(result.begin(), result.end(), tactics); - return 0; + else + { + utils::throwPyError( + PyExc_RuntimeError, "Internal error. 
getValidTactics() called before getNbTactics()."); + } + return -1; } PLUGIN_API_CATCH_CAST("get_valid_tactics", "std::vector") catch (py::error_already_set& e) @@ -1313,11 +1324,13 @@ class PyIPluginV3OneBuildImpl : public IPluginV3OneBuild int32_t mFormatCombinationLimit{}; std::string mTimingCachedId{}; std::string mMetadataString{}; + std::vector mTactics; bool mIsNbOutputsInitialized{false}; bool mIsTimingCachedIdInitialized{false}; bool mIsFormatCombinationLimitInitialized{false}; bool mIsMetadataStringInitialized{false}; + bool mIsTacticsInitialized{false}; }; class PyIPluginV3OneRuntimeImpl : public IPluginV3OneRuntime @@ -2322,7 +2335,9 @@ void bindPlugin(py::module& m) .value("UNKNOWN", PluginFieldType::kUNKNOWN) .value("BF16", PluginFieldType::kBF16) .value("INT64", PluginFieldType::kINT64) - .value("FP8", PluginFieldType::kFP8); + .value("FP8", PluginFieldType::kFP8) + .value("INT4", PluginFieldType::kINT4) + ; py::class_(m, "PluginField", PluginFieldDoc::descr, py::module_local()) .def(py::init(lambdas::plugin_field_default_constructor), "name"_a = "", py::keep_alive<1, 2>{}) @@ -2337,27 +2352,23 @@ void bindPlugin(py::module& m) [](PluginField& self) { switch (self.type) { - case PluginFieldType::kINT32: - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kINT8: - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kINT16: - return py::array(self.length, static_cast(self.data)); - break; + case PluginFieldType::kINT32: return py::array(self.length, static_cast(self.data)); + case PluginFieldType::kUNKNOWN: + case PluginFieldType::kINT8: return py::array(self.length, static_cast(self.data)); + case PluginFieldType::kINT16: return py::array(self.length, static_cast(self.data)); + case PluginFieldType::kFLOAT32: return py::array(self.length, static_cast(self.data)); + case PluginFieldType::kFLOAT64: return py::array(self.length, static_cast(self.data)); + case PluginFieldType::kINT64: return py::array(self.length, static_cast(self.data)); + case PluginFieldType::kCHAR: return py::array(self.length, static_cast(self.data)); + case PluginFieldType::kINT4: case PluginFieldType::kFLOAT16: - // TODO: Figure out how to handle float16 correctly here - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kFLOAT32: - return py::array(self.length, static_cast(self.data)); - break; - case PluginFieldType::kFLOAT64: - return py::array(self.length, static_cast(self.data)); + case PluginFieldType::kBF16: + case PluginFieldType::kDIMS: + case PluginFieldType::kFP8: + utils::throwPyError( + PyExc_AttributeError, "No known conversion for returning data from PluginField"); break; - case PluginFieldType::kCHAR: return py::array(self.length, static_cast(self.data)); break; - default: assert(false && "No known conversion for returning data from PluginField"); break; + default: return py::array(); } // should not reach this line return py::array(); diff --git a/python/src/parsers/pyOnnx.cpp b/python/src/parsers/pyOnnx.cpp index 9059a3b7..3cb45419 100644 --- a/python/src/parsers/pyOnnx.cpp +++ b/python/src/parsers/pyOnnx.cpp @@ -55,9 +55,9 @@ static const auto error_code_str = [](ErrorCode self) { }; static const auto parser_error_str = [](IParserError& self) { - const std::string node_info = "In node " + std::to_string(self.node()) + " with name: " + self.nodeName() + std::string const node_info = "In node " + std::to_string(self.node()) + " with name: " + self.nodeName() + " and operator: " + 
self.nodeOperator() + " "; - const std::string error_info + std::string const error_info = std::string("(") + self.func() + "): " + error_code_str(self.code()) + ": " + self.desc(); if (self.code() == ErrorCode::kMODEL_DESERIALIZE_FAILED || self.code() == ErrorCode::kREFIT_FAILED) { @@ -66,26 +66,45 @@ static const auto parser_error_str = [](IParserError& self) { return node_info + error_info; }; -static const auto parse = [](IParser& self, const py::buffer& model, const char* path = nullptr) { +static const auto parse = [](IParser& self, py::buffer const& model, char const* path = nullptr) { py::buffer_info info = model.request(); return self.parse(info.ptr, info.size * info.itemsize, path); }; -static const auto parse_with_weight_descriptors = [](IParser& self, const py::buffer& model) { +static const auto parse_with_weight_descriptors = [](IParser& self, py::buffer const& model) { py::buffer_info info = model.request(); return self.parseWithWeightDescriptors(info.ptr, info.size * info.itemsize); }; static const auto parseFromFile - = [](IParser& self, const std::string& model) { return self.parseFromFile(model.c_str(), 0); }; + = [](IParser& self, std::string const& model) { return self.parseFromFile(model.c_str(), 0); }; -static const auto supportsModel = [](IParser& self, const py::buffer& model, const char* path = nullptr) { +static const auto supportsModel = [](IParser& self, py::buffer const& model, char const* path = nullptr) { py::buffer_info info = model.request(); SubGraphCollection_t subgraphs; - const bool supported = self.supportsModel(info.ptr, info.size * info.itemsize, subgraphs, path); + bool const supported = self.supportsModel(info.ptr, info.size * info.itemsize, subgraphs, path); return std::make_pair(supported, subgraphs); }; +static const auto supportsModelV2 = [](IParser& self, py::buffer const& model, char const* path = nullptr) { + py::buffer_info info = model.request(); + return self.supportsModelV2(info.ptr, info.size * info.itemsize, path); +}; + +static const auto isSubgraphSupported + = [](IParser& self, int64_t const index) { return self.isSubgraphSupported(index); }; + +static const auto getSubgraphNodes = [](IParser& self, int64_t const index) { + py::list py_nodes; + int64_t nb_nodes = 0; + int64_t* nodes = self.getSubgraphNodes(index, nb_nodes); + for (int64_t i = 0; i < nb_nodes; i++) + { + py_nodes.append(nodes[i]); + } + return py_nodes; +}; + static const auto get_used_vc_plugin_libraries = [](IParser& self) { std::vector vcPluginLibs; int64_t nbPluginLibs; @@ -117,13 +136,13 @@ static const auto get_local_function_stack = [](IParserError& self) { return localFunctionStack; }; -static const auto refitFromBytes = [](IParserRefitter& self, const py::buffer& model, const char* path = nullptr) { +static const auto refitFromBytes = [](IParserRefitter& self, py::buffer const& model, char const* path = nullptr) { py::buffer_info info = model.request(); return self.refitFromBytes(info.ptr, info.size * info.itemsize, path); }; static const auto refitFromFile - = [](IParserRefitter& self, const std::string& model) { return self.refitFromFile(model.c_str()); }; + = [](IParserRefitter& self, std::string const& model) { return self.refitFromFile(model.c_str()); }; } // namespace lambdas @@ -143,6 +162,11 @@ void bindOnnx(py::module& m) py::call_guard{}) .def("supports_operator", &IParser::supportsOperator, "op_name"_a, OnnxParserDoc::supports_operator) .def("supports_model", lambdas::supportsModel, "model"_a, "path"_a = nullptr, OnnxParserDoc::supports_model) + 
.def("supports_model_v2", lambdas::supportsModelV2, "model"_a, "path"_a = nullptr, + OnnxParserDoc::supports_model_v2) + .def_property_readonly("num_subgraphs", &IParser::getNbSubgraphs) + .def("is_subgraph_supported", lambdas::isSubgraphSupported, "index"_a, OnnxParserDoc::is_subgraph_supported) + .def("get_subgraph_nodes", lambdas::getSubgraphNodes, "index"_a, OnnxParserDoc::get_subgraph_nodes) .def_property_readonly("num_errors", &IParser::getNbErrors) .def("get_error", &IParser::getError, "index"_a, OnnxParserDoc::get_error) .def("clear_errors", &IParser::clearErrors, OnnxParserDoc::clear_errors) diff --git a/python/src/utils.cpp b/python/src/utils.cpp index de601542..2191eadd 100644 --- a/python/src/utils.cpp +++ b/python/src/utils.cpp @@ -44,7 +44,8 @@ size_t size(nvinfer1::DataType type) case nvinfer1::DataType::kUINT8: return 1; case nvinfer1::DataType::kFP8: return 1; case nvinfer1::DataType::kBF16: return 2; - case nvinfer1::DataType::kINT4: break; // TRT-22011 - need to address sub-byte element size + case nvinfer1::DataType::kINT4: + break; // TRT-22011 - need to address sub-byte element size } return -1; } @@ -62,9 +63,10 @@ std::unique_ptr nptype(nvinfer1::DataType type) case nvinfer1::DataType::kINT64: return makeDtype("i8"); case nvinfer1::DataType::kBOOL: return makeDtype("b1"); case nvinfer1::DataType::kUINT8: return makeDtype("u1"); - case nvinfer1::DataType::kFP8: return nullptr; - case nvinfer1::DataType::kBF16: return nullptr; - case nvinfer1::DataType::kINT4: return nullptr; + case nvinfer1::DataType::kFP8: + case nvinfer1::DataType::kBF16: + case nvinfer1::DataType::kINT4: + return nullptr; } return nullptr; } diff --git a/requirements.txt b/requirements.txt index f87a9b0c..bb70e107 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -onnx -tensorflow-gpu==2.9.1; (platform_machine=="x86_64" and sys.platform=="linux" and python_version>="3.7") +onnx>=1.14.0 +tensorflow[and-cuda]==2.12.0; (platform_machine=="x86_64" and sys.platform=="linux" and python_version>="3.7") onnxruntime==1.8.1; python_version<"3.10" onnxruntime==1.12.1; python_version=="3.10" -f https://download.pytorch.org/whl/cu113/torch_stable.html diff --git a/samples/README.md b/samples/README.md index 81b8c4b8..51d4cc68 100644 --- a/samples/README.md +++ b/samples/README.md @@ -27,6 +27,7 @@ | [onnx_packnet](python/onnx_packnet) | Python | ONNX | TensorRT Inference Of ONNX Models With Custom Layers | | [simpleProgressMonitor](python/simple_progress_monitor) | Python | ONNX | Progress Monitor API usage | | [python_plugin](python/python_plugin) | Python | INetwork/ONNX | Python-based TRT plugins | +| [non_zero_plugin](python/non_zero_plugin) | Python | INetwork/ONNX | Python-based TRT plugin for NonZero op | ### 3. Application Samples | Sample | Language | Format | Description | diff --git a/samples/common/argsParser.h b/samples/common/argsParser.h index b302dc47..1f0b9025 100644 --- a/samples/common/argsParser.h +++ b/samples/common/argsParser.h @@ -43,6 +43,7 @@ struct SampleParams std::vector dataDirs; //!< Directory paths where sample data files are stored std::vector inputTensorNames; std::vector outputTensorNames; + std::string timingCacheFile; //!< Path to timing cache file }; //! @@ -69,6 +70,7 @@ struct Args std::string saveEngine; std::string loadEngine; bool rowOrder{true}; + std::string timingCacheFile; }; //! 
@@ -83,11 +85,12 @@ inline bool parseArgs(Args& args, int32_t argc, char* argv[]) while (1) { int32_t arg; - static struct option long_options[] = {{"help", no_argument, 0, 'h'}, {"datadir", required_argument, 0, 'd'}, - {"int8", no_argument, 0, 'i'}, {"fp16", no_argument, 0, 'f'}, {"bf16", no_argument, 0, 'z'}, - {"columnOrder", no_argument, 0, 'c'}, {"saveEngine", required_argument, 0, 's'}, - {"loadEngine", required_argument, 0, 'o'}, {"useDLACore", required_argument, 0, 'u'}, - {"batch", required_argument, 0, 'b'}, {nullptr, 0, nullptr, 0}}; + static struct option long_options[] + = {{"help", no_argument, 0, 'h'}, {"datadir", required_argument, 0, 'd'}, {"int8", no_argument, 0, 'i'}, + {"fp16", no_argument, 0, 'f'}, {"bf16", no_argument, 0, 'z'}, {"columnOrder", no_argument, 0, 'c'}, + {"saveEngine", required_argument, 0, 's'}, {"loadEngine", required_argument, 0, 'o'}, + {"useDLACore", required_argument, 0, 'u'}, {"batch", required_argument, 0, 'b'}, + {"timingCacheFile", required_argument, 0, 't'}, {nullptr, 0, nullptr, 0}}; int32_t option_index = 0; arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index); if (arg == -1) @@ -137,6 +140,17 @@ inline bool parseArgs(Args& args, int32_t argc, char* argv[]) args.batch = std::stoi(optarg); } break; + case 't': + if (optarg) + { + args.timingCacheFile = optarg; + } + else + { + std::cerr << "ERROR: --timingCacheFile requires option argument" << std::endl; + return false; + } + break; default: return false; } } diff --git a/samples/common/common.h b/samples/common/common.h index 0324d2fb..d5c3711c 100644 --- a/samples/common/common.h +++ b/samples/common/common.h @@ -18,7 +18,9 @@ #ifndef TENSORRT_COMMON_H #define TENSORRT_COMMON_H #include "NvInfer.h" +#if !TRT_WINML #include "NvInferPlugin.h" +#endif #include "logger.h" #include "safeCommon.h" #include "utils/timingCache.h" @@ -201,6 +203,7 @@ struct SimpleProfiler : public nvinfer1::IProfiler namespace samplesCommon { using nvinfer1::utils::loadTimingCacheFile; +using nvinfer1::utils::buildTimingCacheFromFile; using nvinfer1::utils::saveTimingCacheFile; using nvinfer1::utils::updateTimingCacheFile; // Swaps endianness of an integral type. 
@@ -295,7 +298,7 @@ struct InferDeleter }; template -using SampleUniquePtr = std::unique_ptr; +using SampleUniquePtr = std::unique_ptr; static auto StreamDeleter = [](cudaStream_t* pStream) { if (pStream) @@ -547,7 +550,8 @@ inline uint32_t getElementSize(nvinfer1::DataType t) noexcept case nvinfer1::DataType::kUINT8: case nvinfer1::DataType::kINT8: case nvinfer1::DataType::kFP8: return 1; - case nvinfer1::DataType::kINT4: ASSERT(false && "Element size is not implemented for sub-byte data-types (INT4)"); + case nvinfer1::DataType::kINT4: + ASSERT(false && "Element size is not implemented for sub-byte data-types"); } return 0; } @@ -894,11 +898,9 @@ inline int32_t getMaxPersistentCacheSize() int32_t deviceIndex{}; CHECK(cudaGetDevice(&deviceIndex)); - int32_t maxPersistentL2CacheSize; -#if CUDART_VERSION >= 11030 + int32_t maxPersistentL2CacheSize{}; +#if CUDART_VERSION >= 11030 && !TRT_WINML CHECK(cudaDeviceGetAttribute(&maxPersistentL2CacheSize, cudaDevAttrMaxPersistingL2CacheSize, deviceIndex)); -#else - maxPersistentL2CacheSize = 0; #endif return maxPersistentL2CacheSize; diff --git a/samples/common/safeCommon.h b/samples/common/safeCommon.h index 4cc87a70..5d9a30bc 100644 --- a/samples/common/safeCommon.h +++ b/samples/common/safeCommon.h @@ -164,7 +164,7 @@ inline uint32_t elementSize(nvinfer1::DataType t) case nvinfer1::DataType::kBOOL: case nvinfer1::DataType::kFP8: return 1; case nvinfer1::DataType::kINT4: - SAFE_ASSERT(false && "Element size is not implemented for sub-byte data-types (INT4)"); + SAFE_ASSERT(false && "Element size is not implemented for sub-byte data-types"); } return 0; } @@ -202,12 +202,18 @@ inline int64_t volume(nvinfer1::Dims dims, int32_t vecDim, int32_t comps, int32_ inline int32_t getSMVersion() { - int32_t deviceIndex = 0; +#if 0 + // Use default value for 4090 + int32_t major{8}; + int32_t minor{9}; +#else + int32_t major{}; + int32_t minor{}; + int32_t deviceIndex{}; CHECK(cudaGetDevice(&deviceIndex)); - int32_t major, minor; CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex)); CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex)); - +#endif return ((major << 8) | minor); } diff --git a/samples/common/sampleDevice.cpp b/samples/common/sampleDevice.cpp index 235ad9f0..7964aeb5 100644 --- a/samples/common/sampleDevice.cpp +++ b/samples/common/sampleDevice.cpp @@ -54,6 +54,7 @@ std::string getUuidString(cudaUUID_t uuid) void setCudaDevice(int32_t device, std::ostream& os) { +#if !TRT_WINML os << "=== Device Information ===" << std::endl; // Get the number of visible GPUs. @@ -112,6 +113,7 @@ void setCudaDevice(int32_t device, std::ostream& os) os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is " << "currently running at." 
<< std::endl; // clang-format on +#endif } int32_t getCudaDriverVersion() diff --git a/samples/common/sampleDevice.h b/samples/common/sampleDevice.h index 5e62f6d0..add2cbf5 100644 --- a/samples/common/sampleDevice.h +++ b/samples/common/sampleDevice.h @@ -235,16 +235,18 @@ class TrtCudaBuffer TrtCudaBuffer(TrtCudaBuffer&& rhs) { - reset(rhs.mPtr); + reset(rhs.mPtr, rhs.mSize); rhs.mPtr = nullptr; + rhs.mSize = 0; } TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) { if (this != &rhs) { - reset(rhs.mPtr); + reset(rhs.mPtr, rhs.mSize); rhs.mPtr = nullptr; + rhs.mSize = 0; } return *this; } @@ -257,21 +259,24 @@ class TrtCudaBuffer TrtCudaBuffer(size_t size) { A()(&mPtr, size); + mSize = size; } void allocate(size_t size) { reset(); A()(&mPtr, size); + mSize = size; } - void reset(void* ptr = nullptr) + void reset(void* ptr = nullptr, size_t size = 0) { if (mPtr) { D()(mPtr); } mPtr = ptr; + mSize = size; } void* get() const @@ -279,8 +284,14 @@ class TrtCudaBuffer return mPtr; } + size_t getSize() const + { + return mSize; + } + private: void* mPtr{nullptr}; + size_t mSize{0}; }; struct DeviceAllocator diff --git a/samples/common/sampleEngines.cpp b/samples/common/sampleEngines.cpp index b39d513b..cb7521a8 100644 --- a/samples/common/sampleEngines.cpp +++ b/samples/common/sampleEngines.cpp @@ -119,10 +119,12 @@ nvinfer1::ICudaEngine* LazilyDeserializedEngine::get() mRuntime->setDLACore(mDLACore); } mRuntime->setErrorRecorder(&gRecorder); +#if !TRT_WINML for (auto const& pluginPath : mDynamicPlugins) { mRuntime->getPluginRegistry().loadLibrary(pluginPath.c_str()); } +#endif if (getFileReader().isOpen()) { @@ -241,17 +243,20 @@ Parser modelToNetwork(ModelOptions const& model, BuildOptions const& build, nvin using namespace nvonnxparser; parser.onnxParser.reset(createONNXParser(network)); ASSERT(parser.onnxParser != nullptr); +#if !TRT_WINML // kNATIVE_INSTANCENORM is ON by default in the parser and must be cleared to use the plugin implementation. if (build.pluginInstanceNorm) { parser.onnxParser->clearFlag(OnnxParserFlag::kNATIVE_INSTANCENORM); } +#endif if (!parser.onnxParser->parseFromFile( model.baseModel.model.c_str(), static_cast(sample::gLogger.getReportableSeverity()))) { err << "Failed to parse onnx file" << std::endl; parser.onnxParser.reset(); } +#if !TRT_WINML if (vcPluginLibrariesUsed && parser.onnxParser.get()) { int64_t nbPluginLibs; @@ -271,6 +276,7 @@ Parser modelToNetwork(ModelOptions const& model, BuildOptions const& build, nvin << std::endl; } } +#endif break; } case ModelFormat::kANY: break; @@ -621,7 +627,9 @@ void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) { - auto const roundToBytes = [](double const sizeInMB) { return static_cast(sizeInMB * (1 << 20)); }; + auto const roundToBytes = [](double const size, bool fromMB = true) { + return static_cast(size * (fromMB ? 1.0_MiB : 1.0_KiB)); + }; if (build.workspace >= 0) { config.setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, roundToBytes(build.workspace)); @@ -654,15 +662,7 @@ void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) } if (build.tacticSharedMem >= 0) { - if (build.tacticSharedMem >= 0.046 && build.tacticSharedMem <= 0.047) - { - // 48KB is a common use case but user might not type the exact number 0.046875MB. 
- config.setMemoryPoolLimit(MemoryPoolType::kTACTIC_SHARED_MEMORY, 48 << 10); - } - else - { - config.setMemoryPoolLimit(MemoryPoolType::kTACTIC_SHARED_MEMORY, roundToBytes(build.tacticSharedMem)); - } + config.setMemoryPoolLimit(MemoryPoolType::kTACTIC_SHARED_MEMORY, roundToBytes(build.tacticSharedMem, false)); } } @@ -730,28 +730,6 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, input->setType(build.inputFormats[inputFormatIndex].first); input->setAllowedFormats(build.inputFormats[inputFormatIndex].second); } - else - { - switch (input->getType()) - { - case DataType::kINT32: - case DataType::kINT64: - case DataType::kBOOL: - case DataType::kHALF: - case DataType::kUINT8: - case DataType::kBF16: - // Leave these as is. - break; - case DataType::kFLOAT: - case DataType::kINT8: - // User did not specify a floating-point format. Default to kFLOAT. - input->setType(DataType::kFLOAT); - break; - case DataType::kFP8: ASSERT(false && "FP8 is not supported"); - case DataType::kINT4: ASSERT(false && "INT4 is not supported"); - } - input->setAllowedFormats(1U << static_cast(TensorFormat::kLINEAR)); - } auto const dims = input->getDimensions(); auto const isScalar = dims.nbDims == 0; @@ -889,10 +867,6 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, output->setType(build.outputFormats[outputFormatIndex].first); output->setAllowedFormats(build.outputFormats[outputFormatIndex].second); } - else - { - output->setAllowedFormats(1U << static_cast(TensorFormat::kLINEAR)); - } } setMemoryPoolLimits(config, build); @@ -939,7 +913,7 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, { config.setFlag(BuilderFlag::kVERSION_COMPATIBLE); } - +#if !TRT_WINML std::vector pluginPaths; for (auto const& pluginPath : sys.setPluginsToSerialize) { @@ -950,7 +924,7 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, { config.setPluginsToSerialize(pluginPaths.data(), pluginPaths.size()); } - +#endif if (build.excludeLeanRuntime) { config.setFlag(BuilderFlag::kEXCLUDE_LEAN_RUNTIME); @@ -988,6 +962,11 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, config.setFlag(BuilderFlag::kFP8); } + if (build.int4) + { + config.setFlag(BuilderFlag::kINT4); + } + if (build.int8 && !build.fp16) { sample::gLogInfo @@ -1213,14 +1192,12 @@ bool networkToSerializedEngine( setupNetworkAndConfig(build, sys, builder, *env.network, *config, calibrator, err, sparseWeights), "Network And Config setup failed", false, err); - std::unique_ptr timingCache{nullptr}; + std::unique_ptr timingCache{}; // Try to load cache from file. Create a fresh cache if the file doesn't exist if (build.timingCacheMode == TimingCacheMode::kGLOBAL) { - std::vector loadedCache = samplesCommon::loadTimingCacheFile(gLogger, build.timingCacheFile); - timingCache.reset(config->createTimingCache(static_cast(loadedCache.data()), loadedCache.size())); - SMP_RETVAL_IF_FALSE(timingCache != nullptr, "TimingCache creation failed", false, err); - config->setTimingCache(*timingCache, false); + timingCache + = samplesCommon::buildTimingCacheFromFile(gLogger.getTRTLogger(), *config, build.timingCacheFile, err); } // CUDA stream used for profiling by the builder. 
@@ -1250,7 +1227,7 @@ bool networkToSerializedEngine( if (build.timingCacheMode == TimingCacheMode::kGLOBAL) { auto timingCache = config->getTimingCache(); - samplesCommon::updateTimingCacheFile(gLogger, build.timingCacheFile, timingCache, builder); + samplesCommon::updateTimingCacheFile(gLogger.getTRTLogger(), build.timingCacheFile, timingCache, builder); } return true; @@ -1268,10 +1245,12 @@ bool modelToBuildEnv( auto networkFlags = (build.stronglyTyped) ? 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED) : 0U; +#if !TRT_WINML for (auto const& pluginPath : sys.dynamicPlugins) { env.builder->getPluginRegistry().loadLibrary(pluginPath.c_str()); } +#endif env.network.reset(env.builder->createNetworkV2(networkFlags)); std::vector vcPluginLibrariesUsed; @@ -1280,6 +1259,7 @@ bool modelToBuildEnv( = modelToNetwork(model, build, *env.network, err, build.versionCompatible ? &vcPluginLibrariesUsed : nullptr); SMP_RETVAL_IF_FALSE(env.parser.operator bool(), "Parsing model failed", false, err); +#if !TRT_WINML if (build.versionCompatible && !sys.ignoreParsedPluginLibs && !vcPluginLibrariesUsed.empty()) { sample::gLogInfo << "The following plugin libraries were identified by the parser as required for a " @@ -1308,6 +1288,7 @@ bool modelToBuildEnv( sample::gLogInfo << "Use --ignoreParsedPluginLibs to disable this behavior." << std::endl; } +#endif SMP_RETVAL_IF_FALSE( networkToSerializedEngine(build, sys, *env.builder, env, err), "Building engine failed", false, err); diff --git a/samples/common/sampleInference.cpp b/samples/common/sampleInference.cpp index 024dd6f6..dc3aa1c2 100644 --- a/samples/common/sampleInference.cpp +++ b/samples/common/sampleInference.cpp @@ -235,10 +235,10 @@ bool allocateContextMemory(InferenceEnvironment& iEnv, InferenceOptions const& i sample::gLogError << "Unrecognizable memory allocation strategy." << std::endl; return false; } - iEnv.deviceMemory.at(i) = std::move(TrtDeviceBuffer(sizeToAlloc)); - ec->setDeviceMemory(iEnv.deviceMemory.at(i).get()); + iEnv.deviceMemory.at(i) = TrtDeviceBuffer(sizeToAlloc); + ec->setDeviceMemoryV2(iEnv.deviceMemory.at(i).get(), iEnv.deviceMemory.at(i).getSize()); sample::gLogInfo << "Maximum device memory size across all profiles: " - << (engine->getDeviceMemorySize() / 1.0_MiB) << " MiB" << std::endl; + << (engine->getDeviceMemorySizeV2() / 1.0_MiB) << " MiB" << std::endl; sample::gLogInfo << "Only allocated device memory enough for " << allocReason << ": " << (sizeToAlloc / 1.0_MiB) << " MiB" << std::endl; } @@ -249,14 +249,19 @@ bool allocateContextMemory(InferenceEnvironment& iEnv, InferenceOptions const& i bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inference, SystemOptions const& system) { +#if TRT_WINML + int32_t const isIntegrated{}; +#else int32_t device{}; cudaCheck(cudaGetDevice(&device)); cudaDeviceProp properties; cudaCheck(cudaGetDeviceProperties(&properties, device)); + int32_t const isIntegrated{properties.integrated}; +#endif // Use managed memory on integrated devices when transfers are skipped // and when it is explicitly requested on the commandline. 
- bool useManagedMemory{(inference.skipTransfers && properties.integrated) || inference.useManaged}; + bool useManagedMemory{(inference.skipTransfers && isIntegrated) || inference.useManaged}; using FillSafeBindings = FillBindingClosure; if (iEnv.safe) { @@ -302,15 +307,24 @@ bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inferenc { auto const& budget = inference.weightStreamingBudget; int64_t wsBudget = budget.bytes; - if (budget.percent != WeightStreamingBudget::kDISABLE) + if (budget.percent != 100.0) { double const percent = budget.percent; - ASSERT(percent > 0.0); - auto const min = engine->getMinimumWeightStreamingBudget(); + ASSERT(percent < 100.0); auto const max = engine->getStreamableWeightsSize(); - wsBudget = (max >= min) ? (1 - percent / 100) * (max - min) + min : WeightStreamingBudget::kDISABLE; + wsBudget = (max >= 0) ? (percent / 100) * (max) : WeightStreamingBudget::kDISABLE; } - bool success = engine->setWeightStreamingBudget(wsBudget); + + if (wsBudget == WeightStreamingBudget::kDISABLE) + { + wsBudget = engine->getStreamableWeightsSize(); + } + else if (wsBudget == WeightStreamingBudget::kAUTOMATIC) + { + wsBudget = engine->getWeightStreamingAutomaticBudget(); + } + ASSERT(wsBudget >= 0); + bool success = engine->setWeightStreamingBudgetV2(wsBudget); SMP_RETVAL_IF_FALSE(success, "Failed to set weight streaming limit!", false, sample::gLogError); switch (wsBudget) { @@ -1277,10 +1291,12 @@ bool timeDeserialize(InferenceEnvironment& iEnv, SystemOptions const& sys) auto& reader = iEnv.engine.getFileReader(); reader.reset(); ASSERT(reader.isOpen()); +#if !TRT_WINML for (auto const& pluginPath : sys.dynamicPlugins) { rt->getPluginRegistry().loadLibrary(pluginPath.c_str()); } +#endif engine.reset(rt->deserializeCudaEngine(reader)); deserializeOK = (engine != nullptr); } diff --git a/samples/common/sampleInference.h b/samples/common/sampleInference.h index e8e53bb7..af8fc181 100644 --- a/samples/common/sampleInference.h +++ b/samples/common/sampleInference.h @@ -18,6 +18,7 @@ #ifndef TRT_SAMPLE_INFERENCE_H #define TRT_SAMPLE_INFERENCE_H +#include "sampleDevice.h" #include "sampleEngines.h" #include "sampleReporting.h" #include "sampleUtils.h" @@ -29,7 +30,6 @@ #include #include -#include "NvInfer.h" #include "NvInferSafeRuntime.h" namespace sample diff --git a/samples/common/sampleOptions.cpp b/samples/common/sampleOptions.cpp index 7f2bd9f1..1532b66e 100644 --- a/samples/common/sampleOptions.cpp +++ b/samples/common/sampleOptions.cpp @@ -44,6 +44,12 @@ static const std::map> kUNIT_MULTIPLIERS{ {'G', {1 << 30, "Gibibytes"}}, }; +std::string addDefaultUnitSuffixIfNotSpecified(std::string const& option, char defaultUnit) +{ + char lastChar = option.at(option.size() - 1); + return std::isdigit(lastChar) ? option + defaultUnit : option; +} + // Returns "B (Bytes), K (Kilobytes), ..." 
std::string getAvailableUnitSuffixes() { @@ -141,7 +147,7 @@ nvinfer1::DataType stringToValue(const std::string& option) const std::unordered_map strToDT{{"fp32", nvinfer1::DataType::kFLOAT}, {"fp16", nvinfer1::DataType::kHALF}, {"bf16", nvinfer1::DataType::kBF16}, {"int8", nvinfer1::DataType::kINT8}, {"fp8", nvinfer1::DataType::kFP8}, {"int32", nvinfer1::DataType::kINT32}, {"int64", nvinfer1::DataType::kINT64}, - {"bool", nvinfer1::DataType::kBOOL}, {"uint8", nvinfer1::DataType::kUINT8}}; + {"bool", nvinfer1::DataType::kBOOL}, {"uint8", nvinfer1::DataType::kUINT8}, {"int4", nvinfer1::DataType::kINT4}}; const auto& dt = strToDT.find(option); if (dt == strToDT.end()) { @@ -245,11 +251,11 @@ WeightStreamingBudget stringToValue(std::string const& op else { double bytes = stringToValue(option); - if (!(bytes == WeightStreamingBudget::kAUTOMATIC || bytes >= WeightStreamingBudget::kDISABLE)) + if (!(bytes == WeightStreamingBudget::kAUTOMATIC || bytes == WeightStreamingBudget::kDISABLE || bytes >= 0)) { std::ostringstream err; - err << "The weight streaming budget must be " << WeightStreamingBudget::kAUTOMATIC << " or at least " - << WeightStreamingBudget::kDISABLE << "."; + err << "The weight streaming budget must be " << WeightStreamingBudget::kDISABLE << ", " + << WeightStreamingBudget::kAUTOMATIC << ", or at least 0."; throw std::invalid_argument(err.str()); } budget.bytes = static_cast(bytes); @@ -803,6 +809,10 @@ std::ostream& printPrecision(std::ostream& os, BuildOptions const& options) { os << "+FP8"; } + if (options.int4) + { + os << "+INT4"; + } if (options.stronglyTyped) { os << " (Strongly Typed)"; @@ -857,10 +867,10 @@ std::ostream& printSparsity(std::ostream& os, BuildOptions const& options) std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options) { - auto const printValueOrDefault = [&os](double const val) { + auto const printValueOrDefault = [&os](double const val, char const* unit = "MiB") { if (val >= 0) { - os << val << " MiB"; + os << val << " " << unit; } else { @@ -880,7 +890,7 @@ std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options) printValueOrDefault(options.dlaGlobalDRAM); os << ", "; os << "tacticSharedMem: "; - printValueOrDefault(options.tacticSharedMem); + printValueOrDefault(options.tacticSharedMem, "KiB"); return os; } @@ -1117,7 +1127,9 @@ void BuildOptions::parse(Arguments& arguments) double memPoolSize; try { - std::tie(memPoolName, memPoolSize) = splitNameAndValue(memPoolSpec); + std::string strPoolSize; + std::tie(memPoolName, strPoolSize) = splitNameAndValue(memPoolSpec); + memPoolSize = stringToValue(addDefaultUnitSuffixIfNotSpecified(strPoolSize, 'M')); } catch (std::invalid_argument const& arg) { @@ -1132,23 +1144,28 @@ void BuildOptions::parse(Arguments& arguments) } if (memPoolName == "workspace") { - workspace = memPoolSize; + // use unit in MB. + workspace = memPoolSize / 1.0_MiB; } else if (memPoolName == "dlaSRAM") { - dlaSRAM = memPoolSize; + // use unit in MB. + dlaSRAM = memPoolSize / 1.0_MiB; } else if (memPoolName == "dlaLocalDRAM") { - dlaLocalDRAM = memPoolSize; + // use unit in MB. + dlaLocalDRAM = memPoolSize / 1.0_MiB; } else if (memPoolName == "dlaGlobalDRAM") { - dlaGlobalDRAM = memPoolSize; + // use unit in MB. + dlaGlobalDRAM = memPoolSize / 1.0_MiB; } else if (memPoolName == "tacticSharedMem") { - tacticSharedMem = memPoolSize; + // use unit in KB. 
+ tacticSharedMem = memPoolSize / 1.0_KiB; } else if (!memPoolName.empty()) { @@ -1177,6 +1194,14 @@ void BuildOptions::parse(Arguments& arguments) getAndDelOption(arguments, "--weightless", stripWeights); getAndDelOption(arguments, "--stripWeights", stripWeights); + bool stripAllWeights{}; + getAndDelOption(arguments, "--stripAllWeights", stripAllWeights); + if (stripAllWeights) + { + refittable = true; + stripWeights = true; + } + // --vc and --versionCompatible are synonyms getAndDelOption(arguments, "--vc", versionCompatible); if (!versionCompatible) @@ -1184,12 +1209,14 @@ void BuildOptions::parse(Arguments& arguments) getAndDelOption(arguments, "--versionCompatible", versionCompatible); } +#if !TRT_WINML // --pi and --pluginInstanceNorm are synonyms getAndDelOption(arguments, "--pi", pluginInstanceNorm); if (!pluginInstanceNorm) { getAndDelOption(arguments, "--pluginInstanceNorm", pluginInstanceNorm); } +#endif getAndDelOption(arguments, "--excludeLeanRuntime", excludeLeanRuntime); getAndDelOption(arguments, "--noCompilationCache", disableCompilationCache); @@ -1198,6 +1225,7 @@ void BuildOptions::parse(Arguments& arguments) getAndDelOption(arguments, "--bf16", bf16); getAndDelOption(arguments, "--int8", int8); getAndDelOption(arguments, "--fp8", fp8); + getAndDelOption(arguments, "--int4", int4); getAndDelOption(arguments, "--stronglyTyped", stronglyTyped); if (stronglyTyped) { @@ -1214,6 +1242,7 @@ void BuildOptions::parse(Arguments& arguments) disableAndLog(int8, "int8", "kINT8"); disableAndLog(bf16, "bf16", "kBF16"); disableAndLog(fp8, "fp8", "kFP8"); + disableAndLog(int4, "int4", "kINT4"); } if (fp8 && int8) @@ -1515,6 +1544,7 @@ void SystemOptions::parse(Arguments& arguments) { getAndDelOption(arguments, "--device", device); getAndDelOption(arguments, "--useDLACore", DLACore); +#if !TRT_WINML std::string pluginName; while (getAndDelOption(arguments, "--plugins", pluginName)) { @@ -1534,6 +1564,7 @@ void SystemOptions::parse(Arguments& arguments) dynamicPlugins.emplace_back(pluginName); } getAndDelOption(arguments, "--ignoreParsedPluginLibs", ignoreParsedPluginLibs); +#endif } constexpr int64_t WeightStreamingBudget::kDISABLE; @@ -1784,6 +1815,7 @@ void SafeBuilderOptions::parse(Arguments& arguments) getAndDelOption(arguments, "--calib", calibFile); getAndDelOption(arguments, "--consistency", consistency); getAndDelOption(arguments, "--std", standard); +#if !TRT_WINML std::string pluginName; while (getAndDelOption(arguments, "--plugins", pluginName)) { @@ -1794,6 +1826,7 @@ void SafeBuilderOptions::parse(Arguments& arguments) { plugins.emplace_back(pluginName); } +#endif bool noBuilderCache{false}; getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); @@ -2078,7 +2111,9 @@ std::ostream& operator<<(std::ostream& os, const BuildOptions& options) "Refit: " << boolToEnabled(options.refittable) << std::endl << "Strip weights: " << boolToEnabled(options.stripWeights) << std::endl << "Version Compatible: " << boolToEnabled(options.versionCompatible) << std::endl << +#if !TRT_WINML "ONNX Plugin InstanceNorm: " << boolToEnabled(options.pluginInstanceNorm) << std::endl << +#endif "TensorRT runtime: " << options.useRuntime << std::endl << "Lean DLL Path: " << options.leanDLLPath << std::endl << "Tempfile Controls: "; printTempfileControls(os, options.tempfileControls) << std::endl << @@ -2138,6 +2173,7 @@ std::ostream& operator<<(std::ostream& os, const SystemOptions& options) "Device: " << options.device << 
std::endl << "DLACore: " << (options.DLACore != -1 ? std::to_string(options.DLACore) : "") << std::endl; +#if !TRT_WINML os << "Plugins:"; for (const auto& p : options.plugins) @@ -2164,7 +2200,7 @@ std::ostream& operator<<(std::ostream& os, const SystemOptions& options) os << "ignoreParsedPluginLibs: " << options.ignoreParsedPluginLibs << std::endl; os << std::endl; - +#endif return os; // clang-format on } @@ -2286,19 +2322,23 @@ std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) { os << " + FP8"; } + if (options.int4) + { + os << " + INT4"; + } os << std::endl; os << "Calibration file: " << options.calibFile << std::endl; os << "Serialized Network: " << options.serialized << std::endl; printIOFormats(os, "Input(s)", options.inputFormats); printIOFormats(os, "Output(s)", options.outputFormats); - +#if !TRT_WINML os << "Plugins:"; for (const auto& p : options.plugins) { os << " " << p; } - +#endif os << "timingCacheMode: "; printTimingCache(os, options.timingCacheMode) << std::endl; os << "timingCacheFile: " << options.timingCacheFile << std::endl; @@ -2356,11 +2396,13 @@ void BuildOptions::help(std::ostream& os) R"( type ::= "fp32"|"fp16"|"bf16"|"int32"|"int64"|"int8"|"uint8"|"bool")" "\n" R"( fmt ::= ("chw"|"chw2"|"chw4"|"hwc8"|"chw16"|"chw32"|"dhwc8"|)" "\n" R"( "cdhw32"|"hwc"|"dla_linear"|"dla_hwc4")["+"fmt])" "\n" - " --memPoolSize=poolspec Specify the size constraints of the designated memory pool(s) in MiB." "\n" - " Note: Also accepts decimal sizes, e.g. 0.25MiB. Will be rounded down to the nearest integer bytes." "\n" + " --memPoolSize=poolspec Specify the size constraints of the designated memory pool(s)" "\n" + " Supports the following base-2 suffixes: " << getAvailableUnitSuffixes() << "." "\n" + " If no suffix is appended, the default unit is MiB." "\n" + " Note: Also accepts decimal sizes, e.g. 0.25M. Will be rounded down to the nearest integer bytes." "\n" " In particular, for dlaSRAM the bytes will be rounded down to the nearest power of 2." "\n" R"( Pool constraint: poolspec ::= poolfmt[","poolspec])" "\n" - " poolfmt ::= pool:sizeInMiB" "\n" + " poolfmt ::= pool:size" "\n" R"( pool ::= "workspace"|"dlaSRAM"|"dlaLocalDRAM"|"dlaGlobalDRAM"|"tacticSharedMem")" "\n" " --profilingVerbosity=mode Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)." "\n" " Please only assign once." "\n" @@ -2371,11 +2413,16 @@ void BuildOptions::help(std::ostream& os) " --stripWeights Strip weights from plan. This flag works with either refit or refit with identical weights. Default""\n" " to latter, but you can switch to the former by enabling both --stripWeights and --refit at the same""\n" " time." "\n" + " --stripAllWeights Alias for combining the --refit and --stripWeights options. It marks all weights as refittable," "\n" + " disregarding any performance impact. Additionally, it strips all refittable weights after the " "\n" + " engine is built." "\n" " --weightless [Deprecated] this knob has been deprecated. Please use --stripWeights" "\n" " --versionCompatible, --vc Mark the engine as version compatible. This allows the engine to be used with newer versions" "\n" " of TensorRT on the same host OS, as well as TensorRT's dispatch and lean runtimes." "\n" +#if !TRT_WINML " --pluginInstanceNorm, --pi Set `kNATIVE_INSTANCENORM` to false in the ONNX parser. This will cause the ONNX parser to use" "\n" " a plugin InstanceNorm implementation over the native implementation when parsing."
"\n" +#endif R"( --useRuntime=runtime TensorRT runtime to execute engine. "lean" and "dispatch" require loading VC engine and do)" "\n" " not support building an engine." "\n" R"( runtime::= "full"|"lean"|"dispatch")" "\n" @@ -2383,7 +2430,6 @@ void BuildOptions::help(std::ostream& os) " --excludeLeanRuntime When --versionCompatible is enabled, this flag indicates that the generated engine should" "\n" " not include an embedded lean runtime. If this is set, the user must explicitly specify a" "\n" " valid lean runtime to use when loading the engine." "\n" - " Only supported with weights within the engine." "\n" " --sparsity=spec Control sparsity (default = disabled). " "\n" R"( Sparsity: spec ::= "disable", "enable", "force")" "\n" " Note: Description about each of these options is as below" "\n" @@ -2399,6 +2445,7 @@ void BuildOptions::help(std::ostream& os) " --bf16 Enable bf16 precision, in addition to fp32 (default = disabled)" "\n" " --int8 Enable int8 precision, in addition to fp32 (default = disabled)" "\n" " --fp8 Enable fp8 precision, in addition to fp32 (default = disabled)" "\n" + " --int4 Enable int4 precision, in addition to fp32 (default = disabled)" "\n" " --best Enable all precisions to achieve the best performance (default = disabled)" "\n" " --stronglyTyped Create a strongly typed network. (default = disabled)" "\n" " --directIO Avoid reformatting at network boundaries. (default = disabled)" "\n" @@ -2499,12 +2546,16 @@ void SystemOptions::help(std::ostream& os) os << "=== System Options ===" << std::endl << " --device=N Select cuda device N (default = " << defaultDevice << ")" << std::endl << " --useDLACore=N Select DLA core N for layers that support DLA (default = none)" << std::endl << +#if TRT_WINML + std::endl; +#else " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << " --dynamicPlugins Plugin library (.so) to load dynamically and may be serialized with the engine if they are included in --setPluginsToSerialize (can be specified multiple times)" << std::endl << " --setPluginsToSerialize Plugin library (.so) to be serialized with the engine (can be specified multiple times)" << std::endl << " --ignoreParsedPluginLibs By default, when building a version-compatible engine, plugin libraries specified by the ONNX parser " << std::endl << " are implicitly serialized with the engine (unless --excludeLeanRuntime is specified) and loaded dynamically. " << std::endl << " Enable this flag to ignore these plugin libraries instead." << std::endl; +#endif // clang-format on } @@ -2569,12 +2620,12 @@ void InferenceOptions::help(std::ostream& os) R"( Ival ::= name":"file)" << std::endl << " --weightStreamingBudget Set the maximum amount of GPU memory TensorRT is allowed to use for weights." << std::endl << " It can take on the following values:" << std::endl << + " -2: (default) Disable weight streaming at runtime." << std::endl << " -1: TensorRT will automatically decide the budget." << std::endl << - " 0: (default) Disable weight streaming at runtime." << std::endl << - " 0-100%: Percentage of streamable weights that should be streamed." << std::endl << - " 100% saves the most memory but will have the worst performance." << std::endl << + " 0-100%: Percentage of streamable weights that reside on the GPU." << std::endl << + " 0% saves the most memory but will have the worst performance." << std::endl << " Requires the % character." << std::endl << - " >0B: The exact amount of streambale weights that reside on the GPU. 
Supports the " << std::endl << + " >=0B: The exact amount of streamable weights that reside on the GPU. Supports the " << std::endl << " following base-2 suffixes: " << getAvailableUnitSuffixes() << "." << std::endl; // clang-format on } @@ -2672,7 +2723,9 @@ void SafeBuilderOptions::printHelp(std::ostream& os) " --std Build standard serialized engine, (default = disabled)" << std::endl << " --calib= Read INT8 calibration cache file" << std::endl << " --serialized= Save the serialized network" << std::endl << +#if !TRT_WINML " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << +#endif " --verbose or -v Use verbose logging (default = false)" << std::endl << " --help or -h Print this message" << std::endl << " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" << std::endl << diff --git a/samples/common/sampleOptions.h b/samples/common/sampleOptions.h index cddbc60d..3c00ffa3 100644 --- a/samples/common/sampleOptions.h +++ b/samples/common/sampleOptions.h @@ -147,10 +147,10 @@ using StringSet = std::unordered_set; class WeightStreamingBudget { public: - static constexpr int64_t kDISABLE{0}; + static constexpr int64_t kDISABLE{-2}; static constexpr int64_t kAUTOMATIC{-1}; int64_t bytes{kDISABLE}; - double percent{static_cast(kDISABLE)}; + double percent{static_cast(100.0)}; bool isDisabled() { @@ -198,10 +198,15 @@ constexpr nvinfer1::TempfileControlFlags getTempfileControlDefaults() class BuildOptions : public Options { public: + // Unit in MB. double workspace{-1.0}; + // Unit in MB. double dlaSRAM{-1.0}; + // Unit in MB. double dlaLocalDRAM{-1.0}; + // Unit in MB. double dlaGlobalDRAM{-1.0}; + // Unit in KB. double tacticSharedMem{-1.0}; int32_t avgTiming{defaultAvgTiming}; size_t calibProfile{defaultOptProfileIndex}; @@ -210,6 +215,7 @@ class BuildOptions : public Options bool bf16{false}; bool int8{false}; bool fp8{false}; + bool int4{false}; bool stronglyTyped{false}; bool directIO{false}; PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE}; @@ -349,6 +355,7 @@ class SafeBuilderOptions : public Options std::vector outputFormats; bool int8{false}; bool fp8{false}; + bool int4{false}; std::string calibFile{}; std::vector plugins; bool consistency{false}; diff --git a/samples/common/sampleReporting.h b/samples/common/sampleReporting.h index c6813fe6..74f510cf 100644 --- a/samples/common/sampleReporting.h +++ b/samples/common/sampleReporting.h @@ -22,12 +22,7 @@ #include #include -#include "NvInfer.h" - -#include "sampleDevice.h" -#include "sampleInference.h" #include "sampleOptions.h" -#include "sampleUtils.h" namespace sample { diff --git a/samples/common/sampleUtils.cpp b/samples/common/sampleUtils.cpp index 522cde65..689e5857 100644 --- a/samples/common/sampleUtils.cpp +++ b/samples/common/sampleUtils.cpp @@ -37,7 +37,8 @@ size_t dataTypeSize(nvinfer1::DataType dataType) case nvinfer1::DataType::kUINT8: case nvinfer1::DataType::kINT8: case nvinfer1::DataType::kFP8: return 1U; - case nvinfer1::DataType::kINT4: ASSERT(false && "Element size is not implemented for sub-byte data-types (INT4)"); + case nvinfer1::DataType::kINT4: + ASSERT(false && "Element size is not implemented for sub-byte data-types."); } return 0; } @@ -398,8 +399,9 @@ void sparsify(Weights const& weights, int32_t k, int32_t trs, std::vector=10.0.0 git+https://github.com/facebookresearch/detectron2.git @@ -7,5 +7,5 @@ cuda-python==12.2.0 pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 
-tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/efficientdet/requirements.txt b/samples/python/efficientdet/requirements.txt index 4a29d8eb..e69a0278 100644 --- a/samples/python/efficientdet/requirements.txt +++ b/samples/python/efficientdet/requirements.txt @@ -1,10 +1,10 @@ Pillow>=10.0.0 -onnx==1.14.0 +onnx==1.16.0 onnxruntime==1.15.1 tf2onnx==1.8.1 cuda-python==12.2.0 pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/efficientnet/requirements.txt b/samples/python/efficientnet/requirements.txt index d1c21fbe..74c92ba5 100644 --- a/samples/python/efficientnet/requirements.txt +++ b/samples/python/efficientnet/requirements.txt @@ -1,10 +1,10 @@ Pillow>=10.0.0 -onnx==1.14.0 +onnx==1.16.0 tensorrt>=7.1.0.0 tf2onnx==1.8.1 cuda-python==12.2.0 pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/engine_refit_onnx_bidaf/requirements.txt b/samples/python/engine_refit_onnx_bidaf/requirements.txt index 8b4d45ea..cdb0f837 100644 --- a/samples/python/engine_refit_onnx_bidaf/requirements.txt +++ b/samples/python/engine_refit_onnx_bidaf/requirements.txt @@ -4,5 +4,5 @@ cuda-python==12.2.0 pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/introductory_parser_samples/requirements.txt b/samples/python/introductory_parser_samples/requirements.txt index a9ca7fb0..eaf6990e 100644 --- a/samples/python/introductory_parser_samples/requirements.txt +++ b/samples/python/introductory_parser_samples/requirements.txt @@ -3,5 +3,5 @@ cuda-python==12.2.0 pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/network_api_pytorch_mnist/requirements.txt b/samples/python/network_api_pytorch_mnist/requirements.txt index 70b77d3f..a9da4c91 100644 --- a/samples/python/network_api_pytorch_mnist/requirements.txt +++ b/samples/python/network_api_pytorch_mnist/requirements.txt @@ -13,5 +13,5 @@ cuda-python==12.2.0 pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/non_zero_plugin/README.md b/samples/python/non_zero_plugin/README.md new file mode 100644 index 00000000..8d6afcc5 --- /dev/null +++ b/samples/python/non_zero_plugin/README.md @@ -0,0 +1,104 @@ +# Python-based NonZero Plugin for TensorRT using IPluginV3 + +## Description + +This sample, `non_zero_plugin`, implements a Python-based plugin for the NonZero operation, configurable to use a `CUDA Python` or `PyTorch` backend. + +NonZero is an operation where the non-zero indices of the input tensor are found. + +## How does this sample work? + +This sample creates and runs a TensorRT engine built from a network containing a single NonZeroPlugin node. It demonstrates how +custom layers with data-dependent output shapes can be implemented and added to a TensorRT network using Python. + +### Implementing a NonZero plugin using IPluginV3 interface + +Before `IPluginV3` (and its associated interfaces), TensorRT plugins could not have outputs whose shapes depended on the input values (they could only depend +on input shapes). `IPluginV3OneBuild`, which exposes the build capability of `IPluginV3`, provides support for such data-dependent output shapes.
+ +`NonZeroPlugin` in this sample is written to handle 2-D input tensors of shape $R \times C$. Assume that the tensor contains $K$ non-zero elements and that the +non-zero indices are required in a row ordering (each set of indices in its own row). Then the output shape would be $K \times 2$. + +The output shapes are expressed to the TensorRT builder through the `IPluginV3OneBuild.get_output_shapes()` API. Expressing the second dimension of the output is +straightforward: +``` +# output_dims[0] = trt.DimsExprs(2) +output_dims[0][1] = exprBuilder.constant(2) +``` + +The extent of each data-dependent dimension in the plugin must be expressed in terms of a *size tensor*. A size tensor is a scalar output of type +`trt.int32` or `trt.int64` that must be added as one of the plugin outputs. In this case, it is sufficient to declare one size tensor to denote the extent of the +first dimension of the non-zero indices output. To declare a size tensor, one must provide an upper bound and an optimum value for its extent as `IDimensionExpr`s. These can be formed through the `IExprBuilder` argument passed to the `IPluginV3OneBuild.get_output_shapes()` method. + - The number of non-zero elements is not known in advance, so the upper bound is the total number of elements in the input + ``` + upper_bound = exprBuilder.operation(trt.DimensionOperation.PROD, inputs[0][0], inputs[0][1]) + ``` + - A good estimate for the optimum is that half of the elements are non-zero + ``` + opt_value = exprBuilder.operation(trt.DimensionOperation.FLOOR_DIV, upper_bound, exprBuilder.constant(2)) + ``` + +Now we can declare the size tensor using the `IExprBuilder.declare_size_tensor()` method, which also requires the specification of the output index at which the size tensor would reside. Let us place it after the non-zero indices output: +``` +num_non_zero_size_tensor = exprBuilder.declare_size_tensor(1, opt_value, upper_bound) +``` + +Now we are ready to specify the extent of the first dimension of the non-zero indices output: +``` +# output_dims[1] = trt.DimsExprs(0) +output_dims[0][0] = num_non_zero_size_tensor +``` +Note that the size tensor itself is declared to be a scalar (0-D), as shown in the comment above. + +### Creating network and building the engine + +To add the plugin to the network, the `INetworkDefinition.add_plugin_v3()` method must be used; a condensed sketch of this flow appears after the Sample `--help` options section below. + +As with `IPluginCreator` for V2 plugins, V3 plugins must be accompanied by the registration of a plugin creator implementing the `IPluginCreatorV3One` interface. + +## Running the sample + +1. Run the sample to create a TensorRT inference engine and run inference: + `python3 non_zero_plugin.py [-h] [--precision {fp32,fp16}] [--backend {cuda_python,torch}] [--net_type {onnx,inetdef}]` + +2. Verify that the sample ran successfully. If it did, you should see the following message: + ``` + Inference result correct! + ``` + +### Sample `--help` options + +To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
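+A condensed sketch of the creator registration and network-insertion flow described above, assuming a `network` with an input tensor `inputX` already exists (names mirror this sample's `non_zero_plugin.py`; the `b"torch"` backend value is just an example):
+```
+# Register the creator once, then look it up by name, version, and namespace
+plg_registry = trt.get_plugin_registry()
+my_plugin_creator = NonZeroPluginCreator()
+plg_registry.register_creator(my_plugin_creator, "")
+creator = plg_registry.get_creator("NonZeroPlugin", "1", "")
+
+# Instantiate the plugin with its "backend" field and add it to the network
+pfc = trt.PluginFieldCollection([trt.PluginField("backend", b"torch", trt.PluginFieldType.CHAR)])
+plugin = creator.create_plugin("NonZeroPlugin", pfc, trt.TensorRTPhase.BUILD)
+layer = network.add_plugin_v3([inputX], [], plugin)  # inputs, shape inputs, plugin
+```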
+ + +# Additional resources + +The following resources provide a deeper understanding of V3 TensorRT plugins and the NonZero operation: + +**NonZero** +- [ONNX: NonZero](https://onnx.ai/onnx/operators/onnx__NonZero.html) + +**C++-based NonZero Plugin sample** +- [NonZero C++ Plugin](../../sampleNonZeroPlugin/) + +**TensorRT plugins** +- [Extending TensorRT with Custom Layers](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#extending) +- [TensorRT Python-based Plugins](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/#add_custom_layer_python) + +**Other documentation** +- [Introduction To NVIDIA’s TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples) +- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/#python_topics) +- [NVIDIA’s TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html) + +# License + +For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation. + +# Changelog + +April 2024 +This is the first version of this `README.md` file. + +# Known issues + +There are no known issues in this sample. diff --git a/samples/python/non_zero_plugin/non_zero_plugin.py b/samples/python/non_zero_plugin/non_zero_plugin.py new file mode 100644 index 00000000..89ef3826 --- /dev/null +++ b/samples/python/non_zero_plugin/non_zero_plugin.py @@ -0,0 +1,352 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
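+#
+# Structure of this file: a NonZeroPlugin class implementing the IPluginV3
+# interfaces, with a selectable backend (raw CUDA kernels launched through
+# cuda-python, or torch.nonzero); a matching NonZeroPluginCreator; and a
+# __main__ driver that builds the network from ONNX or via INetworkDefinition
+# and validates the engine output against numpy.nonzero.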
+# + +import onnx_graphsurgeon as gs +import numpy as np +import onnx +import os +import sys + +import tensorrt as trt +from polygraphy.backend.trt import ( + CreateConfig, + EngineFromNetwork, + NetworkFromOnnxPath, + TrtRunner, + create_network, + engine_from_network, +) + +import argparse + +from polygraphy import mod + +sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +from plugin_utils import checkCudaErrors, KernelHelper, UnownedMemory, volume + +cuda = mod.lazy_import("cuda.cuda") +cudart = mod.lazy_import("cuda.cudart") +nvrtc = mod.lazy_import("cuda.nvrtc") + +torch = mod.lazy_import("torch") +cp = mod.lazy_import("cupy") + +non_zero_half_kernel = r''' +#include +extern "C" __global__ +void find_non_zero_indices_half( + half const* X, int* indices, int* count, int R, int C) +{ + int row = blockIdx.x * blockDim.x + threadIdx.x; + + // Check if the row index is within bounds + if (row < R) + { + + for (int col = 0; col < C; ++col) + { + half const z = static_cast(0.F); + if (X[col + C * row] != z) + { + int index = atomicAdd(count, 1); // Increment count atomically and get the previous value + indices[2 * index] = row; + indices[2 * index + 1] = col; + } + } + } +} +''' + +non_zero_float_kernel = r''' +extern "C" __global__ +void find_non_zero_indices_float( + float const* X, int* indices, int* count, int R, int C) +{ + int row = blockIdx.x * blockDim.x + threadIdx.x; + + // Check if the row index is within bounds + if (row < R) + { + + for (int col = 0; col < C; ++col) + { + if (X[col + C * row] != 0.F) + { + int index = atomicAdd(count, 1); // Increment count atomically and get the previous value + indices[2 * index] = row; + indices[2 * index + 1] = col; + } + } + } +} +''' + +class NonZeroPlugin(trt.IPluginV3, trt.IPluginV3OneCore, trt.IPluginV3OneBuild, trt.IPluginV3OneRuntime): + def __init__(self, backend = None): + trt.IPluginV3.__init__(self) + trt.IPluginV3OneCore.__init__(self) + trt.IPluginV3OneBuild.__init__(self) + trt.IPluginV3OneRuntime.__init__(self) + + self.num_outputs = 2 + self.plugin_namespace = "" + self.plugin_name = "NonZeroPlugin" + self.plugin_version = "1" + + if backend is not None: + self.backend = backend.tobytes().decode("utf-8") + else: + self.backend = "cuda_python" + + self.cuDevice = None + + def get_capability_interface(self, type): + return self + + def get_output_data_types(self, input_types): + return [trt.DataType.INT32, trt.DataType.INT32] + + def get_output_shapes(self, inputs, shape_inputs, exprBuilder): + # First output is 2-D + # Second output is a size tensor, which must be declared a scalar (0-D) + output_dims = [trt.DimsExprs(2), trt.DimsExprs(0)] + + upper_bound = exprBuilder.operation(trt.DimensionOperation.PROD, inputs[0][0], inputs[0][1]) + opt_value = exprBuilder.operation(trt.DimensionOperation.FLOOR_DIV, upper_bound, exprBuilder.constant(2)) + num_non_zero_size_tensor = exprBuilder.declare_size_tensor(1, opt_value, upper_bound) + + output_dims[0][0] = num_non_zero_size_tensor + output_dims[0][1] = exprBuilder.constant(2) + + return output_dims + + def get_fields_to_serialize(self): + return trt.PluginFieldCollection( + [ + trt.PluginField( + "backend", self.backend.encode(), trt.PluginFieldType.CHAR + ) + ] + ) + + def configure_plugin(self, inp, out): + if self.backend == "cuda_python": + err, self.cuDevice = cuda.cuDeviceGet(0) + + def on_shape_change(self, inp, out): + if self.backend == "cuda_python": + err, self.cuDevice = cuda.cuDeviceGet(0) + + def supports_format_combination(self, 
pos, in_out, num_inputs): + assert num_inputs == 1 + assert pos < len(in_out) + + type_ok = False + + # first input should be float16 or float32 + if pos == 0: + type_ok = in_out[0].desc.type == trt.DataType.FLOAT or in_out[0].desc.type == trt.DataType.HALF + elif pos == 1: + type_ok = in_out[1].desc.type == trt.DataType.INT32 + else: # pos == 2 + # size tensor outputs must be NCHW INT32 + type_ok = in_out[2].desc.type == trt.DataType.INT32 + + return in_out[pos].desc.format == trt.TensorFormat.LINEAR and type_ok + + def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream): + inp_dtype = trt.nptype(input_desc[0].type) + + if self.backend == "cuda_python": + R = input_desc[0].dims[0] + C = input_desc[0].dims[1] + + blockSize = 256 + numBlocks = int((C + blockSize - 1) // blockSize) + + d_in = np.array([inputs[0]], dtype=np.uint64) + d_out_0 = np.array([outputs[0]], dtype=np.uint64) + d_out_1 = np.array([outputs[1]], dtype=np.uint64) + + args = [d_in, d_out_0, d_out_1, np.array(R, dtype=np.uint32), np.array(C, dtype=np.uint32)] + kernelArgs = np.array([arg.ctypes.data for arg in args], dtype=np.uint64) + + stream_ptr = np.array([stream], dtype=np.uint64) + + if inp_dtype == np.float32: + kernelHelper = KernelHelper(non_zero_float_kernel, int(self.cuDevice)) + _non_zero_float_kernel = kernelHelper.getFunction(b'find_non_zero_indices_float') + checkCudaErrors(cuda.cuLaunchKernel(_non_zero_float_kernel, + numBlocks, 1, 1, + blockSize, 1, 1, + 0, + stream_ptr, + kernelArgs, 0)) + elif inp_dtype == np.float16: + kernelHelper = KernelHelper(non_zero_half_kernel, int(self.cuDevice)) + _non_zero_half_kernel = kernelHelper.getFunction(b'find_non_zero_indices_half') + checkCudaErrors(cuda.cuLaunchKernel(_non_zero_half_kernel, + numBlocks, 1, 1, + blockSize, 1, 1, + 0, + stream_ptr, + kernelArgs, 0)) + else: + raise ValueError("inp_dtype not valid") + + elif self.backend == "torch": + inp_mem = UnownedMemory(inputs[0], input_desc[0].dims, inp_dtype) + + out_mem = UnownedMemory( + outputs[0], 2 * volume(input_desc[0].dims), np.int32 + ) + + out_1_mem = UnownedMemory(outputs[1], 1, np.int32) + + a_t = torch.as_tensor(inp_mem.d, device="cuda") + out = torch.nonzero(a_t) + + out_mem.d[: volume(out.shape)] = cp.reshape(cp.asarray(out), (-1,)) + cp.copyto(out_1_mem.d, cp.reshape(cp.asarray([out.shape[0]]), (-1,))) + + else: + raise ValueError(f"backend not valid: {self.backend}") + + def attach_to_context(self, context): + return self.clone() + + def set_tactic(self, tactic): + pass + + def clone(self): + cloned_plugin = NonZeroPlugin() + cloned_plugin.__dict__.update(self.__dict__) + return cloned_plugin + + # + # The following defaults take effect since the respective methods are not overriden + # + + # def get_valid_tactics(self): + # return [] + + # def get_workspace_size(self, input_desc, output_desc): + # return 0 + + # def destroy(self): + # pass + + +class NonZeroPluginCreator(trt.IPluginCreatorV3One): + def __init__(self): + trt.IPluginCreatorV3One.__init__(self) + self.name = "NonZeroPlugin" + self.plugin_namespace = "" + self.plugin_version = "1" + self.field_names = trt.PluginFieldCollection( + [trt.PluginField("backend", np.array([]), trt.PluginFieldType.CHAR)] + ) + + def create_plugin(self, name, fc, phase): + backend = None + for f in fc: + if f.name == "backend": + backend = f.data[:-1] if f.data[-1] == 0 else f.data + return NonZeroPlugin(backend) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--precision', type=str, 
default="fp32", choices=["fp32", "fp16"]) + parser.add_argument("--backend", type=str, default="torch", choices=["cuda_python", "torch"]) + parser.add_argument('--net_type', type=str, default="onnx", choices=["onnx", "inetdef"]) + + args = parser.parse_args() + + if args.backend == "cuda_python": + # Initialize CUDA Driver API + err, = cuda.cuInit(0) + # Retrieve handle for device 0 + err, cuDevice = cuda.cuDeviceGet(0) + # Create context + _, cudaCtx = cuda.cuCtxCreate(0, cuDevice) + + precision = np.float32 if args.precision == "fp32" else np.float16 + + inp_shape = (128, 128) + X = np.random.normal(size=inp_shape).astype(precision) + # Zero out a random set of indices + indices = np.random.choice(np.prod(inp_shape), replace=False, size=np.random.randint(0, np.prod(inp_shape) + 1)) + X[np.unravel_index(indices, inp_shape)] = 0 + + # Register plugin creator + plg_registry = trt.get_plugin_registry() + my_plugin_creator = NonZeroPluginCreator() + plg_registry.register_creator(my_plugin_creator, "") + + if args.net_type == "onnx": + # create ONNX model + onnx_path = "test_NonZeroPlugin.onnx" + inputX = gs.Variable(name="X", shape=inp_shape, dtype=precision) + Y = gs.Variable(name="Y", dtype=np.int32) + Y_num = gs.Variable(name="Y_num", dtype=np.int32) + nonZeroPluginNode = gs.Node( + name="NonZeroPlugin", + op="NonZeroPlugin", + inputs=[inputX], + outputs=[Y, Y_num], + attrs={"backend": args.backend.encode()}, + ) + graph = gs.Graph(nodes=[nonZeroPluginNode], inputs=[inputX], outputs=[Y], opset=16) + onnx.save(gs.export_onnx(graph), onnx_path) + + # build engine + build_engine = EngineFromNetwork( + NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision==np.float16) + ) + else: + # Create plugin object + builder, network = create_network() + plg_creator = plg_registry.get_creator("NonZeroPlugin", "1", "") + plugin_fields_list = [ + trt.PluginField("backend", args.backend.encode(), trt.PluginFieldType.CHAR) + ] + pfc = trt.PluginFieldCollection(plugin_fields_list) + plugin = plg_creator.create_plugin("NonZeroPlugin", pfc, trt.TensorRTPhase.BUILD) + + # Populate network + inputX = network.add_input(name="X", dtype=trt.float32 if precision==np.float32 else trt.float16, shape=inp_shape) + out = network.add_plugin_v3([inputX], [], plugin) + out.get_output(0).name = "Y" + network.mark_output(tensor=out.get_output(0)) + build_engine = engine_from_network((builder, network), CreateConfig(fp16=precision==trt.float16)) + + # Compare against Numpy's nonzero + Y_ref = np.transpose(np.nonzero(X)) + + # Run + with TrtRunner(build_engine, "trt_runner")as runner: + outputs = runner.infer({"X": X}) + Y = outputs["Y"] + Y = Y[np.lexsort(np.fliplr(Y).T)] + + if np.allclose(Y, Y_ref): + print("Inference result correct!") + else: + print("Inference result incorrect!") + + if args.backend == "cuda_python": + checkCudaErrors(cuda.cuCtxDestroy(cudaCtx)) diff --git a/samples/python/non_zero_plugin/requirements.txt b/samples/python/non_zero_plugin/requirements.txt new file mode 100644 index 00000000..3f8c71f0 --- /dev/null +++ b/samples/python/non_zero_plugin/requirements.txt @@ -0,0 +1,13 @@ +cuda-python +cupy-cuda12x +torch +--extra-index-url https://pypi.ngc.nvidia.com +polygraphy +colored +numpy==1.23.5; platform_system != "Windows" +--extra-index-url https://pypi.ngc.nvidia.com +onnx-graphsurgeon +pywin32; platform_system == "Windows" +pyyaml==6.0.1 +requests==2.31.0 +tqdm==4.66.4 diff --git a/samples/python/onnx_custom_plugin/requirements.txt b/samples/python/onnx_custom_plugin/requirements.txt index 
840248c0..e713a19b 100644 --- a/samples/python/onnx_custom_plugin/requirements.txt +++ b/samples/python/onnx_custom_plugin/requirements.txt @@ -1,5 +1,5 @@ nltk>=3.5 -onnx==1.14.0 +onnx==1.16.0 --extra-index-url https://pypi.ngc.nvidia.com onnx-graphsurgeon>=0.3.20 wget>=3.2 @@ -7,5 +7,5 @@ cuda-python==12.2.0 pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/onnx_packnet/requirements.txt b/samples/python/onnx_packnet/requirements.txt index 41d78164..84b01b48 100644 --- a/samples/python/onnx_packnet/requirements.txt +++ b/samples/python/onnx_packnet/requirements.txt @@ -1,4 +1,4 @@ -onnx==1.14.0 +onnx==1.16.0 --extra-index-url https://pypi.ngc.nvidia.com onnx-graphsurgeon>=0.3.20 -f https://download.pytorch.org/whl/torch_stable.html @@ -13,5 +13,5 @@ torchvision==0.15.1; python_version>="3.11" and (platform_machine=="aarch64" and torchvision==0.15.1+cpu; python_version>="3.11" and ((platform_machine=="x86_64" and sys.platform=="linux") or sys.platform=="win32") pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/python_plugin/utils.py b/samples/python/plugin_utils.py similarity index 94% rename from samples/python/python_plugin/utils.py rename to samples/python/plugin_utils.py index 1a1aa16c..65106ecb 100644 --- a/samples/python/python_plugin/utils.py +++ b/samples/python/plugin_utils.py @@ -22,6 +22,7 @@ import threading import tensorrt as trt +import cupy as cp def parseArgs(): @@ -149,3 +150,9 @@ def clone(self): def release(self): checkCudaErrors(cuda.cuCtxDestroy(self.cuda_ctx)) + +class UnownedMemory: + def __init__(self, ptr, shape, dtype): + mem = cp.cuda.UnownedMemory(ptr, volume(shape) * cp.dtype(dtype).itemsize, self) + cupy_ptr = cp.cuda.MemoryPointer(mem, 0) + self.d = cp.ndarray(shape, dtype=dtype, memptr=cupy_ptr) diff --git a/samples/python/python_plugin/README.md b/samples/python/python_plugin/README.md index 948d7627..49585b22 100644 --- a/samples/python/python_plugin/README.md +++ b/samples/python/python_plugin/README.md @@ -114,8 +114,9 @@ When multiple options are available to compute the same op, and it's not possibl it is useful to ask TensorRT to time all available options during the build stage. In V2 plugins, TensorRT would only time different type/format combinations supported by the plugin, but V3 plugins allow users to specify any number of custom tactics to time also (in addition to type/format combinations). -In this example, we specify two custom tactics: PyTorch's [torch.nn.functional.pad](https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html) and a custom kernel written -using OpenAI triton. +In this example, we specify two custom tactics: PyTorch's [torch.nn.functional.pad](https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html) and a custom kernel written using OpenAI Triton. + +It is possible to advertise tactics specific to a format combination. e.g. In this sample, we can support both tactics for FP32 I/O, and only support the OpenAI Triton tactic for FP16 I/O. To achieve this, return in `get_valid_tactics()` the set of tactics `T(f)` supported by the plugin for the format combination `f` indicated by the immediately preceding call to `configure_plugin()`. To enable this behavior in this sample, pass the flag `--per-format-tactics`. 
### Multiple plugins instances diff --git a/samples/python/python_plugin/circ_pad_plugin_cpp.py b/samples/python/python_plugin/circ_pad_plugin_cpp.py index a7cb8d2f..3f8f87f7 100644 --- a/samples/python/python_plugin/circ_pad_plugin_cpp.py +++ b/samples/python/python_plugin/circ_pad_plugin_cpp.py @@ -67,7 +67,7 @@ def parseArgs(): pads = (1, 1, 1, 1) # create ONNX model - onnx_path = "test_CircPadPlugin.onnx" + onnx_path = f"test_CircPadPlugin_cpp_{args.precision}.onnx" inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision) Y = gs.Variable(name="Y", dtype=precision) myPluginNode = gs.Node( diff --git a/samples/python/python_plugin/circ_pad_plugin_cuda_python.py b/samples/python/python_plugin/circ_pad_plugin_cuda_python.py index 212e3e74..b8e6a1e2 100644 --- a/samples/python/python_plugin/circ_pad_plugin_cuda_python.py +++ b/samples/python/python_plugin/circ_pad_plugin_cuda_python.py @@ -18,6 +18,8 @@ import onnx_graphsurgeon as gs import numpy as np import onnx +import sys +import os import tensorrt as trt from polygraphy.backend.trt import ( @@ -28,7 +30,8 @@ ) from polygraphy.json import to_json, from_json -from utils import checkCudaErrors, KernelHelper, parseArgs, CudaCtxManager +sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +from plugin_utils import checkCudaErrors, KernelHelper, parseArgs, CudaCtxManager from cuda import cuda circ_pad_half_kernel = r""" @@ -340,7 +343,7 @@ def deserialize_plugin(self, name, data): plg_registry.register_creator(my_plugin_creator, "") # create ONNX model - onnx_path = "test_CircPadPlugin.onnx" + onnx_path = f"test_CircPadPlugin_cuda_python_{args.precision}.onnx" inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision) Y = gs.Variable(name="Y", dtype=precision) myPluginNode = gs.Node( diff --git a/samples/python/python_plugin/circ_pad_plugin_cupy.py b/samples/python/python_plugin/circ_pad_plugin_cupy.py index 19545a11..1309bfdf 100644 --- a/samples/python/python_plugin/circ_pad_plugin_cupy.py +++ b/samples/python/python_plugin/circ_pad_plugin_cupy.py @@ -21,6 +21,8 @@ import cupy as cp import time import pickle +import sys +import os import tensorrt as trt from polygraphy.backend.trt import ( @@ -32,7 +34,8 @@ from polygraphy.json import to_json, from_json -from utils import volume, parseArgs +sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +from plugin_utils import volume, parseArgs circ_pad_half_kernel = cp.RawKernel( r""" @@ -291,7 +294,7 @@ def deserialize_plugin(self, name, data): plg_registry.register_creator(my_plugin_creator, "") # create ONNX model - onnx_path = "test_CircPadPlugin.onnx" + onnx_path = f"test_CircPadPlugin_cupy_{args.precision}.onnx" inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision) Y = gs.Variable(name="Y", dtype=precision) myPluginNode = gs.Node( diff --git a/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py b/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py index 6abf526f..5d145b51 100644 --- a/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py +++ b/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py @@ -17,6 +17,8 @@ import onnx_graphsurgeon as gs import numpy as np +import sys +import os import tensorrt as trt from polygraphy.backend.trt import ( @@ -28,7 +30,8 @@ from polygraphy.json import to_json, from_json -from utils import checkCudaErrors, KernelHelper, parseArgs, CudaCtxManager +sys.path.insert(1, 
os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +from plugin_utils import checkCudaErrors, KernelHelper, parseArgs, CudaCtxManager from cuda import cuda circ_pad_half_kernel = r""" diff --git a/samples/python/python_plugin/circ_pad_plugin_multi_tactic.py b/samples/python/python_plugin/circ_pad_plugin_multi_tactic.py index 43e1be49..431ccc92 100644 --- a/samples/python/python_plugin/circ_pad_plugin_multi_tactic.py +++ b/samples/python/python_plugin/circ_pad_plugin_multi_tactic.py @@ -20,6 +20,8 @@ import onnx import cupy as cp import logging +import sys +import os import tensorrt as trt from polygraphy.backend.trt import ( @@ -37,7 +39,10 @@ from polygraphy.json import to_json, from_json import torch -from utils import volume, parseArgs +sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +from plugin_utils import volume, parseArgs + +import argparse logger = logging.getLogger("CircPadMultiTactic") @@ -82,7 +87,12 @@ def __init__(self, fc=None, phase=None): trt.IPluginV3OneRuntime.__init__(self) self.pads = [] self.X_shape = [] - + + self.per_format_tactics = ( + False # whether per-format tactics or global tactics should be used + ) + self.curr_type = None # format being timed currently by TRT auto-tuner + self.num_outputs = 1 self.plugin_namespace = "" self.plugin_name = "CircPadPlugin" @@ -94,8 +104,11 @@ def __init__(self, fc=None, phase=None): self.tactic = None if fc is not None: - assert fc[0].name == "pads" - self.pads = fc[0].data + for f in fc: + if f.name == "pads": + self.pads = f.data + elif f.name == "per_format_tactics": + self.per_format_tactics = int(f.data) if phase is not None: self.phase = phase @@ -117,16 +130,29 @@ def get_output_shapes(self, inputs, shape_inputs, exprBuilder): ) return [output_dims] - + def get_fields_to_serialize(self): return trt.PluginFieldCollection([ - trt.PluginField("pads", self.pads, trt.PluginFieldType.INT32) + trt.PluginField("pads", self.pads, trt.PluginFieldType.INT32), + trt.PluginField( + "per_format_tactics", + np.array([self.per_format_tactics], dtype=np.int32), + trt.PluginFieldType.INT32, + ), ]) def configure_plugin(self, inp, out): - pass + assert inp[0].desc.type == trt.float32 or inp[0].desc.type == trt.float16 + self.curr_type = inp[0].desc.type def on_shape_change(self, inp, out): + if ( + self.phase == trt.TensorRTPhase.RUNTIME + and self.per_format_tactics + and inp[0].type == trt.float16 + ): + assert self.tactic == Tactic.TRITON + X_dims = inp[0].dims self.X_shape = np.zeros((len(X_dims),)) for i in range(len(X_dims)): @@ -197,7 +223,7 @@ def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream): out_dims = out_dims.tolist() blockSize = 256 - numBlocks = tuple(int((np.prod(out_dims) + blockSize - 1) // blockSize)) + numBlocks = tuple([int((np.prod(out_dims) + blockSize - 1) // blockSize)]) circ_pad[numBlocks](a_t, all_pads[0], all_pads[2], all_pads[4], all_pads[6], @@ -213,6 +239,10 @@ def attach_to_context(self, context): return self.clone() def get_valid_tactics(self): + assert self.curr_type is not None + if self.per_format_tactics and self.curr_type == trt.float16: + return [int(Tactic.TRITON)] + return [int(Tactic.TORCH), int(Tactic.TRITON)] def set_tactic(self, tactic): @@ -244,7 +274,10 @@ def __init__(self): self.plugin_namespace = "" self.plugin_version = "1" self.field_names = trt.PluginFieldCollection([ - trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32) + trt.PluginField("pads", np.array([]), 
trt.PluginFieldType.INT32), + trt.PluginField( + "per_format_tactics", np.array([]), trt.PluginFieldType.INT32 + ), ]) def create_plugin(self, name, fc, phase): @@ -255,8 +288,27 @@ def create_plugin(self, name, fc, phase): logging.basicConfig() logger.setLevel(logging.INFO) - args = parseArgs() + parser = argparse.ArgumentParser( + description="Options for Circular Padding plugin multi-tactic sample" + ) + + parser.add_argument( + "--precision", + type=str, + default="fp32", + choices=["fp32", "fp16"], + help="Precision to use for plugin", + ) + parser.add_argument( + "--per-format-tactics", + action="store_true", + help="Whether per-format tactics or global tactics should be used", + ) + + args = parser.parse_args() + precision = np.float32 if args.precision == "fp32" else np.float16 + is_tactics_per_format = 1 if args.per_format_tactics else 0 inp_shape = (10, 3, 32, 32) X_A = np.random.normal(size=inp_shape).astype(precision) @@ -270,7 +322,7 @@ def create_plugin(self, name, fc, phase): plg_registry.register_creator(my_plugin_creator, "") # create ONNX model - onnx_path = "test_CircPadPlugin.onnx" + onnx_path = f"test_CircPadPlugin_multi_tactic_{args.precision}.onnx" inputA = gs.Variable(name="X_A", shape=inp_shape, dtype=precision) inputB = gs.Variable(name="X_B", shape=inp_shape, dtype=precision) Y_A = gs.Variable(name="Y_A", dtype=precision) @@ -280,14 +332,20 @@ def create_plugin(self, name, fc, phase): op="CircPadPlugin", inputs=[inputA], outputs=[Y_A], - attrs={"pads": pads}, + attrs={ + "pads": pads, + "per_format_tactics": np.array([is_tactics_per_format], dtype=np.int32), + }, ) myPluginNode_B = gs.Node( name="CircPadPlugin_B", op="CircPadPlugin", inputs=[inputB], outputs=[Y_B], - attrs={"pads": pads}, + attrs={ + "pads": pads, + "per_format_tactics": np.array([is_tactics_per_format], dtype=np.int32), + }, ) graph = gs.Graph(nodes=[myPluginNode_A, myPluginNode_B], inputs=[inputA, inputB], outputs=[Y_A, Y_B], opset=16) diff --git a/samples/python/python_plugin/circ_pad_plugin_numba.py b/samples/python/python_plugin/circ_pad_plugin_numba.py index d568419d..faaa1314 100644 --- a/samples/python/python_plugin/circ_pad_plugin_numba.py +++ b/samples/python/python_plugin/circ_pad_plugin_numba.py @@ -20,6 +20,8 @@ import onnx import cupy as cp from numba import cuda +import sys +import os import tensorrt as trt from polygraphy.backend.trt import ( @@ -30,7 +32,10 @@ ) from polygraphy.json import to_json, from_json -from utils import volume, parseArgs + +sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +from plugin_utils import volume, parseArgs + @cuda.jit @@ -222,7 +227,7 @@ def deserialize_plugin(self, name, data): plg_registry.register_creator(my_plugin_creator, "") # create ONNX model - onnx_path = "test_CircPadPlugin.onnx" + onnx_path = f"test_CircPadPlugin_numba_{args.precision}.onnx" inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision) Y = gs.Variable(name="Y", dtype=precision) myPluginNode = gs.Node( diff --git a/samples/python/python_plugin/circ_pad_plugin_torch.py b/samples/python/python_plugin/circ_pad_plugin_torch.py index 76e8cc41..95861bee 100644 --- a/samples/python/python_plugin/circ_pad_plugin_torch.py +++ b/samples/python/python_plugin/circ_pad_plugin_torch.py @@ -19,6 +19,8 @@ import numpy as np import onnx import cupy as cp +import sys +import os import tensorrt as trt from polygraphy.backend.trt import ( @@ -31,7 +33,9 @@ from polygraphy.json import to_json, from_json import torch -from utils import volume, 
parseArgs +sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +from plugin_utils import volume, parseArgs + class CircPadPlugin(trt.IPluginV2DynamicExt): @@ -180,7 +184,7 @@ def deserialize_plugin(self, name, data): plg_registry.register_creator(my_plugin_creator, "") # create ONNX model - onnx_path = "test_CircPadPlugin.onnx" + onnx_path = f"test_CircPadPlugin_torch_{args.precision}.onnx" inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision) Y = gs.Variable(name="Y", dtype=precision) myPluginNode = gs.Node( diff --git a/samples/python/python_plugin/circ_pad_plugin_triton.py b/samples/python/python_plugin/circ_pad_plugin_triton.py index 686d4e5c..15990a6a 100644 --- a/samples/python/python_plugin/circ_pad_plugin_triton.py +++ b/samples/python/python_plugin/circ_pad_plugin_triton.py @@ -19,6 +19,8 @@ import numpy as np import onnx import cupy as cp +import sys +import os import triton import triton.language as tl @@ -34,7 +36,9 @@ from polygraphy.json import to_json, from_json import torch -from utils import volume, parseArgs +sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +from plugin_utils import volume, parseArgs + @triton.jit @@ -263,7 +267,7 @@ def deserialize_plugin(self, name, data): plg_registry.register_creator(my_plugin_creator, "") # create ONNX model - onnx_path = "test_CircPadPlugin.onnx" + onnx_path = f"test_CircPadPlugin_triton_{args.precision}.onnx" inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision) Y = gs.Variable(name="Y", dtype=precision) myPluginNode = gs.Node( diff --git a/samples/python/python_plugin/requirements.txt b/samples/python/python_plugin/requirements.txt index 8550a865..70791739 100644 --- a/samples/python/python_plugin/requirements.txt +++ b/samples/python/python_plugin/requirements.txt @@ -12,4 +12,4 @@ onnx-graphsurgeon pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 diff --git a/samples/python/python_plugin/requirements.yml b/samples/python/python_plugin/requirements.yml deleted file mode 100644 index 30f1cca4..00000000 --- a/samples/python/python_plugin/requirements.yml +++ /dev/null @@ -1,28 +0,0 @@ ---- -args: - polygraphy: - - '--extra-index-url https://pypi.ngc.nvidia.com' - torch: [] -conditions: - cuda-python: - - cuda-python - onnx-graphsurgeon: - - onnx-graphsurgeon - triton: - - triton; platform_system != "Windows" - numpy: - - numpy==1.23.5; platform_system != "Windows" - torch: - - torch -packages: - - cuda-python - - cupy-cuda12x - - numba - - triton - - torch - - polygraphy - - colored - - numpy - - onnx-graphsurgeon - - pywin32 -... diff --git a/samples/python/requirements.txt b/samples/python/requirements.txt index 2560ce53..09413658 100644 --- a/samples/python/requirements.txt +++ b/samples/python/requirements.txt @@ -1,4 +1,4 @@ pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/sample_weight_stripping/notebooks/weight_stripping.ipynb b/samples/python/sample_weight_stripping/notebooks/weight_stripping.ipynb index 1ec7ef1e..da5d15bd 100644 --- a/samples/python/sample_weight_stripping/notebooks/weight_stripping.ipynb +++ b/samples/python/sample_weight_stripping/notebooks/weight_stripping.ipynb @@ -53,32 +53,32 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-05-24 22:19:02-- https://download.onnxruntime.ai/onnx/models/resnet50.tar.gz\n", - "Resolving download.onnxruntime.ai (download.onnxruntime.ai)... 
13.107.246.71, 2620:1ec:bdf::71\n", - "Connecting to download.onnxruntime.ai (download.onnxruntime.ai)|13.107.246.71|:443... connected.\n", + "--2024-05-20 20:02:41-- https://download.onnxruntime.ai/onnx/models/resnet50.tar.gz\n", + "Resolving download.onnxruntime.ai (download.onnxruntime.ai)... 13.107.246.69, 13.107.213.69, 2620:1ec:46::69, ...\n", + "Connecting to download.onnxruntime.ai (download.onnxruntime.ai)|13.107.246.69|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 101632129 (97M) [application/octet-stream]\n", "Saving to: ‘resnet50.tar.gz’\n", "\n", - "resnet50.tar.gz 100%[===================>] 96.92M 7.59MB/s in 14s \n", + "resnet50.tar.gz 100%[===================>] 96.92M 7.35MB/s in 14s \n", "\n", - "2024-05-24 22:19:16 (6.97 MB/s) - ‘resnet50.tar.gz’ saved [101632129/101632129]\n", + "2024-05-20 20:02:56 (6.87 MB/s) - ‘resnet50.tar.gz’ saved [101632129/101632129]\n", "\n", - "--2024-05-24 22:19:20-- https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz\n", + "--2024-05-20 20:03:00-- https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz\n", "Resolving developer.nvidia.com (developer.nvidia.com)... 152.195.19.142\n", "Connecting to developer.nvidia.com (developer.nvidia.com)|152.195.19.142|:443... connected.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz [following]\n", - "--2024-05-24 22:19:21-- https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz\n", + "--2024-05-20 20:03:00-- https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz\n", "Resolving developer.download.nvidia.com (developer.download.nvidia.com)... 152.195.19.142\n", "Connecting to developer.download.nvidia.com (developer.download.nvidia.com)|152.195.19.142|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 2349001911 (2.2G) [application/x-gzip]\n", "Saving to: ‘TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz’\n", "\n", - "TensorRT-10.0.1.6.L 100%[===================>] 2.19G 214MB/s in 9.9s \n", + "TensorRT-10.0.1.6.L 100%[===================>] 2.19G 227MB/s in 10s \n", "\n", - "2024-05-24 22:19:31 (227 MB/s) - ‘TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz’ saved [2349001911/2349001911]\n", + "2024-05-20 20:03:12 (215 MB/s) - ‘TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz’ saved [2349001911/2349001911]\n", "\n" ] } @@ -88,10 +88,9 @@ "!tar -xzf resnet50.tar.gz\n", "\n", "!wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/tars/TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz\n", - "!tar -xzf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz TensorRT-10.0.1.6/data/int8_api/reference_labels.txt TensorRT-10.0.1.6/data/int8_api/airliner.ppm TensorRT-10.0.1.6/targets/x86_64-linux-gnu/lib/libnvinfer_lean.so.10.0.1\n", + "!tar -xzf TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz TensorRT-10.0.1.6/data/int8_api/reference_labels.txt TensorRT-10.0.1.6/data/int8_api/airliner.ppm\n", "!mv TensorRT-10.0.1.6/data/int8_api/airliner.ppm .\n", "!mv TensorRT-10.0.1.6/data/int8_api/reference_labels.txt .\n", - "!mv TensorRT-10.0.1.6/targets/x86_64-linux-gnu/lib/libnvinfer_lean.so.10.0.1 .\n", "!rm -rf TensorRT-10.0.1.6 TensorRT-10.0.1.6.Linux.x86_64-gnu.cuda-12.4.tar.gz" ] }, @@ -102,16 +101,14 @@ "source": [ "### Install pre-requisites\n", "\n", - "The Python sample is built on top of the native TensorRT API. To streamline the code and enhance its readability, we use polygraphy in the notebook. We also use matpoltlib to visualize the data. Therefore, it's necessary to install polygraphy and matplotlib." + "The Python sample is built on top of the native TensorRT API. To streamline the code and enhance its readability, we use polygraphy in the notebook. We also use matpoltlib to visualize the data. Therefore, it's necessary to install packages from both the requirement.txt file, polygraphy and matplotlib." 
] }, { "cell_type": "code", "execution_count": 2, "id": "66a9caf4-aabc-42bd-a387-95a7eba342a3", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -123,44 +120,11 @@ } ], "source": [ - "%pip install --upgrade --force-reinstall tensorrt==10.0.1\n", - "%pip install -q --upgrade --force-reinstall matplotlib colored polygraphy>=0.49.9 --extra-index-url https://pypi.ngc.nvidia.com;" - ] - }, - { - "cell_type": "markdown", - "id": "fd8915e2-8fed-4fa3-8e78-04e5359ac44f", - "metadata": {}, - "source": [ - "⚠️ **Make sure to restart the kernels before continuing to check that the installed version of TensorRT is 10.0.1+ and Polygraphy is 0.49.9+.** " - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "97289f92-f858-452f-8c68-9b0848a58508", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "polygraphy version 0.49.9\n", - "tensorrt version 10.0.1\n" - ] - } - ], - "source": [ + "%pip install -q -r ../requirements.txt;\n", + "%pip install -q matplotlib colored polygraphy --extra-index-url https://pypi.ngc.nvidia.com;\n", "import sys\n", "import site\n", - "sys.path.append(site.getusersitepackages())\n", - "\n", - "import polygraphy\n", - "import tensorrt\n", - "print(f\"polygraphy version {polygraphy.__version__}\")\n", - "print(f\"tensorrt version {tensorrt.__version__}\")" + "sys.path.append(site.getusersitepackages())" ] }, { @@ -177,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "42b888e4-fb2e-45a2-bf03-452a976b52aa", "metadata": {}, "outputs": [ @@ -197,17 +161,17 @@ " Tactic Sources | [EDGE_MASK_CONVOLUTIONS, JIT_CONVOLUTIONS]\n", " Profiling Verbosity | ProfilingVerbosity.DETAILED\n", " Preview Features | [PROFILE_SHARING_0806]\u001b[0m\n", - "\u001b[38;5;10m[I] Finished engine building in 37.975 seconds\u001b[0m\n", + "\u001b[38;5;10m[I] Finished engine building in 38.595 seconds\u001b[0m\n", "[I] Saving engine to resnet50_full.plan\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -236,7 +200,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "19483305-5006-4f00-a0d3-5de6d6504406", "metadata": {}, "outputs": [ @@ -255,17 +219,17 @@ " Tactic Sources | [EDGE_MASK_CONVOLUTIONS, JIT_CONVOLUTIONS]\n", " Profiling Verbosity | ProfilingVerbosity.DETAILED\n", " Preview Features | [PROFILE_SHARING_0806]\u001b[0m\n", - "\u001b[38;5;10m[I] Finished engine building in 34.358 seconds\u001b[0m\n", + "\u001b[38;5;10m[I] Finished engine building in 34.624 seconds\u001b[0m\n", "[I] Saving engine to resnet50_stripped.plan\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -294,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "f1d42483-1176-407d-89ca-d45506f12db5", "metadata": {}, "outputs": [ @@ -341,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "776fd81f-d309-4f73-8fde-57d7928bc929", "metadata": {}, "outputs": [ @@ -374,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "d4584157-09f1-45d0-89af-d6bb408b670d", "metadata": {}, "outputs": [ @@ -414,7 +378,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "51cb8223-9127-4c17-9734-50cb793c3c5c", 
"metadata": {}, "outputs": [ @@ -431,7 +395,7 @@ "True" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -459,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "42fe4c5f-108e-4d7f-ae80-a569c76dfe6c", "metadata": {}, "outputs": [ @@ -493,7 +457,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "6f11ebb7-346d-4d4e-963b-9339d8e4e639", "metadata": {}, "outputs": [], @@ -524,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "082e336c-0f8e-4d48-9d69-28dcd6fd9b0f", "metadata": {}, "outputs": [ @@ -532,7 +496,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Full engine inference time on 100 iterations: 0.2797 seconds\n" + "Full engine inference time on 100 iterations: 0.2675 seconds\n" ] } ], @@ -559,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "b48f4c09-ab6a-4a6b-bf36-628edf2ca7de", "metadata": {}, "outputs": [ @@ -567,7 +531,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Refitted stripped engine inference time on 100 iterations: 0.2845 seconds\n" + "Refitted stripped engine inference time on 100 iterations: 0.2671 seconds\n" ] } ], @@ -591,13 +555,13 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "f4f8c300-eb3a-4b59-958c-5332e8dc546f", "metadata": {}, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAArwAAAIjCAYAAADhisjVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABPSElEQVR4nO3deVzU1f7H8feAsriAOyNKAu7mgrmQW1qSaKtZht66LmlmuSWiSaW41NW8mmialqVoq21qWaHJFX/pNc1dc18xA1wIEFBUZn5/+HBuE6gzOIh+fT0fj3lc5nzP98znO/D43nfHM2dMVqvVKgAAAMCg3Iq7AAAAAKAoEXgBAABgaAReAAAAGBqBFwAAAIZG4AUAAIChEXgBAABgaAReAAAAGBqBFwAAAIZG4AUAAIChEXgB3Lb+/e9/Kzg4WO7u7goJCSnucnCbS0xMlMlkUmJiYnGXAsDFCLwAXCYuLk4mk8n28PLyUp06dTR48GClpqa69LVWrlypUaNGqU2bNlqwYIH+9a9/uXT8O1ViYqK6desms9ksDw8PValSRY8++qi++eab4i4NAAqtRHEXAMB4JkyYoKCgIJ0/f15r167VnDlz9MMPP2jXrl0qVaqUS17jP//5j9zc3PThhx/Kw8PDJWPe6WJiYjRhwgTVrl1bL7zwgmrUqKEzZ87ohx9+0JNPPqlPPvlE//jHP4q7zCJz33336dy5c/w9AQZE4AXgcl26dFHz5s0lSf3791fFihX19ttva9myZerZs+cNjZ2Tk6NSpUrp5MmT8vb2dlk4sVqtOn/+vLy9vV0y3u3mq6++0oQJE/TUU0/p008/VcmSJW3HRo4cqRUrVujixYvFWGHROX/+vDw8POTm5iYvL6/iLgdAEWBJA4Ai98ADD0iSjhw5Ymv7+OOP1axZM3l7e6tChQrq0aOHjh8/bndehw4d1LBhQ23evFn33XefSpUqpVdffVUmk0kLFixQdna2bflEXFycJOnSpUuaOHGiatasKU9PTwUGBurVV19Vbm6u3diBgYF65JFHtGLFCjVv3lze3t567733bOs4v/jiC40fP17VqlVT2bJl9dRTTykjI0O5ubl6+eWXVaVKFZUpU0Z9+/bNN/aCBQv0wAMPqEqVKvL09FSDBg00Z86cfO/LlRrWrl2rli1bysvLS8HBwVq0aFG+vunp6Ro+fLgCAwPl6emp6tWrq1evXjp9+rStT25urmJiYlSrVi15enoqICBAo0aNyldfQcaMGaMKFSpo/vz5dmH3ivDwcD3yyCO25ydPnlS/fv3k5+cnLy8vNWnSRAsXLrQ75+jRozKZTJo6dapmz56t4OBglSpVSp06ddLx48dltVo1ceJEVa9eXd7e3nr88ceVlpZW4Hu0cuVKhYSEyMvLSw0aNMi3xCItLU1RUVFq1KiRypQpIx8fH3Xp0kXbt2+363fl9/v555/r9ddfV7Vq1VSqVCllZmYWuIb3wIEDevLJJ2U2m+Xl5aXq1aurR48eysjIsPVx9m/Okd83ANdihhdAkTt06JAkqWLFipKkN998U2PGjNHTTz+t/v3769SpU3rnnXd03333aevWrSpXrpzt3DNnzqhLly7q0aOHnn32Wfn5+al58+Z6//33tXHjRn3wwQeSpNatW0u6PKO8cOFCPfXUUxoxYoQ2bNigSZMmac+ePVqyZIldXfv27VPPnj31wgsv6Pnnn1fdunVtxyZNmiRvb2+NHj1aBw8e1DvvvKOSJUvKzc1Nf/75p8aNG6dffvlFcXFxCgoK0tixY23nzpkzR3fffbcee+wxlShRQt99951eeuklWSwWDRo0yK6GgwcP6qmnnlK/fv3Uu3dvzZ8/X3369FGzZs109913S5KysrLUrl077dmzR88995zuuecenT59Wt9++61+//13VapUSRaLRY899pjWrl2rAQMGqH79+tq5c6emT5+u/fv3a+nSpVf9/Rw4cEB79+7Vc889p7Jly17393nu3Dl16NBBBw8e1ODBgxUUFKQvv/xSffr0UXp6uoYNG2bX/5NPPtGFCxc0ZM
gQpaWlacqUKXr66af1wAMPKDExUa+88ortPY6KitL8+fPz1RcREaGBAweqd+/eWrBggbp37674+Hg9+OCDkqTDhw9r6dKl6t69u4KCgpSamqr33ntP7du31+7du+Xv72835sSJE+Xh4aGoqCjl5uYW+C8FFy5cUHh4uHJzczVkyBCZzWadOHFCy5cvV3p6unx9fSU59zfnyO8bQBGwAoCLLFiwwCrJumrVKuupU6esx48ft37++efWihUrWr29va2///679ejRo1Z3d3frm2++aXfuzp07rSVKlLBrb9++vVWSde7cufleq3fv3tbSpUvbtW3bts0qydq/f3+79qioKKsk63/+8x9bW40aNaySrPHx8XZ9V69ebZVkbdiwofXChQu29p49e1pNJpO1S5cudv1btWplrVGjhl1bTk5OvnrDw8OtwcHBdm1Xavi///s/W9vJkyetnp6e1hEjRtjaxo4da5Vk/eabb/KNa7FYrFar1frRRx9Z3dzcrD///LPd8blz51olWdetW5fv3CuWLVtmlWSdPn36Vfv8VWxsrFWS9eOPP7a1XbhwwdqqVStrmTJlrJmZmVar1Wo9cuSIVZK1cuXK1vT0dFvf6OhoqyRrkyZNrBcvXrS19+zZ0+rh4WE9f/68re3Ke/T111/b2jIyMqxVq1a1Nm3a1NZ2/vx5a15enl2dR44csXp6elonTJhga7vy+w0ODs73e7pybPXq1Var1WrdunWrVZL1yy+/vOp7UZi/uev9vgG4HksaALhcWFiYKleurICAAPXo0UNlypTRkiVLVK1aNX3zzTeyWCx6+umndfr0advDbDardu3aWr16td1Ynp6e6tu3r0Ov+8MPP0iSIiMj7dpHjBghSfr+++/t2oOCghQeHl7gWL169bL7p/3Q0FBZrVY999xzdv1CQ0N1/PhxXbp0ydb213XAGRkZOn36tNq3b6/Dhw/b/VO4JDVo0EDt2rWzPa9cubLq1q2rw4cP29q+/vprNWnSRE888US+Ok0mkyTpyy+/VP369VWvXj279/XKcpK/v69/lZmZKUkOze5Kl99ns9lstx67ZMmSGjp0qLKysrRmzRq7/t27d7fNhkqX3zNJevbZZ1WiRAm79gsXLujEiRN25/v7+9tdu4+Pj3r16qWtW7cqJSVF0uW/Eze3y/+XlpeXpzNnzqhMmTKqW7eutmzZku8aevfufd312ldqXrFihXJycq76XkiO/8058vsG4HosaQDgcrNnz1adOnVUokQJ+fn5qW7durYwcuDAAVmtVtWuXbvAc/++frRatWoOfzDt2LFjcnNzU61atezazWazypUrp2PHjtm1BwUFXXWsu+66y+75lfATEBCQr91isSgjI8O2ZGPdunWKiYnR+vXr8wWljIwMu/D399eRpPLly+vPP/+0PT906JCefPLJq9YqXX5f9+zZo8qVKxd4/OTJk1c918fHR5J09uzZa77GFceOHVPt2rVtv9Mr6tevbzv+V868l5Lsrl2SatWqZQv2V9SpU0fS5XXCZrNZFotFM2bM0LvvvqsjR44oLy/P1vfK7+WvrvW7/2ufyMhIvf322/rkk0/Url07PfbYY3r22WdttTr7N+fI7xuA6xF4Abhcy5Ytbbs0/J3FYpHJZNKPP/4od3f3fMfLlClj97wwuyb8PRxdzbXGLqi2a7VbrVZJl8Npx44dVa9ePb399tsKCAiQh4eHfvjhB02fPl0Wi8Wp8RxlsVjUqFEjvf322wUe/3u4/Kt69epJknbu3OnUazqqsO+lM/71r39pzJgxeu655zRx4kRVqFBBbm5uevnll/O955Ljf1fTpk1Tnz59tGzZMq1cuVJDhw7VpEmT9Msvv6h69eq2fo7+zbnymgE4jsAL4KaqWbOmrFargoKCbLN0rlKjRg1ZLBYdOHDANtsoSampqUpPT1eNGjVc+noF+e6775Sbm6tvv/3WbjbvWksKrqdmzZratWvXdfts375dHTt2dDh8XVGnTh3VrVtXy5Yt04wZM/L9R8ff1ahRQzt27JDFYrGb5d27d6/tuCsdPHhQVqvV7rr2798v6fLOB9LlbdXuv/9+ffjhh3bnpqenq1KlSjf0+o0aNVKjRo30+uuv67///a/atGmjuXPn6o033rgl/uYAXB9reAHcVN26dZO7u7vGjx+fb1bLarXqzJkzhR77oYcekiTFxsbatV+Z9Xz44YcLPbajrszg/fXaMjIytGDBgkKP+eSTT2r79u35PvH/19d5+umndeLECc2bNy9fn3Pnzik7O/uarzF+/HidOXNG/fv3t1uPfMXKlSu1fPlySZff55SUFC1evNh2/NKlS3rnnXdUpkwZtW/f3qnru54//vjD7tozMzO1aNEihYSEyGw2S7r8vv/97+nLL7/Mtx7YGZmZmfnei0aNGsnNzc225dit8DcH4PqY4QVwU9WsWVNvvPGGoqOjdfToUXXt2lVly5bVkSNHtGTJEg0YMEBRUVGFGrtJkybq3bu33n//faWnp6t9+/bauHGjFi5cqK5du+r+++938dXk16lTJ3l4eOjRRx/VCy+8oKysLM2bN09VqlRRcnJyocYcOXKkvvrqK3Xv3l3PPfecmjVrprS0NH377beaO3eumjRpon/+85/64osvNHDgQK1evVpt2rRRXl6e9u7dqy+++MK23/DVREREaOfOnXrzzTe1detW9ezZ0/ZNa/Hx8UpISNCnn34qSRowYIDee+899enTR5s3b1ZgYKC++uorrVu3TrGxsQ5/+M1RderUUb9+/fTrr7/Kz89P8+fPV2pqqt1/RDzyyCOaMGGC+vbtq9atW2vnzp365JNPFBwcXOjX/c9//qPBgwere/fuqlOnji5duqSPPvpI7u7utjXVt8LfHIDrI/ACuOlGjx6tOnXqaPr06Ro/fryky2tMO3XqpMcee+yGxv7ggw8UHBysuLg4LVmyRGazWdHR0YqJiXFF6ddVt25dffXVV3r99dcVFRUls9msF198UZUrV863w4OjypQpo59//lkxMTFasmSJFi5cqCpVqqhjx462daRubm5aunSppk+frkWLFmnJkiUqVaqUgoODNWzYMIeWj7zxxht64IEHNHPmTM2ZM0dpaWkqX7687r33Xi1btsz2u/H29lZiYqJGjx6thQsXKjMzU3Xr1tWCBQvUp0+fQl3jtdSuXVvvvPOORo4cqX379ikoKEiLFy+222Hj1VdfVXZ2tj799FMtXrxY99xzj77//nuNHj260K/bpEkThYeH67vvvtOJEydUqlQpNWnSRD/++KPuvfdeW7/i/psDcH0mKyvlAQC3qMDAQDVs2NC2nAIACoM1vAAAADA0Ai8AAAAMjcALAAAAQ2MNLwAAAAyNGV4AAAAYGoEXAAAAhsY+vAWwWCz6448/VLZsWae/ohMAAABFz2q16uzZs/L397f7mvOCEHgL8McffyggIKC4ywAAAMB1HD9+3PYlPFdD4C3Ala/FPH78uHx8fIq5GgAAAPxdZmamAgICHPo6cwJvAa4sY/Dx8SHwAgAA3MIcWX7Kh
9YAAABgaAReAAAAGBqBFwAAAIZG4AUAAIChEXgBAABgaAReAAAAGBqBFwAAAIZG4AUAAIChEXgBAABgaAReAAAAGBqBFwAAAIZG4AUAAIChEXgBAABgaAReAAAAGBqBFwAAAIZG4AUAAIChEXgBAABgaAReAAAAGBqBFwAAAIZWorgLAAAYn2m8qbhLAHATWGOsxV1CgZjhBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBobEt2izCxYw9wR7Demjv2AIChMcMLAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQyPwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQ7slAu/s2bMVGBgoLy8vhYaGauPGjVftO2/ePLVr107ly5dX+fLlFRYWlq9/nz59ZDKZ7B6dO3cu6ssAAADALajYA+/ixYsVGRmpmJgYbdmyRU2aNFF4eLhOnjxZYP/ExET17NlTq1ev1vr16xUQEKBOnTrpxIkTdv06d+6s5ORk2+Ozzz67GZcDAACAW4zJarVai7OA0NBQtWjRQrNmzZIkWSwWBQQEaMiQIRo9evR1z8/Ly1P58uU1a9Ys9erVS9LlGd709HQtXbq0UDVlZmbK19dXGRkZ8vHxKdQYzjKZbsrLAChmxXvHLT6m8dzkgDuBNebm3eScyWvFOsN74cIFbd68WWFhYbY2Nzc3hYWFaf369Q6NkZOTo4sXL6pChQp27YmJiapSpYrq1q2rF198UWfOnLnqGLm5ucrMzLR7AAAAwBiKNfCePn1aeXl58vPzs2v38/NTSkqKQ2O88sor8vf3twvNnTt31qJFi5SQkKC33npLa9asUZcuXZSXl1fgGJMmTZKvr6/tERAQUPiLAgAAwC2lRHEXcCMmT56szz//XImJifLy8rK19+jRw/Zzo0aN1LhxY9WsWVOJiYnq2LFjvnGio6MVGRlpe56ZmUnoBQAAMIhineGtVKmS3N3dlZqaateempoqs9l8zXOnTp2qyZMna+XKlWrcuPE1+wYHB6tSpUo6ePBggcc9PT3l4+Nj9wAAAIAxFGvg9fDwULNmzZSQkGBrs1gsSkhIUKtWra563pQpUzRx4kTFx8erefPm132d33//XWfOnFHVqlVdUjcAAABuH8W+LVlkZKTmzZunhQsXas+ePXrxxReVnZ2tvn37SpJ69eql6OhoW/+33npLY8aM0fz58xUYGKiUlBSlpKQoKytLkpSVlaWRI0fql19+0dGjR5WQkKDHH39ctWrVUnh4eLFcIwAAAIpPsa/hjYiI0KlTpzR27FilpKQoJCRE8fHxtg+yJSUlyc3tf7l8zpw5unDhgp566im7cWJiYjRu3Di5u7trx44dWrhwodLT0+Xv769OnTpp4sSJ8vT0vKnXBgAAgOJX7Pvw3orYhxdAUblT77jswwvcGdiHFwAAACgGBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKEReAEAAGBoBF4AAAAYGoEXAAAAhkbgBQAAgKGVcPaE3NxcbdiwQceOHVNOTo4qV66spk2bKigoqCjqAwAAAG6Iw4F33bp1mjFjhr777jtdvHhRvr6+8vb2VlpamnJzcxUcHKwBAwZo4MCBKlu2bFHWDAAAADjMoSUNjz32mCIiIhQYGKiVK1fq7NmzOnPmjH7//Xfl5OTowIEDev3115WQkKA6derop59+Kuq6AQAAAIc4NMP78MMP6+uvv1bJkiULPB4cHKzg4GD17t1bu3fvVnJyskuLBAAAAArLocD7wgsvODxggwYN1KBBg0IXBAAAALiS07s0/Prrr9qwYUO+9g0bNmjTpk0uKQoAAABwFacD76BBg3T8+PF87SdOnNCgQYNcUhQAAADgKk4H3t27d+uee+7J1960aVPt3r27UEXMnj1bgYGB8vLyUmhoqDZu3HjVvvPmzVO7du1Uvnx5lS9fXmFhYfn6W61WjR07VlWrVpW3t7fCwsJ04MCBQtUGAACA25vTgdfT01Opqan52pOTk1WihNPb+mrx4sWKjIxUTEyMtmzZoiZNmig8PFwnT54ssH9iYqJ69uyp1atXa/369QoICFCnTp104sQJW58pU6Zo5syZmjt3rjZs2KDSpUsrPDxc58+fd7o+AAAA3N5MVqvV6swJPXv2VHJyspYtWyZfX19JUnp6urp27aoqVaroiy++cKqA0NBQtWjRQrNmzZIkWSwWBQQEaMiQIRo9evR1z8/Ly1P58uU1a9Ys9erVS1arVf7+/hoxYoSioqIkSRkZGfLz81NcXJx69OiRb4zc3Fzl5ubanmdmZiogIEAZGRny8fFx6noKy2S6KS8DoJg5d8c1DtN4bnLAncAac/NucpmZmfL19XUorzk9wzt16lQdP35cNWrU0P3336/7779fQUFBSklJ0bRp05wa68KFC9q8ebPCwsL+V5Cbm8LCwrR+/XqHxsjJydHFixdVoUIFSdKRI0eUkpJiN6avr69CQ0OvOuakSZPk6+trewQEBDh1HQAAALh1OR14q1Wrph07dmjKlClq0KCBmjVrphkzZmjnzp1OB8XTp08rLy9Pfn5+du1+fn5KSUlxaIxXXnlF/v7+toB75TxnxoyOjlZGRobtUdCH8gAAAHB7cn7RraTSpUtrwIABrq7FaZMnT9bnn3+uxMREeXl5FXocT09PeXp6urAyAAAA3CqcnuGVpI8++kht27aVv7+/jh07JkmaPn26li1b5tQ4lSpVkru7e74PwaWmpspsNl/z3KlTp2ry5MlauXKlGjdubGu/cl5h
xgQAAIDxOB1458yZo8jISHXp0kV//vmn8vLyJEnly5dXbGysU2N5eHioWbNmSkhIsLVZLBYlJCSoVatWVz1vypQpmjhxouLj49W8eXO7Y0FBQTKbzXZjZmZmasOGDdccEwAAAMbkdOB95513NG/ePL322mt225A1b95cO3fudLqAyMhIzZs3TwsXLtSePXv04osvKjs7W3379pUk9erVS9HR0bb+b731lsaMGaP58+crMDBQKSkpSklJUVZWliTJZDLp5Zdf1htvvKFvv/1WO3fuVK9eveTv76+uXbs6XR8AAABub06v4T1y5IiaNm2ar93T01PZ2dlOFxAREaFTp05p7NixSklJUUhIiOLj420fOktKSpKb2/9y+Zw5c3ThwgU99dRTduPExMRo3LhxkqRRo0YpOztbAwYMUHp6utq2bav4+PgbWucLAACA25PTgTcoKEjbtm1TjRo17Nrj4+NVv379QhUxePBgDR48uMBjiYmJds+PHj163fFMJpMmTJigCRMmFKoeAAAAGIfTgTcyMlKDBg3S+fPnZbVatXHjRn322WeaNGmSPvjgg6KoEQAAACg0pwNv//795e3trddff105OTn6xz/+IX9/f82YMaPAbzEDAAAAilOh9uF95pln9MwzzygnJ0dZWVmqUqWKq+sCAAAAXMLpXRrOnTunnJwcSVKpUqV07tw5xcbGauXKlS4vDgAAALhRTgfexx9/XIsWLZIkpaenq2XLlpo2bZoef/xxzZkzx+UFAgAAADfC6cC7ZcsWtWvXTpL01VdfyWw269ixY1q0aJFmzpzp8gIBAACAG+F04M3JyVHZsmUlSStXrlS3bt3k5uame++91/Y1wwAAAMCtwunAW6tWLS1dulTHjx/XihUr1KlTJ0nSyZMn5ePj4/ICAQAAgBvhdOAdO3asoqKiFBgYqNDQULVq1UrS5dnegr6BDQAAAChOTm9L9tRTT6lt27ZKTk5WkyZNbO0dO3bUE0884dLiAAAAgBtVqH14zWazzGazXVvLli1dUhAAAADgSg4taRg4cKB+//13hwZcvHixPvnkkxsqCgAAAHAVh2Z4K1eurLvvvltt2rTRo48+qubNm8vf319eXl76888/tXv3bq1du1aff/65/P399f777xd13QAAAIBDTFar1epIx9TUVH3wwQf6/PPPtXv3brtjZcuWVVhYmPr376/OnTsXSaE3U2Zmpnx9fZWRkXHTdp4wmW7KywAoZo7dcY3HNJ6bHHAnsMbcvJucM3nN4cD7V3/++aeSkpJ07tw5VapUSTVr1pTJQImNwAugqBB4ARjZrRp4C/WhtfLly6t8+fKFKg4AAAC4mZzehxcAAAC4nRB4AQAAYGgEXgAAABgagRcAAACG5nTgPXfunHJycmzPjx07ptjYWK1cudKlhQEAAACu4HTgffzxx7Vo0SJJUnp6ukJDQzVt2jQ9/vjjmjNnjssLBAAAAG6E04F3y5YtateunSTpq6++kp+fn44dO6ZFixZp5syZLi8QAAAAuBFOB96cnByVLVtWkrRy5Up169ZNbm5uuvfee3Xs2DGXFwgAAADcCKcDb61atbR06VIdP35cK1asUKdOnSRJJ0+evGnfSgYAAAA4yunAO3bsWEVFRSkwMFChoaFq1aqVpMuzvU2bNnV5gQAAAMCNcPqrhZ966im1bdtWycnJatKkia29Y8eOeuKJJ1xaHAAAAHCjnA68kmQ2m2U2m+3aWrZs6ZKCAAAAAFdyOvBmZ2dr8uTJSkhI0MmTJ2WxWOyOHz582GXFAQAAADfK6cDbv39/rVmzRv/85z9VtWpVmUymoqgLAAAAcAmnA++PP/6o77//Xm3atCmKegAAAACXcnqXhvLly6tChQpFUQsAAADgck4H3okTJ2rs2LHKyckpinoAAAAAl3J6ScO0adN06NAh+fn5KTAwUCVLlrQ7vmXLFpcVBwAAANwopwNv165di6AMAAAAoGg4HXhjYmKKog4AAACgSBTqiyckafPmzdqzZ48k6e677+ZrhQEAAHBLcjrwnjx5Uj169FBiYqLKlSsnSUpPT9f999+vzz//XJUrV3Z1jQAAAEChOb1Lw5AhQ3T27Fn99ttvSktLU1pamnbt2qXMzEwNHTq0KGoEAAAACs3pGd74+HitWrVK9evXt7U1aNBAs2fPVqdOnVxaHAAAAHCjnJ7htVgs+bYik6SSJUvKYrG4pCgAAADAVZwOvA888ICGDRumP/74w9Z24sQJDR8+XB07dnRpcQAAAMCNcjrwzpo1S5mZmQoMDFTNmjVVs2ZNBQUFKTMzU++8805R1AgAAAAUmtNreAMCArRlyxatWrVKe/fulSTVr19fYWFhLi8OAAAAuFGF2ofXZDLpwQcf1IMPPujqegAAAACXcijwzpw5UwMGDJCXl5dmzpx5zb5sTQYAAIBbiclqtVqv1ykoKEibNm1SxYoVFRQUdPXBTCYdPnzYpQUWh8zMTPn6+iojI0M+Pj435TVNppvyMgCK2fXvuMZkGs9NDrgTWGNu3k3Ombzm0AzvkSNHCvwZAAAAuNU5vUvDhAkTlJOTk6/93LlzmjBhgkuKAgAAAFzF6cA7fvx4ZWVl5WvPycnR+PHjXVIUAAAA4CpOB16r1SpTAQtOt2/frgoVKrikKAAAAMBVHN6WrHz58jKZTDKZTKpTp45d6M3Ly1NWVpYGDhxYJEUCAAAAheVw4I2NjZXVatVzzz2n8ePHy9fX13bMw8NDgYGBatWqVZEUCQAAABSWw4G3d+/eki5vUda6dWuVLFmyyIoCAAAAXMXpb1pr37697efz58/rwoULdsdv1r61AAAAgCOc/tBaTk6OBg8erCpVqqh06dIqX7683QMAAAC4lTgdeEeOHKn//Oc/mjNnjjw9PfXBBx9o/Pjx8vf316JFi4qiRgAAAKDQnF7S8N1332nRokXq0KGD+vbtq3bt2qlWrVqqUaOGPvnkEz3zzDNFUScAAABQKE7P8KalpSk4OFjS5fW6aWlpkqS2bdvq//7v/1xbHQAAAHCDnA68wcHBOnLkiCSpXr16+uKLLyRdnvktV66cS4sDAAAAbpTTgbdv377avn27JGn06NGaPXu2vLy8NHz4cI0cOdLlBQIAAAA3wuk1vMOHD7f9HBYWpr1792rz5s2qVauWGjdu7NLiAAAAgBvl1AzvxYsX1bFjRx04cMDWVqNGDXXr1o2wCwAAgFuSU4G3ZMmS2rFjR1HVAgAAALic02t4n332WX344YdFUQsAAADgck6v4b106ZLmz5+vVatWqVmzZipdurTd8bfffttlxQEAAAA3yukZ3l27dumee+5R2bJltX//fm3dutX22LZtm9MFzJ49W4GBgfLy8lJoaKg2btx41b6//fabnnzySQUGBspkMik2NjZfn3HjxslkMtk96tWr53RdAAAAMAanZ3hXr17tshdfvHixIiMjNXfuXIWGhio2Nlbh4eHat2+fqlSpkq9/Tk6OgoOD1b17d7vdIv7u7rvv1qpVq2zPS5Rw+jIBAABgEE7P8F5x8OBBrVi
xQufOnZMkWa1Wp8d4++239fzzz6tv375q0KCB5s6dq1KlSmn+/PkF9m/RooX+/e9/q0ePHvL09LzquCVKlJDZbLY9KlWq5HRtAAAAMAanA++ZM2fUsWNH1alTRw899JCSk5MlSf369dOIESMcHufChQvavHmzwsLC/leMm5vCwsK0fv16Z8uyc+DAAfn7+ys4OFjPPPOMkpKSrtk/NzdXmZmZdg8AAAAYg9OBd/jw4SpZsqSSkpJUqlQpW3tERITi4+MdHuf06dPKy8uTn5+fXbufn59SUlKcLcsmNDRUcXFxio+P15w5c3TkyBG1a9dOZ8+eveo5kyZNkq+vr+0REBBQ6NcHAADArcXpxa0rV67UihUrVL16dbv22rVr69ixYy4rrLC6dOli+7lx48YKDQ1VjRo19MUXX6hfv34FnhMdHa3IyEjb88zMTEIvAACAQTgdeLOzs+1mdq9IS0u75rrav6tUqZLc3d2Vmppq156amiqz2exsWVdVrlw51alTRwcPHrxqH09PT6dqBwAAwO3D6SUN7dq106JFi2zPTSaTLBaLpkyZovvvv9/hcTw8PNSsWTMlJCTY2iwWixISEtSqVStny7qqrKwsHTp0SFWrVnXZmAAAALh9OD3DO2XKFHXs2FGbNm3ShQsXNGrUKP32229KS0vTunXrnBorMjJSvXv3VvPmzdWyZUvFxsYqOztbffv2lST16tVL1apV06RJkyRd/qDb7t27bT+fOHFC27ZtU5kyZVSrVi1JUlRUlB599FHVqFFDf/zxh2JiYuTu7q6ePXs6e6kAAAAwAKcDb8OGDbV//37NmjVLZcuWVVZWlrp166ZBgwY5PYsaERGhU6dOaezYsUpJSVFISIji4+NtH2RLSkqSm9v/JqH/+OMPNW3a1PZ86tSpmjp1qtq3b6/ExERJ0u+//66ePXvqzJkzqly5stq2batffvlFlStXdvZSAQAAYAAmq5Mb6CYlJSkgIEAmk6nAY3fddZfLiisumZmZ8vX1VUZGhnx8fG7KaxbwdgIwoEJsWW4IpvHc5IA7gTXm5t3knMlrTq/hDQoK0qlTp/K1nzlzRkFBQc4OBwAAABQppwOv1WotcHY3KytLXl5eLikKAAAAcBWH1/Be2afWZDJpzJgxdluT5eXlacOGDQoJCXF5gQAAAMCNcDjwbt26VdLlGd6dO3fKw8PDdszDw0NNmjRRVFSU6ysEAAAAboDDgXf16tWSpL59+2rGjBk37cNcAAAAwI1weluyBQsWFEUdAAAAQJFwKPB269ZNcXFx8vHxUbdu3a7Z95tvvnFJYQAAAIArOBR4fX19bTsz+Pr6FmlBAAAAgCs5FHj/uoyBJQ0AAAC4nTi9Dy8AAABwOyHwAgAAwNAIvAAAADA0Ai8AAAAMjcALAAAAQ3PqiydOnz6t+fPna/369UpJSZEkmc1mtW7dWn369FHlypWLpEgAAACgsBye4f31119Vp04dzZw5U76+vrrvvvt03333ydfXVzNnzlS9evW0adOmoqwVAAAAcJrDM7xDhgxR9+7dNXfuXNuXUFxhtVo1cOBADRkyROvXr3d5kQAAAEBhORx4t2/frri4uHxhV5JMJpOGDx+upk2burQ4AAAA4EY5vKTBbDZr48aNVz2+ceNG+fn5uaQoAAAAwFUcnuGNiorSgAEDtHnzZnXs2NEWblNTU5WQkKB58+Zp6tSpRVYoAAAAUBgOB95BgwapUqVKmj59ut59913l5eVJktzd3dWsWTPFxcXp6aefLrJCAQAAgMJwaluyiIgIRURE6OLFizp9+rQkqVKlSipZsmSRFAcAAADcKKcC7xUlS5ZUhQoVbD8DAAAAtyqnvmntp59+0kMPPaTy5curVKlSKlWqlMqXL6+HHnpIq1atKqoaAQAAgEJzOPAuXLhQDz30kHx9fTV9+nQtX75cy5cv1/Tp01WuXDk99NBD+uijj4qyVgAAAMBpJqvVanWkY506dTRs2DANGjSowOPvvvuupk+frgMHDri0wOKQmZkpX19fZWRkyMfH56a8ZgHbGwMwIMfuuMZjGs9NDrgTWGNu3k3Ombzm8AxvUlKSwsLCrnq8Y8eO+v333x2vEgAAALgJHA68d999tz788MOrHp8/f74aNGjgkqIAAAAAV3F4l4Zp06bpkUceUXx8vMLCwvJ98cThw4f1/fffF1mhAAAAQGE4HHg7dOigXbt2ac6cOfrll1+UkpIi6fJXDnfp0kUDBw5UYGBgUdUJAAAAFIpT+/AGBgbqrbfeKqpaAAAAAJdzah9eAAAA4HbjssC7fft2ubu7u2o4AAAAwCVcOsPr4Ja+AAAAwE3j8Brebt26XfN4RkaGTHx7AgAAAG4xDgfe7777Tg8++KBtO7K/y8vLc1lRAAAAgKs4HHjr16+vJ598Uv369Svw+LZt27R8+XKXFQYAAAC4gsNreJs1a6YtW7Zc9binp6fuuusulxQFAAAAuIrDM7xz58695rKF+vXr68iRIy4pCgAAAHAVhwOvp6dnUdYBAAAAFAm+eAIAAACGRuAFAACAoRF4AQAAYGgEXgAAABiaw4H3rrvu0uDBg7Vy5UpdunSpKGsCAAAAXMbhwPvRRx/J09NTgwYNUqVKlRQREaFPPvlE6enpRVgeAAAAcGMcDrzt27fXtGnTdODAAa1bt04hISF65513ZDab9cADDyg2NlaHDx8uyloBAAAApxVqDe/dd9+t6Oho/fLLLzp69Kh69uyphIQENWzYUA0bNtT333/v6joBAACAQnH4iyeuxmw26/nnn9fzzz+vnJwcrVixgi+pAAAAwC3jhgPvX5UqVUpPPPGEK4cEAAAAbgjbkgEAAMDQCLwAAAAwNAIvAAAADK3QgffgwYNasWKFzp07J0myWq0uKwoAAABwFacD75kzZxQWFqY6derooYceUnJysiSpX79+GjFihMsLBAAAAG6E04F3+PDhKlGihJKSklSqVClbe0REhOLj411aHAAAAHCjnN6WbOXKlVqxYoWqV69u1167dm0dO3bMZYUBAAAAruD0DG92drbdzO4VaWlpfOEEAAAAbjlOB9527dpp0aJFtucmk0kWi0VTpkzR/fff79LiAAAAgBvl9JKGKVOmqGPHjtq0aZMuXLigUaNG6bffflNaWprWrVtXFDUCAAAAheb0DG/Dhg21f/9+tW3bVo8//riys7PVrVs3bd26VTVr1iyKGgEAAIBCc3qGV5J8fX312muvuboWAAAAwOUKFXjPnz+vHTt26OTJk7JYLHbHHnvsMZcUBgAAALiC04E3Pj5evXr10unTp/MdM5lMysvLc0lhAAAAgCs4vYZ3yJAh6t69u5KTk2WxWOwehF0AAADcapwOvKmpqYqMjJSfn19R1AMAAAC4lNOB96mnnlJiYqLLCpg9e7YCAwPl5eWl0NBQbdy48ap9f/vtNz355JMKDAyUyWRSbGzsDY8JAAAAY3N6De+sWbPUvXt3/fzzz2rUqJFKlixpd3zo0KEOj7V48WJFRkZq7ty5Cg0NVWxsrMLDw7Vv3z5VqVIlX/
+cnBwFBwere/fuGj58uEvGBAAAgLGZrFar1ZkTPvzwQw0cOFBeXl6qWLGiTCbT/wYzmXT48GGHxwoNDVWLFi00a9YsSZLFYlFAQICGDBmi0aNHX/PcwMBAvfzyy3r55ZddNuYVmZmZ8vX1VUZGhnx8fBy+nhvxl7cRgIE5d8c1DtN4bnLAncAac/Nucs7kNaeXNLz22msaP368MjIydPToUR05csT2cCbsXrhwQZs3b1ZYWNj/inFzU1hYmNavX+9sWTc0Zm5urjIzM+0eAAAAMAanA++FCxcUEREhNzenT7Vz+vRp5eXl5fvwm5+fn1JSUm7qmJMmTZKvr6/tERAQUKjXBwAAwK3H6dTau3dvLV68uChqKTbR0dHKyMiwPY4fP17cJQEAAMBFnP7QWl5enqZMmaIVK1aocePG+T609vbbbzs0TqVKleTu7q7U1FS79tTUVJnNZmfLuqExPT095enpWajXBAAAwK3N6RnenTt3qmnTpnJzc9OuXbu0detW22Pbtm0Oj+Ph4aFmzZopISHB1maxWJSQkKBWrVo5W1aRjQkAAIDbm9MzvKtXr3bZi0dGRqp3795q3ry5WrZsqdjYWGVnZ6tv376SpF69eqlatWqaNGmSpMvrh3fv3m37+cSJE9q2bZvKlCmjWrVqOTQmAAAA7ixOB15XioiI0KlTpzR27FilpKQoJCRE8fHxtg+dJSUl2X047o8//lDTpk1tz6dOnaqpU6eqffv2ti/DuN6YAAAAuLM4tA9vt27dFBcXJx8fH3Xr1u2afb/55huXFVdc2IcXQFFhH14ARnar7sPr0Ayvr6+v7QsmfH19b7xCAAAA4CZxKPAuWLBAEyZMUFRUlBYsWFDUNQEAAAAu4/AuDePHj1dWVlZR1gIAAAC4nMOB14GlvgAAAMAtx6l9eE18sgoAAAC3Gae2JatTp851Q29aWtoNFQQAAAC4klOBd/z48ezSAAAAgNuKU4G3R48eqlKlSlHVAgAAALicw2t4Wb8LAACA2xG7NAAAAMDQHF7SYLFYirIOAAAAoEg4tS0ZAAAAcLsh8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADO2WCLyzZ89WYGCgvLy8FBoaqo0bN16z/5dffql69erJy8tLjRo10g8//GB3vE+fPjKZTHaPzp07F+UlAAAA4BZV7IF38eLFioyMVExMjLZs2aImTZooPDxcJ0+eLLD/f//7X/Xs2VP9+vXT1q1b1bVrV3Xt2lW7du2y69e5c2clJyfbHp999tnNuBwAAADcYkxWq9VanAWEhoaqRYsWmjVrliTJYrEoICBAQ4YM0ejRo/P1j4iIUHZ2tpYvX25ru/feexUSEqK5c+dKujzDm56erqVLlzpUQ25urnJzc23PMzMzFRAQoIyMDPn4+NzA1TnOZLopLwOgmBXvHbf4mMZzkwPuBNaYm3eTy8zMlK+vr0N5rVhneC9cuKDNmzcrLCzM1ubm5qawsDCtX7++wHPWr19v11+SwsPD8/VPTExUlSpVVLduXb344os6c+bMVeuYNGmSfH19bY+AgIAbuCoAAADcSoo18J4+fVp5eXny8/Oza/fz81NKSkqB56SkpFy3f+fOnbVo0SIlJCTorbfe0po1a9SlSxfl5eUVOGZ0dLQyMjJsj+PHj9/glQEAAOBWUaK4CygKPXr0sP3cqFEjNW7cWDVr1lRiYqI6duyYr7+np6c8PT1vZokAAAC4SYp1hrdSpUpyd3dXamqqXXtqaqrMZnOB55jNZqf6S1JwcLAqVaqkgwcP3njRAAAAuK0Ua+D18PBQs2bNlJCQYGuzWCxKSEhQq1atCjynVatWdv0l6aeffrpqf0n6/fffdebMGVWtWtU1hQMAAOC2UezbkkVGRmrevHlauHCh9uzZoxdffFHZ2dnq27evJKlXr16Kjo629R82bJji4+M1bdo07d27V+PGjdOmTZs0ePBgSVJWVpZGjhypX375RUePHlVCQoIef/xx1apVS+Hh4cVyjQAAACg+xb6GNyIiQqdOndLYsWOVkpKikJAQxcfH2z6YlpSUJDe3/+Xy1q1b69NPP9Xrr7+uV199VbVr19bSpUvVsGFDSZK7u7t27NihhQsXKj09Xf7+/urUqZMmTpzIOl0AAIA7ULHvw3srcmZfN1dhH17gznCn3nHZhxe4M7APLwAAAFAMCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwNAIvAAAADI3ACwAAAEMj8AIAAMDQCLwAAAAwtFsi8M6ePVuBgYHy8vJSaGioNm7ceM3+X375perVqycvLy81atRIP/zwg91xq9WqsWPHqmrVqvL29lZYWJgOHDhQlJcAAACAW1SxB97FixcrMjJSMTEx2rJli5o0aaLw8HCdPHmywP7//e9/1bNnT/Xr109bt25V165d1bVrV+3atcvWZ8qUKZo5c6bmzp2rDRs2qHTp0goPD9f58+dv1mUBAADgFmGyWq3W4iwgNDRULVq00KxZsyRJFotFAQEBGjJkiEaPHp2vf0REhLKzs7V8+XJb27333quQkBDNnTtXVqtV/v7+GjFihKKio
     [base64 "image/png" payloads omitted: this "-"/"+" pair replaces a multi-kilobyte Matplotlib 3.9.0 PNG notebook output with a regenerated one, along with its "text/plain" figure repr; the binary data is unrecoverable]
" ] @@ -631,13 +595,13 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "ac44f6df-0983-45ec-a33f-4a30a2df1957", "metadata": {}, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAq4AAAIjCAYAAADC0ZkAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABA+ElEQVR4nO3deViU1f//8dcAgsgyCCq4gLupueaKe4ohleWW1qc+KplWrml+UioTS8NsUTM1LRNtM820rMzUwlzINczKPRVTwExlcQGV+f3Rz/k6gooGDMeej+ua62LOfe5zv2fwmuvl4cy5LTabzSYAAACgiHNxdgEAAABAXhBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBGMNisSg6OtrZZVxTpUqV1LdvX2eX8a/Tt29fVapUydllAChgBFcAeRYbGyuLxXLVx48//ujsEgvMjh071KNHD1WsWFHFixdX+fLl1bFjR02bNs3ZpUmSzp07p8mTJ6tZs2ayWq0qXry4atSoocGDB2vPnj3OLg8A8oXFZrPZnF0EADPExsYqMjJSL774oipXrpzjeKdOnVSqVKkCu/65c+fk5uYmNze3ArtGbjZs2KA777xTISEh6tOnj4KCgnT48GH9+OOP2r9/v/bt22fvm5mZKRcXFxUrVqzQ6jt+/Lg6deqkrVu36t5771VYWJi8vb21e/duLViwQMnJycrKyiq0epzh/Pnzys7OloeHh7NLAVCACvfTH8AtISIiQo0bNy706xYvXrzQrylJEyZMkNVq1ebNm+Xn5+dw7NixYw7PnRGc+vbtq59++kmffvqpunfv7nDspZde0nPPPVfoNRWW06dPy8vLq1D/owDAeVgqACDfHTx4UBaLRa+99ppmz56tqlWrysPDQ02aNNHmzZtz9F+0aJFq166t4sWLq06dOlqyZEmuaxavXOMaHR0ti8Wiffv2qW/fvvLz85PValVkZKTOnDmT4zoffPCBGjVqJE9PT/n7++vBBx/U4cOHr/t69u/fr9tvvz1HaJWkMmXKODy/co3rtZZWHDx40N5v165d6tGjh/z9/VW8eHE1btxYX3zxxXVr27hxo7766iv169cvR2iV/g7Sr732mkPbd999p9atW8vLy0t+fn66//77tXPnToc+l97bPXv26JFHHpHValXp0qU1ZswY2Ww2HT58WPfff798fX0VFBSk119/3eH8uLg4WSwWffLJJ3r22WcVFBQkLy8v3XfffTne87Vr1+qBBx5QSEiIPDw8FBwcrOHDh+vs2bMO/fr27Stvb2/t379fd999t3x8fPTwww/bj13572XBggVq1KiRfHx85Ovrq7p162rq1KkOfX7//Xc98MAD8vf3V4kSJdS8eXN99dVXub6WhQsXasKECapQoYKKFy+uDh06OMy2Ayh4zLgCuGGpqak6fvy4Q5vFYlFAQIBD20cffaT09HQ9/vjjslgsmjRpkrp166bff//dPkP21VdfqVevXqpbt65iYmJ08uRJ9evXT+XLl89zPT179lTlypUVExOjbdu26d1331WZMmX0yiuv2PtMmDBBY8aMUc+ePfXYY4/pzz//1LRp09SmTRv99NNPuYbSSypWrKj4+Hj98ssvqlOnTp7rkqT3338/R9vzzz+vY8eOydvbW5L066+/qmXLlipfvrxGjx4tLy8vLVy4UF26dNHixYvVtWvXq45/Kdz+97//zVM9q1atUkREhKpUqaLo6GidPXtW06ZNU8uWLbVt27Yc4a9Xr16qVauWJk6cqK+++krjx4+Xv7+/Zs2apfbt2+uVV17Rhx9+qJEjR6pJkyZq06aNw/kTJkyQxWLRqFGjdOzYMU2ZMkVhYWFKSEiQp6enpL//43LmzBk9+eSTCggI0KZNmzRt2jT98ccfWrRokcN4Fy5cUHh4uFq1aqXXXntNJUqUyPV1rly5Ug899JA6dOhg/3ewc+dOrV+/XsOGDZMkpaSkqEWLFjpz5oyGDh2qgIAAzZs3T/fdd58+/fTTHO/7xIkT5eLiopEjRyo1NVWTJk3Sww8/rI0bN+bpvQeQD2wAkEdz5861Scr14eHhYe934MABmyRbQECA7cSJE/b2zz//3CbJtmzZMntb3bp1bRUqVLClp6fb2+Li4mySbBUrVnS4viTb2LFj7c/Hjh1rk2R79NFHHfp17drVFhAQYH9+8OBBm6urq23ChAkO/Xbs2GFzc3PL0X6lb7/91ubq6mpzdXW1hYaG2p555hnbihUrbFlZWTn6VqxY0danT5+rjjVp0iSbJNv8+fPtbR06dLDVrVvXdu7cOXtbdna2rUWLFrbq1atfs7auXbvaJNlOnjx5zX6XNGjQwFamTBnbX3/9ZW/bvn27zcXFxda7d29726X3dsCAAfa2Cxcu2CpUqGCzWCy2iRMn2ttPnjxp8/T0dHjd33//vU2SrXz58ra0tDR7+8KFC22SbFOnTrW3nTlzJkedMTExNovFYjt06JC9rU+fPjZJttGjR+fo36dPH4d/L8OGDbP5+vraLly4cNX34qmnnrJJsq1du9belp6ebqtcubKtUqVKtosXLzq8llq1atkyMzPtfadOnWqTZNuxY8dVrwEgf7FUAMANmz59ulauXOnwWL58eY5+vXr1UsmSJe3PW7duLenvP89K0tGjR7Vjxw717t3bPvsoSW3btlXdunXzXM8TTzzh8Lx169b666+/lJaWJkn67LPPlJ2drZ49e+r48eP2R1BQkKpXr67vv//+muN37NhR8fHxuu+++7R9+3ZNmjRJ4eHhKl++fJ7+nH/J999/r6ioKA0ZMsQ+Q3rixAl999136tmzp9LT0+21/fXXXwoPD9fevXt15MiRq4556TX6+Phc9/pJSUlKSEhQ37595e/vb2+vV6+eOnbsqK+//jrHOY899pj9Z1dXVzVu3Fg2m039+vWzt/v5+em2226z/14v17t3b4faevToobJlyzpc69LMq/T3mtXjx4+rRYsWstls+umnn3KM+eSTT173tfr5+en06dNauXLlVft8/fXXatq0qVq1amVv8/b21oABA3Tw4EH99ttvDv0jIyPl7u5uf37lv2cABY+lAgBuWNOmTfP05ayQkBCH55dC7MmTJyVJhw4dkiRVq1Ytx7nVqlXTtm3b8lTPta7j6+urvXv3ymazqXr16rmen5cv9jRp0kSfffaZsrKytH37di1ZskSTJ09Wjx49lJCQoNq1a1/z/D/++EO9evVSy5Yt9cYbb9jb9+3bJ5vNpjFjxmjMmDG5nnvs2LGrLp3w9fWVJKWnp19zuYP0f+/3bb
fdluNYrVq1tGLFCvuXnS658r29tNXWlbtHWK1W/fXXXznGvfI9t1gsqlatmsP63sTERL3wwgv64osv7P82LklNTXV47ubmpgoVKlzjVf5t4MCBWrhwoSIiIlS+fHnddddd6tmzpzp16mTvc+jQITVr1izHubVq1bIfv3xpyPX+PQMoeARXAAXG1dU113ZbPu/Cd73rZGdny2KxaPny5bn2vXy293rc3d3VpEkTNWnSRDVq1FBkZKQWLVqksWPHXvWcrKws9ejRQx4eHlq4cKHDdl7Z2dmSpJEjRyo8PDzX83ML9pfUrFlT0t/7zF6aAcxPub1f+fl7vXjxojp27KgTJ05o1KhRqlmzpry8vHTkyBH17dvX/v5c4uHhIReX6/+xsEyZMkpISNCKFSu0fPlyLV++XHPnzlXv3r01b968G65TKrx/zwCujuAKwGkqVqwoSbl+Mzs/v61dtWpV2Ww2Va5cWTVq1Mi3cS/NOiclJV2z39ChQ5WQkKAffvhBgYGBDseqVKki6e9Z37CwsBuuoXPnzoqJidEHH3xw3eB66f3evXt3jmO7du1SqVKlHGZb88PevXsdnttsNu3bt0/16tWT9Hfg3rNnj+bNm6fevXvb+13rT/x55e7urs6dO6tz587Kzs7WwIEDNWvWLI0ZM0bVqlVTxYoVr/peSP/3fgEoOljjCsBpypUrpzp16mj+/PnKyMiwt69Zs0Y7duzIt+t069ZNrq6uGjduXI7ZMZvNluufuC/3/fff5zqrdmmdZm5/er9k7ty5mjVrlqZPn66mTZvmOF6mTBm1a9dOs2bNyjUA//nnn9esLTQ0VJ06ddK7776rpUuX5jielZWlkSNHSpLKli2rBg0aaN68eTp16pS9zy+//KJvv/1Wd9999zWvdTPmz5+v9PR0+/NPP/1USUlJioiIkPR/s5iXv782my3HtlU36srfqYuLiz0sZ2ZmSpLuvvtubdq0SfHx8fZ+p0+f1uzZs1WpUqXrLv8AUPiYcQVww5YvX26flbpcixYt7DOIefXyyy/r/vvvV8uWLRUZGamTJ0/qrbfeUp06dRzC7D9RtWpVjR8/XlFRUTp48KC6dOkiHx8fHThwQEuWLNGAAQPs4S43Q4YM0ZkzZ9S1a1fVrFlTWVlZ2rBhgz755BNVqlRJkZGRuZ53/PhxDRw4ULVr15aHh4c++OADh+Ndu3aVl5eXpk+frlatWqlu3brq37+/qlSpopSUFMXHx+uPP/7Q9u3br/n65s+fr7vuukvdunVT586d1aFDB3l5eWnv3r1asGCBkpKS7Hu5vvrqq4qIiFBoaKj69etn3w7LarU67JGbX/z9/dWqVStFRkYqJSVFU6ZMUbVq1dS/f39Jfy91qFq1qkaOHKkjR47I19dXixcv/sfrRh977DGdOHFC7du3V4UKFXTo0CFNmzZNDRo0sK9hHT16tD7++GNFRERo6NCh8vf317x583TgwAEtXrw4T0sSABQugiuAG/bCCy/k2j537twbDq6dO3fWxx9/rOjoaI0ePVrVq1dXbGys5s2bp19//TU/ypX0d0ipUaOGJk+erHHjxkmSgoODddddd+m+++675rmvvfaaFi1apK+//lqzZ89WVlaWQkJCNHDgQD3//PNX/VJURkaGzp07p99++y3XfVYPHDggLy8v1a5dW1u2bNG4ceMUGxurv/76S2XKlFHDhg2v+l5frnTp0tqwYYNmzJihTz75RM8995yysrJUsWJF3XffffZ9SyUpLCxM33zzjcaOHasXXnhBxYoVU9u2bfXKK6/kehvff+rZZ5/Vzz//rJiYGKWnp6tDhw6aMWOGff/VYsWKadmyZRo6dKhiYmJUvHhxde3aVYMHD1b9+vVv+rqPPPKIZs+erRkzZujUqVMKCgpSr169FB0dbQ+kgYGB2rBhg0aNGqVp06bp3LlzqlevnpYtW6Z77rknX14/gPxlsbGqHEAR1KBBA5UuXTpf1jqi8MXFxenOO+/UokWL1KNHD2eXA+AWwd9BADjV+fPndeHCBYe2uLg4bd++Xe3atXNOUQCAIomlAgCc6siRIwoLC9MjjzyicuXKadeuXXr77bcVFBSU48YCAIB/N4IrAKcqWbKkGjVqpHfffVd//vmnvLy8dM8992jixIkKCAhwdnkAgCKENa4AAAAwAmtcAQAAYASCKwAAAIxwy69xzc7O1tGjR+Xj4yOLxeLscgAAAHAFm82m9PR0lStX7po3/7jlg+vRo0cVHBzs7DIAAABwHYcPH1aFChWuevyWD64+Pj6S/n4jfH19nVwNAAAArpSWlqbg4GB7bruaWz64Xloe4OvrS3AFAAAowq63rJMvZwEAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjODU4BodHS2LxeLwqFmzpv34uXPnNGjQIAUEBMjb21vdu3dXSkqKEysGAACAszh9xvX2229XUlKS/bFu3Tr7seHDh2vZsmVatGiR1qxZo6NHj6pbt25OrBYAAADO4ub0AtzcFBQUlKM9NTVVc+bM0UcffaT27dtLkubOnatatWrpxx9/VPPmzQu7VAAAADiR02dc9+7dq3LlyqlKlSp6+OGHlZiYKEnaunWrzp8/r7CwMHvfmjVrKiQkRPHx8VcdLzMzU2lpaQ4PAAAAmM+pwbVZs2aKjY3VN998o5kzZ+rAgQNq3bq10tPTlZycLHd3d/n5+TmcExgYqOTk5KuOGRMTI6vVan8EBwcX8KsAAABAYXDqUoGIiAj7z/Xq1VOzZs1UsWJFLVy4UJ6enjc1ZlRUlEaMGGF/npaWRngFAAC4BTh9jevl/Pz8VKNGDe3bt08dO3ZUVlaWTp065TDrmpKSkuua2Es8PDzk4eFRCNVencXi1MsDKCQ2m7MrAIB/F6evcb1cRkaG9u/fr7Jly6pRo0YqVqyYVq9ebT++e/duJSYmKjQ01IlVAgAAwBmcOuM6cuRIde7cWRUrVtTRo0c1duxYubq66qGHHpLValW/fv00YsQI+fv7y9fXV0OGDFFoaCg7CgAAAPwLOTW4/vHHH3rooYf0119/qXTp0mrVqpV+/PFHlS5dWpI0efJkubi4qHv37srMzFR4eLhmzJjhzJIBAADgJBab7dZepZWWliar1arU1FT5+voWyjVZ4wr8O9zan54AUHjymteK1BpXAAAA4GoIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBF
QAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGKHIBNeJEyfKYrHoqaeesredO3dOgwYNUkBAgLy9vdW9e3elpKQ4r0gAAAA4TZEIrps3b9asWbNUr149h/bhw4dr2bJlWrRokdasWaOjR4+qW7duTqoSAAAAzuT04JqRkaGHH35Y77zzjkqWLGlvT01N1Zw5c/TGG2+offv2atSokebOnasNGzboxx9/dGLFAAAAcAanB9dBgwbpnnvuUVhYmEP71q1bdf78eYf2mjVrKiQkRPHx8VcdLzMzU2lpaQ4PAAAAmM/NmRdfsGCBtm3bps2bN+c4lpycLHd3d/n5+Tm0BwYGKjk5+apjxsTEaNy4cfldKgAAAJzMaTOuhw8f1rBhw/Thhx+qePHi+TZuVFSUUlNT7Y/Dhw/n29gAAABwHqcF161bt+rYsWO644475ObmJjc3N61Zs0Zvvvmm3NzcFBgYqKysLJ06dcrhvJSUFAUFBV11XA8PD/n6+jo8AAAAYD6nLRXo0KGDduzY4dAWGRmpmjVratSoUQoODlaxYsW0evVqde/eXZK0e/duJSYmKjQ01BklAwAAwImcFlx9fHxUp04dhzYvLy8FBATY2/v166cRI0bI399fvr6+GjJkiEJDQ9W8eXNnlAwAAAAncuqXs65n8uTJcnFxUffu3ZWZmanw8HDNmDHD2WUBAADACSw2m83m7CIKUlpamqxWq1JTUwttvavFUiiXAeBkt/anJwAUnrzmNafv4woAAADkBcEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBKcG15kzZ6pevXry9fWVr6+vQkNDtXz5cvvxc+fOadCgQQoICJC3t7e6d++ulJQUJ1YMAAAAZ3FqcK1QoYImTpyorVu3asuWLWrfvr3uv/9+/frrr5Kk4cOHa9myZVq0aJHWrFmjo0ePqlu3bs4sGQAAAE5isdlsNmcXcTl/f3+9+uqr6tGjh0qXLq2PPvpIPXr0kCTt2rVLtWrVUnx8vJo3b56n8dLS0mS1WpWamipfX9+CLN3OYimUywBwsqL16QkA5sprXisya1wvXryoBQsW6PTp0woNDdXWrVt1/vx5hYWF2fvUrFlTISEhio+Pv+o4mZmZSktLc3gAAADAfE4Prjt27JC3t7c8PDz0xBNPaMmSJapdu7aSk5Pl7u4uPz8/h/6BgYFKTk6+6ngxMTGyWq32R3BwcAG/AgAAABSGfxRcMzMz/3EBt912mxISErRx40Y9+eST6tOnj3777bebHi8qKkqpqan2x+HDh/9xjQAAAHA+txvpvHz5ci1YsEBr167V4cOHlZ2dLS8vLzVs2FB33XWXIiMjVa5cuRsqwN3dXdWqVZMkNWrUSJs3b9bUqVPVq1cvZWVl6dSpUw6zrikpKQoKCrrqeB4eHvLw8LihGgAAAFD05WnGdcmSJapRo4YeffRRubm5adSoUfrss8+0YsUKvfvuu2rbtq1WrVqlKlWq6IknntCff/550wVlZ2crMzNTjRo1UrFixbR69Wr7sd27dysxMVGhoaE3PT4AAADMlKcZ10mTJmny5MmKiIiQi0vOrNuzZ09J0pEjRzRt2jR98MEHGj58+HXHjYqKUkREhEJCQpSenq6PPvpIcXFxWrFihaxWq/r166cRI0bI399fvr6+GjJkiEJDQ/O8owAAAABuHXkKrtf6Fv/lypcvr4kTJ+b54seOHVPv3r2VlJQkq9WqevXqacWKFerYsaMkafLkyXJxcVH37t2VmZmp8PBwzZgxI8/jAwAA4NZR5PZxzW/s4wqgoNzan54AUHgKZB/XvXv3avHixTpw4IAk6auvvlKbNm3UpEkTTZgwQbd4BgYAAIAT5XlXgSVLlqhnz55ycXGRxWLR7Nmz9fjjj6tdu3by9fVVdHS0/YtbAAAAQH7L84zrhAkT9Mwzz+jcuXOaOXOmnnjiCcXExGj58uX68ssvNX36dMXGxhZgqQAAAPg3y/MaVx8fHyUkJKhq1arKzs6Wu7u7EhISVKdOHUnSwYMHVbt2bZ05c6ZAC75RrHEFUFBYHQUA+SPf17iePn1aPj4+f5/k4iJPT0+VKFHCftzT0zNf7qQFAAAA5CbPwdVischy2VTilc8BAACAgpTnL2fZbDbVqFHDHlYzMjLUsGFD+w0J2FEAAAAABSnPwXXu3LkFWQcAAABwTXkOrn369CnIOgAAAIBruqEbEAAAAADOkqcZV39/f+3Zs0elSpVSyZIlr/mlrBMnTuRbcQAAAMAleQqukydPtm+FNWXKlIKsBwAAAMhVnm9AYCpuQACgoNzan54AUHjy/QYEAAAAgDPleVcBV1fXPPW7ePHiTRcDAAAAXM0N3YCgYsWK6tOnjxo2bFiQNQEAAAA55Dm4btq0SXPmzNHUqVNVuXJlPfroo3r44YdVsmTJgqwPAAAAkHQDa1wbN26smTNnKikpSSNGjNCSJUtUoUIFPfjgg1q5cmVB1ggAAADc+JezihcvrkceeUSrV6/WL7/8omPHjqlTp07s3woAAIACleelApf7448/FBsbq9jYWJ05c0b/+9//
Cm2rKQAAAPw75Tm4ZmVlacmSJZozZ47Wrl2riIgITZkyRREREXnecQAAAAC4WXkOrmXLlpWPj4/69OmjGTNmqEyZMpKk06dPO/Rj5hUAAAAFIc93znJx+b/lsJZcbg1ls9lksViK3D6u3DkLQEHhzlkAkD/ymtfyPOP6/fff50thAAAAwM3Ic3Bt27ZtQdYBAAAAXFOetsO6ch1rfvcHAAAAridPwbVatWqaOHGikpKSrtrHZrNp5cqVioiI0JtvvplvBQIAAABSHpcKxMXF6dlnn1V0dLTq16+vxo0bq1y5cipevLhOnjyp3377TfHx8XJzc1NUVJQef/zxgq4bAAAA/zJ53lVAkhITE7Vo0SKtXbtWhw4d0tmzZ1WqVCk1bNhQ4eHhRXJPV3YVAFBQ2FUAAPJHXvPaDQVXExFcARSUW/vTEwAKT17zWp7WuAIAAADORnAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAINxVc165dq0ceeUShoaE6cuSIJOn999/XunXr8rU4AAAA4JIbDq6LFy9WeHi4PD099dNPPykzM1OSlJqaqpdffjnfCwQAAACkmwiu48eP19tvv6133nlHxYoVs7e3bNlS27Zty9fiAAAAgEtuOLju3r1bbdq0ydFutVp16tSp/KgJAAAAyOGGg2tQUJD27duXo33dunWqUqVKvhQFAAAAXOmGg2v//v01bNgwbdy4URaLRUePHtWHH36okSNH6sknnyyIGgEAAAC53egJo0ePVnZ2tjp06KAzZ86oTZs28vDw0MiRIzVkyJCCqBEAAACQxWaz2W7mxKysLO3bt08ZGRmqXbu2vL2987u2fJGWliar1arU1FT5+voWyjUtlkK5DAAnu7lPTwDAlfKa1254qcD8+fO1c+dOubu7q3bt2mratKm8vb117tw5zZ8//x8VDQAAAFzNDQfXvn37qmnTplq8eLFDe2pqqiIjI/OtMAAAAOByN3XnrHHjxum///2voqOj87kcAAAAIHc3FVwfeeQRfffdd5o1a5Z69Oihs2fP5nddAAAAgIMbDq6W///No+bNm2vjxo3at2+fWrRooYMHD+Z3bQAAAIDdDQfXyzchCAkJ0YYNG1SpUiV17NgxXwsDAAAALnfDwXXs2LEOW1+VKFFCS5Ys0fDhw3O9FSwAAACQH256H1dTsI8rgIJya396AkDhyWtey9Ods7744gtFRESoWLFi+uKLL67az2KxqHPnzjdeLQAAAHAdeZpxdXFxUXJyssqUKSMXl6uvLrBYLLp48WK+FvhPMeMKoKAw4woA+SNfZ1yzs7Nz/RkAAAAoLDe1jysAAABQ2PIcXOPj4/Xll186tM2fP1+VK1dWmTJlNGDAAGVmZuZ7gQAAAIB0A8H1xRdf1K+//mp/vmPHDvXr109hYWEaPXq0li1bppiYmAIpEgAAAMhzcE1ISFCHDh3szxcsWKBmzZrpnXfe0YgRI/Tmm29q4cKFBVIkAAAAkOfgevLkSQUGBtqfr1mzRhEREfbnTZo00eHDh/O3OgAAAOD/y3NwDQwM1IEDByRJWVlZ2rZtm5o3b24/np6ermLFiuV/hQAAAIBuILjefffdGj16tNauXauoqCiVKFFCrVu3th//+eefVbVq1QIpEgAAAMjTPq6S9NJLL6lbt25q27atvL29NW/ePLm7u9uPv/fee7rrrrsKpEgAAAAgT3fOulxqaqq8vb3l6urq0H7ixAl5e3s7hNmigDtnASgo3DkLAPJHvt4563JWqzXXdn9//xsdCgAAAMgz7pwFAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADCCU4NrTEyMmjRpIh8fH5UpU0ZdunTR7t27HfqcO3dOgwYNUkBAgLy9vdW9e3elpKQ4qWIAAAA4i1OD65o1azRo0CD9+OOPWrlypc6fP6+77rpLp0+ftvcZPny4li1bpkWLFmnNmjU6evSounXr5sSqAQAA4AwWm81mc3YRl/z5558qU6aM1qxZozZt2ig1NVWlS5fWRx99pB49ekiSdu3apVq1aik+Pl7Nmze/7phpaWmyWq1KTU2Vr69vQb8ESZLFUiiXAeBkRefTEwDMlte8VqTWuKampkqS/P39JUlbt27V+fPnFRYWZu9Ts2ZNhYSEKD4+PtcxMjMzlZaW5vAAAACA+YpMcM3OztZTTz2lli1bqk6dOpKk5ORkubu7y8/Pz6FvYGCgkpOTcx0nJiZGVqvV/ggODi7o0gEAAFAIikxwHTRokH755RctWLDgH40TFRWl1NRU++Pw4cP5VCEAAACcyc3ZBUjS4MGD9eWXX+qHH35QhQoV7O1BQUHKysrSqVOnHGZdU1JSFBQUlOtYHh4e8vDwKOiSAQAAUMicOuNqs9k0ePBgLVmyRN99950qV67scLxRo0YqVqyYVq9ebW/bvXu3EhMTFRoaWtjlAgAAwImcOuM6aNAgffTRR/r888/l4+NjX7dqtVrl6ekpq9Wqfv36acSIEfL395evr6+GDBmi0NDQPO0oAAAAgFuHU7fDslxl36i5c+eqb9++kv6+AcHTTz+tjz/+WJmZmQoPD9eMGTOuulTgSmyHBaCgsB0WAOSPvOa1IrWPa0EguAIoKLf2pycAFB4j93EFAAAArobgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEZwaXH/44Qd17txZ5cqVk8Vi0dKlSx2O22w2vfDCCypbtqw8PT0VFhamvXv3OqdYAAAAOJVTg+vp06dVv359TZ8+PdfjkyZN0ptvvqm3335bGzdulJeXl8LDw3Xu3LlCrhQAAADO5ubMi0dERCgiIiLXYzabTVOmTNHzzz+
v+++/X5I0f/58BQYGaunSpXrwwQcLs1QAAAA4WZFd43rgwAElJycrLCzM3ma1WtWsWTPFx8df9bzMzEylpaU5PAAAAGC+Ihtck5OTJUmBgYEO7YGBgfZjuYmJiZHVarU/goODC7ROAAAAFI4iG1xvVlRUlFJTU+2Pw4cPO7skAAAA5IMiG1yDgoIkSSkpKQ7tKSkp9mO58fDwkK+vr8MDAAAA5iuywbVy5coKCgrS6tWr7W1paWnauHGjQkNDnVgZAAAAnMGpuwpkZGRo37599ucHDhxQQkKC/P39FRISoqeeekrjx49X9erVVblyZY0ZM0blypVTly5dnFc0AAAAnMKpwXXLli2688477c9HjBghSerTp49iY2P1zDPP6PTp0xowYIBOnTqlVq1a6ZtvvlHx4sWdVTIAAACcxGKz2WzOLqIgpaWlyWq1KjU1tdDWu1oshXIZAE52a396AkDhyWteK7JrXAEAAIDLEVwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAUAAIARCK4AAAAwAsEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACARXAAAAGIHgCgAAACMQXAEAAGAEgisAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIzg5uwCAABmsYyzOLsEAIXANtbm7BJyYMYVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRUAAABGILgCAADACEYE1+nTp6tSpUoqXry4mjVrpk2bNjm7JAAAABSyIh9cP/nkE40YMUJjx47Vtm3bVL9+fYWHh+vYsWPOLg0AAACFqMgH1zfeeEP9+/dXZGSkateurbffflslSpTQe++95+zSAAAAUIjcnF3AtWRlZWnr1q2Kioqyt7m4uCgsLEzx8fG5npOZmanMzEz789TUVElSWlpawRYL4F/nX/uxcs7ZBQAoDIWZnS5dy2azXbNfkQ6ux48f18WLFxUYGOjQHhgYqF27duV6TkxMjMaNG5ejPTg4uEBqBPDvZbU6uwIAKDjWiYX/IZeeni7rNT5ci3RwvRlRUVEaMWKE/Xl2drZOnDihgIAAWSwWJ1aGW1VaWpqCg4N1+PBh+fr6OrscAMhXfMahMNhsNqWnp6tcuXLX7Fekg2upUqXk6uqqlJQUh/aUlBQFBQXleo6Hh4c8PDwc2vz8/AqqRMDO19eXD3UAtyw+41DQrjXTekmR/nKWu7u7GjVqpNWrV9vbsrOztXr1aoWGhjqxMgAAABS2Ij3jKkkjRoxQnz591LhxYzVt2lRTpkzR6dOnFRkZ6ezSAAAAUIiKfHDt1auX/vzzT73wwgtKTk5WgwYN9M033+T4whbgLB4eHho7dmyOJSoAcCvgMw5FicV2vX0HAAAAgCKgSK9xBQAAAC4huAIAAMAIBFcAAAAYgeAKXIfNZtOAAQPk7+8vi8WihISE655jsVi0dOlSSdLBgwfzfB4AXK5SpUqaMmVKnvsX1c+bvn37qkuXLs4uQ5IUHR2tBg0aOLsM3CSCK3Ad33zzjWJjY/Xll18qKSlJderUcXZJAP4lNm/erAEDBuTrmLGxsflyY54bCdVTp05VbGzsP74mUOS3wwKcbf/+/SpbtqxatGjh7FIA/MuULl3a2SX8IxcvXpTFYsnTHZGAvGDGFbiGvn37asiQIUpMTJTFYlGlSpVynWVo0KCBoqOjnVIjgKLjyy+/lJ+fny5evChJSkhIkMVi0ejRo+19HnvsMT3yyCOSpHXr1ql169by9PRUcHCwhg4dqtOnT9v7Xvl5s2vXLrVq1UrFixdX7dq1tWrVKoelSZf8/vvvuvPOO1WiRAnVr19f8fHxkqS4uDhFRkYqNTVVFotFFovlqp9dNptN0dHRCgkJkYeHh8qVK6ehQ4dKktq1a6dDhw5p+PDh9nGk/5vN/eKLL1S7dm15eHgoMTExx1KBdu3aafDgwRo8eLCsVqtKlSqlMWPG6PIdOitVqqSXXnpJDz30kLy8vFS+fHlNnz7docZTp07pscceU+nSpeXr66v27dtr+/btDn0mTpyowMBA+fj4qF+/fjp37tzVfn0wAMEVuIapU6fqxRdfVIUKFZSUlKTNmzc7uyQARVjr1q2Vnp6un376SZK0Zs0alSpVSnFxcfY+a9asUbt27bR//3516tRJ3bt3188//6xPPvlE69at0+DBg3Md++LFi+rSpYtKlCihjRs3avbs2Xruuedy7fvcc89p5MiRSkhIUI0aNfTQQw/pwoULatGihaZMmSJfX18lJSUpKSlJI0eOzHWMxYsXa/LkyZo1a5b27t2rpUuXqm7dupKkzz77TBUqVNCLL75oH+eSM2fO6JVXXtG7776rX3/9VWXKlMl1/Hnz5snNzU2bNm3S1KlT9cYbb+jdd9916PPqq6+qfv36+umnnzR69GgNGzZMK1eutB9/4IEHdOzYMS1fvlxbt27VHXfcoQ4dOujEiROSpIULFyo6Olovv/yytmzZorJly2rGjBm51gMzsFQAuAar1SofHx+5uroqKCjI2eUAKOKsVqsaNGiguLg4NW7cWHFxcRo+fLjGjRunjIwMpaamat++fWrbtq1iYmL08MMP66mnnpIkVa9eXW+++abatm2rmTNnqnjx4g5jr1y5Uvv371dcXJz982jChAnq2LFjjjpGjhype+65R5I0btw43X777dq3b59q1qwpq9Uqi8Vy3c+0xMREBQUFKSwsTMWKFVNISIiaNm0qSfL395erq6t8fHxyjHP+/HnNmDFD9evXv+b4wcHBmjx5siwWi2677Tbt2LFDkydPVv/+/e19WrZsaZ+trlGjhtavX6/JkyerY8eOWrdunTZt2qRjx47Z7+r12muvaenSpfr00081YMAATZkyRf369VO/fv0kSePHj9eqVauYdTUYM64AAOSjtm3bKi4uTjabTWvXrlW3bt1Uq1YtrVu3TmvWrFG5cuVUvXp1bd++XbGxsfL29rY/wsPDlZ2drQMHDuQYd/fu3QoODnYIipeC5JXq1atn/7ls2bKSpGPHjl215pdfftmhjsTERD3wwAM6e/asqlSpov79+2vJkiW6cOHCdV+/u7u7w/
Wvpnnz5vYlBpIUGhqqvXv32pdZXGq7XGhoqHbu3ClJ2r59uzIyMhQQEOBQ+4EDB7R//35J0s6dO9WsWbMcY8BczLgCN8jFxUVX3in5/PnzTqoGQFHTrl07vffee9q+fbuKFSummjVrql27doqLi9PJkyfVtm1bSVJGRoYef/xx+7rRy4WEhPyjGooVK2b/+VI4zM7Ovmr/J554Qj179rQ/L1eunNzc3LR7926tWrVKK1eu1MCBA/Xqq69qzZo1DuNfydPT0yGQFpSMjAyVLVvWYRnGJfmxawKKJoIrcINKly7tsJ4rLS0t19kRAP9Ol9a5Tp482R5S27Vrp4kTJ+rkyZN6+umnJUl33HGHfvvtN1WrVi1P49522206fPiwUlJSFBgYKEk3te7e3d3dYVZT+vtP//7+/jn6enp6qnPnzurcubMGDRqkmjVraseOHbrjjjtyHedGbNy40eH5jz/+qOrVq8vV1dWh7co+tWrVkvT3+5ecnCw3NzdVqlQp12vUqlVLGzduVO/eva86JszCUgHgBrVv317vv/++1q5dqx07dqhPnz4OH7QA/t1KliypevXq6cMPP1S7du0kSW3atNG2bdu0Z88ee5gdNWqUNmzYoMGDByshIUF79+7V559/ftUvZ3Xs2FFVq1ZVnz599PPPP2v9+vV6/vnnJemGZjgrVaqkjIwMrV69WsePH9eZM2dy7RcbG6s5c+bol19+0e+//64PPvhAnp6eqlixon2cH374QUeOHNHx48fzfP1LEhMTNWLECO3evVsff/yxpk2bpmHDhjn0Wb9+vSZNmqQ9e/Zo+vTpWrRokb1PWFiYQkND1aVLF3377bc6ePCgNmzYoOeee05btmyRJA0bNkzvvfee5s6dqz179mjs2LH69ddfb7hWFB0EV+AGRUVFqW3btrr33nt1zz33qEuXLqpataqzywJQhLRt21YXL160B1d/f3/Vrl1bQUFBuu222yT9vQ51zZo12rNnj1q3bq2GDRvqhRdeULly5XId09XVVUuXLlVGRoaaNGmixx57zL6rwJVf5LqWFi1a6IknnlCvXr1UunRpTZo0Kdd+fn5+euedd9SyZUvVq1dPq1at0rJlyxQQECBJevHFF3Xw4EFVrVr1pvab7d27t86ePaumTZtq0KBBGjZsWI6bLTz99NPasmWLGjZsqPHjx+uNN95QeHi4pL/D+tdff602bdooMjJSNWrU0IMPPqhDhw7ZZ6R79eqlMWPG6JlnnlGjRo106NAhPfnkkzdcK4oOi+3KxXoAAMAI69evV6tWrbRv3z6j/gPdrl07NWjQ4Jp33qpUqZKeeuop+64LgMQaVwAAjLFkyRJ5e3urevXq2rdvn4YNG6aWLVsaFVqBf4LgCgCAIdLT0zVq1CglJiaqVKlSCgsL0+uvv+7ssoBCw1IBAAAAGIEvZwEAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYASCKwAAAIxAcAXwrxEbGys/Pz/78+joaDVo0OCa5/Tt21ddunQp0LryYvbs2QoODpaLi4umTJmSp9qd7eDBg7JYLEpISHB2KQBuEQRXAEVe3759ZbFY7I+AgAB16tRJP//88w2N06tXL+3Zs6eAqiw4aWlpGjx4sEaNGqUjR45owIABGjlypFavXu3s0uxyC/jBwcFKSkpSnTp1nFMUgFsOwRWAETp16qSkpCQlJSVp9erVcnNz07333ntDY3h6eqpMmTIFVGHBSUxM1Pnz53XPPfeobNmyKlGihLy9ve33jC9I58+fv+lzXV1dFRQUJDc37nUDIH8QXAEYwcPDQ0FBQQoKClKDBg00evRoHT58WH/++ackKS4uThaLRadOnbKfk5CQIIvFooMHD0rKuVTgShcvXtSIESPk5+engIAAPfPMM8rLPVrWr1+vdu3aqUSJEipZsqTCw8N18uRJSVJmZqaGDh2qMmXKqHjx4mrVqpU2b95sP/dS3atXr1bjxo1VokQJtWjRQrt377bXXLduXUlSlSpV7K/nyqUCFy5c0NChQ+21jxo1Sn369HGYBa1UqVKOe8M3aNBA0dHR9ucWi0UzZ87UfffdJy8vL02YMEEXL15Uv379VLlyZXl6euq2227T1KlT7edER0dr3rx5+vzzz+2z4nFxcbkuFVizZo2aNm0qDw8PlS1bVqNHj9aFCxfsx9u1a6ehQ4fqmWeekb+/v4KCghzqs9lsio6OVkhIiDw8PFSuXDkNHTr0ur8jALcGgisA42RkZOiDDz5QtWrV8nXW8fXXX1dsbKzee+89rVu3TidOnNCSJUuueU5CQoI6dOig2rVrKz4+XuvWrVPnzp118eJFSdIzzzyjxYsXa968edq2bZuqVaum8PBwnThxwmGc5557Tq+//rq2bNkiNzc3Pfroo5L+Xt6watUqSdKmTZuUlJSk4ODgHHW88sor+vDDDzV37lytX79eaWlpWrp06U29D9HR0eratat27NihRx99VNnZ2apQoYIWLVqk3377TS+88IKeffZZLVy4UJI0cuRI9ezZ02FWvEWLFjnGPXLkiO6++241adJE27dv18yZMzVnzhyNHz/eod+8efPk5eWljRs3atKkSXrxxRe1cuVKSdLixYs1efJkzZo1S3v37tXSpUvtwR7ArY+/3wAwwpdffilvb29J0unTp1W2bFl9+eWXcnHJv/9/T5kyRVFRUerWrZsk6e2339aKFSuuec6kSZPUuHFjzZgxw952++232+ucOXOmYmNjFRERIUl65513tHLlSs2ZM0f/+9//7OdMmDBBbdu2lSSNHj1a99xzj86dOydPT097OC9durSCgoJyrWPatGmKiopS165dJUlvvfWWvv7665t5G/Sf//xHkZGRDm3jxo2z/1y5cmXFx8dr4cKF6tmzp7y9veXp6anMzMyr1idJM2bMUHBwsN566y1ZLBbVrFlTR48e1ahRo/TCCy/Yf5f16tXT2LFjJUnVq1fXW2+9pdWrV6tjx45KTExUUFCQwsLCVKxYMYWEhKhp06Y39ToBmIcZVwBGuPPOO5WQkKCEhARt2rRJ4eHhioiI0KFDh/Jl/NTUVCUlJalZs2b2Njc3NzVu3Pia512acc3N/v37df78ebVs2dLeVqxYMTVt2lQ7d+506FuvXj37z2XLlpUkHTt2LM+1p6SkOAQ4V1dXNWrUKE/nXym31zx9+nQ1atRIpUuXlre3t2bPnq3ExMQbGnfnzp0KDQ2VxWKxt7Vs2VIZGRn6448/7G2XvxfS3+/HpffigQce0NmzZ1WlShX1799fS5YscVhqAODWRnAFYAQvLy9Vq1ZN1apVU5MmTfTuu+/q9OnTeueddyTJPlt3+ZrUf/LForzy9PTMl3GKFStm//lSsMvOzs6XsS9xcXHJsWY3t/fIy8vL4fmCBQs0cuRI9evXT99++60SEhIUGRmprKysfK3vksvfC+nv9+PSexEcHKzdu3drxowZ8vT01MCBA9WmTZtC+V0DcD6CKwAjWSwWubi46OzZs5L+/jO6JCUlJdn73Mj+oVarVWXLltXGjRvtbRcuX
NDWrVuveV69evWuui1V1apV5e7urvXr19vbzp8/r82bN6t27dp5ru16rFarAgMDHb70dfHiRW3bts2hX+nSpR3en7S0NB04cOC6469fv14tWrTQwIED1bBhQ1WrVk379+936OPu7m5f13s1tWrVUnx8vEN4Xr9+vXx8fFShQoXr1nGJp6enOnfurDfffFNxcXGKj4/Xjh078nw+AHMRXAEYITMzU8nJyUpOTtbOnTs1ZMgQZWRkqHPnzpKkatWqKTg4WNHR0dq7d6+++uorvf766zd0jWHDhmnixIlaunSpdu3apYEDBzrsUpCbqKgobd68WQMHDtTPP/+sXbt2aebMmTp+/Li8vLz05JNP6n//+5+++eYb/fbbb+rfv7/OnDmjfv363exbkashQ4YoJiZGn3/+uXbv3q1hw4bp5MmTDn+Wb9++vd5//32tXbtWO3bsUJ8+feTq6nrdsatXr64tW7ZoxYoV2rNnj8aMGeMQkqW/dyz4+eeftXv3bh0/fjzXGdCBAwfq8OHDGjJkiHbt2qXPP/9cY8eO1YgRI/K8Vjk2NlZz5szRL7/8ot9//10ffPCBPD09VbFixTydD8BsfDkLgBG++eYb+9pPHx8f1axZU4sWLVK7du0k/f3n5Y8//lhPPvmk6tWrpyZNmmj8+PF64IEH8nyNp59+WklJSerTp49cXFz06KOPqmvXrkpNTb3qOTVq1NC3336rZ599Vk2bNpWnp6eaNWumhx56SJI0ceJEZWdn67///a/S09PVuHFjrVixQiVLlrz5NyMXo0aNUnJysnr37i1XV1cNGDBA4eHhDsE0KipKBw4c0L333iur1aqXXnopTzOujz/+uH766Sf16tVLFotFDz30kAYOHKjly5fb+/Tv319xcXFq3LixMjIy9P3336tSpUoO45QvX15ff/21/ve//6l+/fry9/dXv3799Pzzz+f5dfr5+WnixIkaMWKELl68qLp162rZsmWFsqctAOez2PKySSEAwCjZ2dmqVauWevbsqZdeesnZ5QBAvmDGFQBuAYcOHdK3336rtm3bKjMzU2+99ZYOHDig//znP84uDQDyDWtcAeAW4OLiotjYWDVp0kQtW7bUjh07tGrVKtWqVcvZpQFAvmGpAAAAAIzAjCsAAACMQHAFAACAEQiuAAAAMALBFQAAAEYguAIAAMAIBFcAAAAYgeAKAAAAIxBcAQAAYIT/B7j8KbQIg+FtAAAAAElFTkSuQmCC", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAq4AAAIjCAYAAADC0ZkAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABA8UlEQVR4nO3deViU1f//8dewiiyDoIILuJuaW7ninmJIZbml9alEM61c0/ykVCqWhtmiZmpaJtpmmh8tKzO1MBdyDbNyzy0FzFQWF1CZ3x/9nK8jqGjAcOz5uK65LubMuc/9nhuvuV4ezpzbYrPZbAIAAACKOBdnFwAAAADkBcEVAAAARiC4AgAAwAgEVwAAABiB4AoAAAAjEFwBAABgBIIrAAAAjEBwBQAAgBEIrgAAADACwRWAMSwWi2JiYpxdxjVVrFhRvXr1cnYZ/zq9evVSxYoVnV0GgAJGcAWQZ3FxcbJYLFd9/Pjjj84uscBs375d3bp1U4UKFVSsWDGVK1dO7du319SpU51dmiTp3LlzmjRpkpo0aSKr1apixYqpevXqGjhwoHbv3u3s8gAgX1hsNpvN2UUAMENcXJx69+6tl156SZUqVcrxeocOHVSyZMkCO/+5c+fk5uYmNze3AjtHbtavX6+77rpLoaGhioqKUnBwsA4fPqwff/xR+/bt0969e+19MzMz5eLiInd390Kr7/jx4+rQoYO2bNmi++67T+Hh4fLx8dGuXbs0f/58JScnKysrq9DqcYbz588rOztbnp6ezi4FQAEq3E9/ALeEyMhINWzYsNDPW6xYsUI/pySNHz9eVqtVmzZtkr+/v8Nrx44dc3jujODUq1cv/fTTT/rss8/UtWtXh9defvllvfDCC4VeU2E5ffq0vL29C/U/CgCch6UCAPLdgQMHZLFY9Prrr2vWrFmqUqWKPD091ahRI23atClH/4ULF6pWrVoqVqyYateurcWLF+e6ZvHKNa4xMTGyWCzau3evevXqJX9/f1mtVvXu3VtnzpzJcZ4PP/xQDRo0kJeXlwICAvTQQw/p8OHD130/+/bt0+23354jtEpS6dKlHZ5fucb1WksrDhw4YO+3c+dOdevWTQEBASpWrJgaNmyoL7744rq1bdiwQV999ZX69OmTI7RKfwfp119/3aHtu+++U8uWLeXt7S1/f3898MAD2rFjh0OfS9d29+7devTRR2W1WlWqVCmNGjVKNptNhw8f1gMPPCA/Pz8FBwfrjTfecDg+Pj5eFotFn376qZ5//nkFBwfL29tb999/f45rvmbNGj344IMKDQ2Vp6enQkJCNHToUJ09e9ahX69eveTj46N9+/bpnnvuka+vrx555BH7a1f+e5k/f74aNGggX19f+fn5qU6dOpoyZYpDn99//10PPvigAgICVLx4cTVt2lRfffVVru9lwYIFGj9+vMqXL69ixYqpXbt2DrPtAAoeM64AblhqaqqOHz/u0GaxWBQYGOjQ9vHHHys9PV1PPvmkLBaLJk6cqC5duuj333+3z5B99dVX6tGjh+rUqaPY2FidPHlSffr0Ubly5fJcT/fu3VWpUiXFxsZq69ateu+991S6dGm9+uqr9j7jx4/XqFGj1L17dz3xxBP6888/NXXqVLVq1Uo//fRTrqH0kgoVKighIUG//PKLateunee6JOmDDz7I0fbiiy/q2LFj8vHxkST9+uuvat68ucqVK6eRI0fK29tbCxYsUKdOnbRo0SJ17tz5quNfCrePPfZYnupZuXKlIiMjVblyZcXExOjs2bOaOnWqmjdvrq1bt+YIfz169FDNmjU1YcIEffXVVxo3bpwCAgI0c+ZMtW3bVq+++qo++ugjDR8+XI0aNVKrVq0cjh8/frwsFotGjBihY8eOafLkyQoPD1diYqK8vLwk/f0flzNnzujpp59WYGCgNm7cqKlTp+qPP/7QwoULHca7cOGCIiIi1KJFC73++usqXrx4ru9zxYoVevjhh9WuXTv7v4MdO3Zo3bp1GjJkiCQpJSVFzZo105kzZzR48GAFBgZq7ty5uv/++/XZZ5/luO4TJkyQi4uLhg8frtTUVE2cOFGPPPKINmzYkKdrDyAf2AAgj+bMmWOTlOvD09PT3m///v02SbbAwEDbiRMn7O2ff/65TZJt6dKl9rY6derYypcvb0tPT7e3xcfH2yTZKlSo4HB+SbYxY8bYn48ZM8Ymyfb444879OvcubMtMDDQ/vzAgQM2V1dX2/jx4x36bd++3ebm5paj/UrffvutzdXV1ebq6moLCwuzPffcc
…<base64 PNG data omitted>",
 "text/plain": [
 ""
" ] @@ -685,7 +649,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "79b7b4be-1377-48e5-9a45-ad249fec04af", "metadata": {}, "outputs": [ @@ -705,17 +669,17 @@ " Tactic Sources | [EDGE_MASK_CONVOLUTIONS, JIT_CONVOLUTIONS]\n", " Profiling Verbosity | ProfilingVerbosity.DETAILED\n", " Preview Features | [PROFILE_SHARING_0806]\u001b[0m\n", - "\u001b[38;5;10m[I] Finished engine building in 35.676 seconds\u001b[0m\n", + "\u001b[38;5;10m[I] Finished engine building in 38.043 seconds\u001b[0m\n", "[I] Saving engine to resnet50_full_vc.plan\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -738,7 +702,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "c9fc753b-fc1e-470d-96a7-f7fafaabca0d", "metadata": {}, "outputs": [ @@ -758,17 +722,17 @@ " Tactic Sources | [EDGE_MASK_CONVOLUTIONS, JIT_CONVOLUTIONS]\n", " Profiling Verbosity | ProfilingVerbosity.DETAILED\n", " Preview Features | [PROFILE_SHARING_0806]\u001b[0m\n", - "\u001b[38;5;10m[I] Finished engine building in 44.720 seconds\u001b[0m\n", + "\u001b[38;5;10m[I] Finished engine building in 36.902 seconds\u001b[0m\n", "[I] Saving engine to resnet50_stripped_vc.plan\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -800,7 +764,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[I] Loading TensorRT runtime from: ./libnvinfer_lean.so.10.0.1\n", + "[I] Loading TensorRT runtime from: /usr/lib/x86_64-linux-gnu/libnvinfer_lean.so\n", "[I] Loading bytes from resnet50_stripped_vc.plan\n" ] } @@ -809,7 +773,7 @@ "from polygraphy.backend.trt import load_runtime\n", "\n", "# Load the weight-stripped engine using lean runtime.\n", - "lean_runtime = load_runtime('./libnvinfer_lean.so.10.0.1')\n", + "lean_runtime = load_runtime('/usr/lib/x86_64-linux-gnu/libnvinfer_lean.so')\n", "stripped_vc_engine = engine_from_bytes(bytes_from_path('resnet50_stripped_vc.plan'), runtime=lean_runtime)" ] }, @@ -901,7 +865,7 @@ "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAArcAAAIjCAYAAAAZajMiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABQ9klEQVR4nO3deXgN5///8ddJIpGIhCAbEXtRa62hFUsqUkuVVqlqLB+U2Epb1Wot1VJd0NqqVdFWPy21tVpU7VTV0pSq/WMrYikSsQTJ/fvDL+frSJAQTkyfj+s615Uzc8/Me07mTF6Zc899bMYYIwAAAMACXJxdAAAAAJBdCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLfAfWzo0KGy2Wz3ZFv169dX/fr17c9XrFghm82mb7/99p5s/37TsWNHeXt7O7uMuyY2NlY2m0379+93din/Kvv375fNZlNsbKyzSwFyLMItkEOkhYW0R+7cuRUcHKzIyEh9+OGHOnv2bLZs58iRIxo6dKji4uKyZX3Z6W7VlhYI0h6urq4qWrSonnjiiRz5OjjTpUuXNG7cOFWtWlU+Pj7Kly+fHnzwQXXr1k07duxwdnmSpLi4OD377LMKCQmRh4eH/Pz8FBERoWnTpiklJcXZ5QFwMjdnFwDA0fDhw1W8eHFdvnxZ8fHxWrFihfr166cPPvhA3333nSpVqmRvO3jwYL3yyitZWv+RI0c0bNgwFStWTFWqVMn0cj/99FOWtnM7bre2zGrXrp0ee+wxpaSkaPv27Zo0aZIWLlyoX3/99a5s737UunVrLVy4UO3atVPXrl11+fJl7dixQwsWLFCdOnVUtmxZSVKHDh3Utm1beXh43NP6Pv30Uz3//PMKCAhQhw4dVLp0aZ09e1ZLly5Vly5ddPToUb366qv3tKZ7KTQ0VBcuXFCuXLmcXQqQYxFugRwmKipK1atXtz8fNGiQli1bpmbNmqlFixbavn27PD09JUlubm5yc7u7b+Pz58/Ly8tL7u7ud3U798JDDz2kZ5991v68bt26atGihSZNmqSPP/7YiZXlDBs2bNCCBQv01ltvpQuI48eP15kzZ+zPXV1d5erqek/r+/XXX/X8888rLCxMP/74o/LmzWuf169fP23cuFF//vnnPa3pXrly5YpSU1Pl7u6u3LlzO7scIEejWwJwH2jYsKFef/11HThwQF9++aV9ekZ9bpcsWaKHH35Y+fLlk7e3tx544AF7UFmxYoVq1KghSerUqZP9Y/q0/nv169dXhQoVtGnTJtWrV09eXl72Za/vc5smJSVFr776qgIDA5UnTx61aNFChw4dcmhTrFgxdezYMd2y167zVrVJ0vr169WkSRP5+vrKy8tL4eHhWrt2baZfx+s1bNhQkrRv3z5J0vz589W0aVMFBwfLw8NDJUuW1JtvvpnhR93r16/XY489pvz58ytPnjyqVKmSxo0bd9PtxcXFqVChQqpfv76SkpIkSe+9957q1KmjAgUKyNPTU9WqVcuwH/OFCxfUp08fFSxYUHnz5lWLFi10+PBh2Ww2DR061KHt4cOH1blzZwUEBMjDw0MPPvigPvvss1u+Hnv37pV0NfRfz9XVVQUKFLA/v77PbdqxmNHj2t99amqqxo4dqwcffFC5c+dWQECAunfvrtOnT9+yvmHDhslms2nGjBkOwTZN9erVHbZ17tw5DRgwwN594YEHHtB7770nY4zDcjabTb169dKsWbNUvnx5eXp6KiwsTFu3bpUkffzxxypVqpRy586t+vXrp+tnfO37pk6dOvL09FTx4sU1efJkh3aXLl3SG2+8oWrVqsnX11d58uTRI488ouXLlzu0S+tG895772ns2LEqWbKkPDw89Ndff2XY5zY+Pl6dOnVSkSJF5OHhoaCgID3++OPp6pw4caIefPBBeXh4KDg4WDExMQ7/sFy7L3/99ZcaNGggLy8vFS5cWKNHj77JbwbIWbhyC9wnOnTooFdffVU//fSTunbtmmGbbdu2qVmzZqpUqZKGDx8uDw8P7dmzxx4Ay5Urp+HDh+uNN95Qt27d9Mgjj0iS6tSpY1/HP//8o6ioKLVt21bPPvusAgICblrXW2+9JZvNpoEDB+r48eMaO3asIiIiFBcXZ7/CnBm3qm3ZsmWKiopStWrVNGTIELm4uGjatGlq2LChVq9erZo1a2Z6W2nSwlxaaIuNjZW3t7f69+8vb29vLVu2TG+88YYSExP17rvv2pdbsmSJmjVrpqCgIPXt21eBgYHavn27FixYoL59+2a4rQ0bNigyMlLVq1fX/Pnz7a/NuHHj1KJFC7Vv316XLl3S119/raeeekoLFixQ06ZN7ct37NhRM2fOVIcOHVS7dm2tXLnSYX6aY8eOqXbt2vbAVqhQIS1cuFBdunRRYmKi+vXrd8PXIzQ0VJI0Y8YM1a1bN0ufCrRq1UqlSpVymLZp0yaNHTtW/v7+9mndu3dXbGysOnXqpD59+mjfvn0aP368fv/9d61du/aGH7efP39eS5cuVb169VS0aNFb1mOMUYsWLbR8+XJ16dJFVapU0eLFi/XSSy/p8OHDGjNmjEP71atX67vvvlNMTIwkaeTIkWrWrJlefvllTZw4UT179tTp06c1evRode7cWcuWLXNY/vTp03rsscfUpk0btWvXTjNnzlSPHj3k7u6uzp07S5ISExP16aef2rt8nD17VlOnTlVkZKR+++23dF1jpk2bposXL6pbt272vsWpqanp9rV169batm2bevfurWLFiun48eNasmSJDh48qGLFikm6+s/HsGHDFBERoR49emjnzp2aNGmSNmzYkO51P336tJo0aaJWrVqpTZs2+vbbbzVw4EBVrFhRUVFRt3ztAaczAHKEadOmGUlmw4YNN2zj6+trqlatan8+ZMgQc+3beMyYMUaSOXHixA3XsWHDBiPJTJs2Ld288PBwI8lMnjw5w3nh4eH258uXLzeSTOHChU1iYqJ9+syZM40kM27cOPu00NBQEx0dfct13qi21NRUU7p0aRMZGWlSU1Pt08+fP2+KFy9uHn300RvurzHG7Nu3z0gyw4YNMydOnDDx8fFmxYoVpmrVqkaSmT17tn191+vevbvx8vIyFy9eNMYYc+XKFVO8eHETGhpqTp8+na7ONNHR0SZPnjzGGGPWrFljfHx8TNOmTe3ruXYfrnXp0iVToUIF07BhQ/u0TZs2GUmmX79+Dm07duxoJJkhQ4bYp3Xp0sUEBQWZkydPOrRt27at8fX1zXAfr60/7RgICAgw7dq1MxMmTDAHDhxI1zbteN23b1+G6zpx4oQpWrSoqVixoklKSjLGGLN69WojycyYMcOh7aJFizKcfq0//vjDSDJ9+/a9YZtrzZs3z0gyI0aMcJj+5JNPGpvNZvbs2WOfJsl4eHg47MvHH39sJJnAwECH43vQoEHp9jvtNXv//fft05KTk02VKlWMv7+/uXTpkjHm6rGTnJzsUM/p06dNQECA6dy5s31a2v
Hq4+Njjh8/7tA+bV7ae+T06dNGknn33Xdv+FocP37cuLu7m8aNG5uUlBT79PHjxxtJ5rPPPku3L59//rnDvgQGBprWrVvfcBtATkK3BOA+4u3tfdNRE/Llyyfp6sfrGV3hyQwPDw916tQp0+2fe+45h4+In3zySQUFBenHH3+8re1nJC4uTrt379Yzzzyjf/75RydPntTJkyd17tw5NWrUSKtWrcrU/g4ZMkSFChVSYGCg6tevr7179+qdd95Rq1atJMnhSvPZs2d18uRJPfLIIzp//rx9pIDff/9d+/btU79+/eyvd5qMhmVbvny5IiMj1ahRI82ZMyfdDVjXbvP06dNKSEjQI488os2bN9unL1q0SJLUs2dPh2V79+7t8NwYo9mzZ6t58+Yyxthfp5MnTyoyMlIJCQkO672ezWbT4sWLNWLECOXPn1///e9/FRMTo9DQUD399NPpPsK+kZSUFLVr105nz57V3LlzlSdPHknSrFmz5Ovrq0cffdShtmrVqsnb2zvdx/PXSkxMlKQMuyNk5Mcff5Srq6v69OnjMH3AgAEyxmjhwoUO0xs1amS/yilJtWrVknT1qui120yb/r///c9heTc3N3Xv3t3+3N3dXd27d9fx48e1adMmSVe7dqT1XU9NTdWpU6d05coVVa9ePcPfS+vWrVWoUKGb7qenp6fc3d21YsWKG3bt+Pnnn3Xp0iX169dPLi7/92e/a9eu8vHx0Q8//ODQ3tvb26Fvuru7u2rWrJlun4Gcim4JwH0kKSnJ4SPe6z399NP69NNP9Z///EevvPKKGjVqpFatWunJJ590+KN2M4ULF87SzWOlS5d2eG6z2VSqVKlsHf909+7dkqTo6OgbtklISFD+/Plvup5u3brpqaeekouLi32Iq2vD5rZt2zR48GAtW7bMHqauXb/0f10ZKlSocMu6L168qKZNm6patWqaOXNmhh/zL1iwQCNGjFBcXJySk5Pt068NygcOHJCLi4uKFy/usOz13QBOnDihM2fOaMqUKZoyZUqGNR0/fvymNXt4eOi1117Ta6+9pqNHj2rlypUaN26cZs6cqVy5cjn0+b6RtNfwhx9+UMmSJe3Td+/erYSEhBsewzerzcfHR5IyPSTegQMHFBwcnC4MlytXzj7/Wtd3dfD19ZUkhYSEZDj9+iAZHBxsD/FpypQpI+lqH9ratWtLkqZPn673339fO3bs0OXLl+1tr//d3mja9Tw8PPTOO+9owIABCggIUO3atdWsWTM999xzCgwMdNjXBx54wGFZd3d3lShRIt1rUaRIkXT/qOXPn19btmy5ZT1ATkC4Be4Tf//9txISEtIFmmt5enpq1apVWr58uX744QctWrRI33zzjRo2bKiffvopU3e3Z6WfbGbd6IsmUlJSMlVT2lXZd99994ZDdmXmCxNKly6tiIiIDOedOXNG4eHh8vHx0fDhw1WyZEnlzp1bmzdv1sCBA2/rSriHh4cee+wxzZ8/X4sWLVKzZs0c5q9evVotWrRQvXr1NHHiRAUFBSlXrlyaNm2avvrqqyxvL63GZ5999ob/CFw7lNytBAUFqW3btmrdurUefPBBzZw5U7GxsTftiztv3jy98847evPNN9WkSZN09fn7+2vGjBkZLnuzq5SlSpWSm5ub/Sav7Haj4/BG0811N6VlxpdffqmOHTuqZcuWeumll+Tv7y9XV1eNHDnS/k/TtTL7XuzXr5+aN2+uefPmafHixXr99dc1cuRILVu2TFWrVs1yndm5z4AzEG6B+8QXX3whSYqMjLxpOxcXFzVq1EiNGjXSBx98oLfffluvvfaali9froiIiGz/RrO0q6ppjDHas2ePQ4jKnz9/hh9pHzhwQCVKlLA/v1FtaVf/fHx8bhhO79SKFSv0zz//aM6cOapXr559etpICtfX8ueff96ylrQ7+x9//HE99dRTWrhwocOIE7Nnz1bu3Lm1ePFihyvI06ZNc1hPaGioUlNTtW/fPocr5Xv27HFoV6hQIeXNm1cpKSnZ+jrlypVLlSpV0u7du3Xy5En7FcHr7dq1S9HR0WrZsmWGY82WLFlSP//8s+rWrZvlf6K8vLzUsGFDLVu2TIcOHUp3RfV6oaGh+vnnn3X27FmHq7dp3UvSbp7LLkeOHNG5c+ccrt7u2rVLkuzdHb799luVKFFCc+bMcTjWhwwZcsfbL1mypAYMGKABAwZo9+7dqlKlit5//319+eWX9n3duXOnw/vt0qVL2rdv3117TwHOQp9b4D6wbNkyvfnmmypevLjat29/w3anTp1KNy3tSmfaR95pf3wz23/yVj7//HOHj4q//fZbHT161OGu6pIlS+rXX3/VpUuX7NMWLFiQbsiwG9VWrVo1lSxZUu+99559CK1rnThx4o73I+1q1bVXpy5duqSJEyc6tHvooYdUvHhxjR07Nl2dGV3Zcnd315w5c1SjRg01b95cv/32m8M2bTabw1Bj+/fv17x58xzWkfYPzfW1fPTRR+n2oXXr1po9e3aG473e6nXavXu3Dh48mG76mTNntG7dOuXPn/+GV1eTkpL0xBNPqHDhwpo+fXqG/6i0adNGKSkpevPNN9PNu3Llyi2PySFDhsgYow4dOmR4HGzatEnTp0+XJPuXdYwfP96hzZgxY2Sz2bL9rv8rV644jJV86dIlffzxxypUqJCqVasmKeNjbP369Vq3bt1tb/f8+fO6ePGiw7SSJUsqb9689vd8RESE3N3d9eGHHzpse+rUqUpISMhw1A3gfsaVWyCHWbhwoXbs2KErV67o2LFjWrZsmZYsWaLQ0FB99913Nx3Affjw4Vq1apWaNm2q0NBQHT9+XBMnTlSRIkX08MMPS7r6hy9fvnyaPHmy8ubNqzx58qhWrVqZ6t+XET8/Pz388MPq1KmTjh07prFjx6pUqVIOw5X95z//0bfffqsmTZqoTZs22rt3r7788kuH/pi3qu3TTz9VVFSUHnzwQXXq1EmFCxfW4cOHtXz5cvn4+Oj777+/rfrT1KlTR/nz51d0dLT69Okjm82mL774Il1gdXFx0aRJk9S8eXNVqVJFnTp1UlBQkHbs2KFt27Zp8eLF6dbt6empBQsWqGHDhoqKitLKlStVoUIFNW3aVB988IGaNGmiZ555RsePH9eECRNUqlQph/6N1apVU+vWrTV27Fj9888/9qHA0q4MXhskR40apeXLl6tWrVrq2rWrypcvr1OnTmnz5s36+eefM/wHKM0ff/yhZ555RlFRUXrkkUfk5+enw4cPa/r06Tpy5IjGjh17w4+shw0bpr/++kuDBw/W/PnzHeaVLFlSYWFhCg8PV/fu3TVy5EjFxcWpcePGypUrl3bv3q1Zs2Zp3LhxevLJJ2/6O5owYYJ69uypsmXLOnxD2YoVK/Tdd99pxIgRkqTmzZurQYMGeu2117R//35VrlxZP/30k+bPn69+/fqlO/buVHBwsN555x3t379fZcqU0TfffKO4uDhNmTLFPsxWs2bNNGfOHD3xxBNq2rSp9u3bp8mTJ6t8+fIZhvXM2LVrlxo1a
qQ2bdqofPnycnNz09y5c3Xs2DG1bdtW0tUr+oMGDdKwYcPUpEkTtWjRQjt37tTEiRNVo0YNh5vHAEtwziANAK6XNrRS2sPd3d0EBgaaRx991IwbN85hOKI01w8FtnTpUvP444+b4OBg4+7uboKDg027du3Mrl27HJabP3++KV++vHFzc3MYVig8PNw8+OCDGdZ3o6HA/vvf/5pBgwYZf39/4+npaZo2bZrh0FHvv/++KVy4sPHw8DB169Y1GzduTLfOm9VmjDG///67adWqlSlQoIDx8PAwoaGhpk2bNmbp0qU3fW3Thk+62XBJxhizdu1aU7t2bePp6WmCg4PNyy+/bBYvXmwkmeXLlzu0XbNmjXn00UdN3rx5TZ48eUylSpXMRx99ZJ9/7VBgaU6ePGnKly9vAgMDze7du40xxkydOtWULl3aeHh4mLJly5pp06al+70aY8y5c+dMTEyM8fPzM97e3qZly5Zm586dRpIZNWqUQ9tjx46ZmJgYExISYnLlymUCAwNNo0aNzJQpU266/8eOHTOjRo0y4eHhJigoyLi5uZn8+fObhg0bmm+//dah7fVDgUVHRzscv9c+rh8GbsqUKaZatWrG09PT5M2b11SsWNG8/PLL5siRIzetL82mTZvMM888Y4KDg02uXLlM/vz5TaNGjcz06dMdhro6e/aseeGFF+ztSpcubd59912HIduMuToUWExMjMO0Gx0zacf9rFmz7NPS3jcbN240YWFhJnfu3CY0NNSMHz/eYdnU1FTz9ttvm9DQUOPh4WGqVq1qFixYYKKjo01oaOgtt33tvLT3xcmTJ01MTIwpW7asyZMnj/H19TW1atUyM2fOTLfs+PHjTdmyZU2uXLlMQECA6dGjR7rh7G50Dri+RiAnsxlDD3EAuB/FxcWpatWq+vLLL2/aXQV3V/369XXy5EnLfvUvcL+hzy0A3AcuXLiQbtrYsWPl4uLicAMcAPzb0ecWAO4Do0eP1qZNm9SgQQO5ublp4cKFWrhwobp163bLkQMA4N+EcAsA94E6depoyZIlevPNN5WUlKSiRYtq6NCheu2115xdGgDkKPS5BQAAgGXQ5xYAAACWQbgFAACAZdDnVle/7/zIkSPKmzdvtn81KQAAAO6cMUZnz55VcHCwXFxufH2WcKur3wnO3cYAAAA536FDh1SkSJEbzifcSsqbN6+kqy+Wj4+Pk6sBAADA9RITExUSEmLPbTdCuNX/fS+7j48P4RYAACAHu1UXUm4oAwAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYhpuzC/i3stmcXQHuBmOcXQEAAP9uXLkFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFiGU8PtyJEjVaNGDeXNm1f+/v5q2bKldu7c6dCmfv36stlsDo/nn3/eoc3BgwfVtGlTeXl5yd/fXy+99JKuXLlyL3cFAAAAOYCbMze+cuVKxcTEqEaNGrpy5YpeffVVNW7cWH/99Zfy5Mljb9e1a1cNHz7c/tzLy8v+c0pKipo2barAwED98ssvOnr0qJ577jnlypVLb7/99j3dHwAAADiXzRhjnF1EmhMnTsjf318rV65UvXr1JF29clulShWNHTs2w2UWLlyoZs2a6ciRIwoICJAkTZ48WQMHDtSJEyfk7u5+y+0mJibK19dXCQkJ8vHxybb9uRmb7Z5sBvdYznk3AQBgLZnNazmqz21CQoIkyc/Pz2H6jBkzVLBgQVWoUEGDBg3S+fPn7fPWrVunihUr2oOtJEVGRioxMVHbtm3LcDvJyclKTEx0eAAAAOD+59RuCddKTU1Vv379VLduXVWoUME+/ZlnnlFoaKiCg4O1ZcsWDRw4UDt37tScOXMkSfHx8Q7BVpL9eXx8fIbbGjlypIYNG3aX9gQAAADOkmPCbUxMjP7880+tWbPGYXq3bt3sP1esWFFBQUFq1KiR9u7dq5IlS97WtgYNGqT+/fvbnycmJiokJOT2CgcAAECOkSO6JfTq1UsLFizQ8uXLVaRIkZu2rVWrliRpz549kqTAwEAdO3bMoU3a88DAwAzX4eHhIR8fH4cHAAAA7n9ODbfGGPXq1Utz587VsmXLVLx48VsuExcXJ0kKCgqSJIWFhWnr1q06fvy4vc2SJUvk4+Oj8uXL35W6AQAAkDM5tVtCTEyMvvrqK82fP1958+a195H19fWVp6en9u7dq6+++kqPPfaYChQooC1btuiFF15QvXr1VKlSJUlS48aNVb58eXXo0EGjR49WfHy8Bg8erJiYGHl4eDhz9wAAAHCPOXUoMNsNxsOaNm2aOnbsqEOHDunZZ5/Vn3/+qXPnzikkJERPPPGEBg8e7NCV4MCBA+rRo4dWrFihPHnyKDo6WqNGjZKbW+ayO0OBIbswFBgAAHdHZvNajhrn1lkIt8guvJsAALg77stxbgEAAIA7QbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZbg5uwAAd842zObsEnCXmCHG2SUAwH2FK7cAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALMOp4XbkyJGqUaOG8ubNK39/f7Vs2VI7d+50aHPx4kXFxMSoQIEC8vb2VuvWrXXs2DGH
NgcPHlTTpk3l5eUlf39/vfTSS7py5cq93BUAAADkAE4NtytXrlRMTIx+/fVXLVmyRJcvX1bjxo117tw5e5sXXnhB33//vWbNmqWVK1fqyJEjatWqlX1+SkqKmjZtqkuXLumXX37R9OnTFRsbqzfeeMMZuwQAAAAnshljjLOLSHPixAn5+/tr5cqVqlevnhISElSoUCF99dVXevLJJyVJO3bsULly5bRu3TrVrl1bCxcuVLNmzXTkyBEFBARIkiZPnqyBAwfqxIkTcnd3v+V2ExMT5evrq4SEBPn4+NzVfUxjs92TzeAec9a7yTaMA8qqzJAcc4oGAKfKbF7LUX1uExISJEl+fn6SpE2bNuny5cuKiIiwtylbtqyKFi2qdevWSZLWrVunihUr2oOtJEVGRioxMVHbtm3LcDvJyclKTEx0eAAAAOD+l2PCbWpqqvr166e6deuqQoUKkqT4+Hi5u7srX758Dm0DAgIUHx9vb3NtsE2bnzYvIyNHjpSvr6/9ERISks17AwAAAGfIMeE2JiZGf/75p77++uu7vq1BgwYpISHB/jh06NBd3yYAAADuPjdnFyBJvXr10oIFC7Rq1SoVKVLEPj0wMFCXLl3SmTNnHK7eHjt2TIGBgfY2v/32m8P60kZTSGtzPQ8PD3l4eGTzXgAAAMDZnHrl1hijXr16ae7cuVq2bJmKFy/uML9atWrKlSuXli5dap+2c+dOHTx4UGFhYZKksLAwbd26VcePH7e3WbJkiXx8fFS+fPl7syMAAADIEZx65TYmJkZfffWV5s+fr7x589r7yPr6+srT01O+vr7q0qWL+vfvLz8/P/n4+Kh3794KCwtT7dq1JUmNGzdW+fLl1aFDB40ePVrx8fEaPHiwYmJiuDoLAADwL+PUcDtp0iRJUv369R2mT5s2TR07dpQkjRkzRi4uLmrdurWSk5MVGRmpiRMn2tu6urpqwYIF6tGjh8LCwpQnTx5FR0dr+PDh92o3AAAAkEPkqHFunYVxbpFdGOcW2Y1xbgHgqvtynFsAAADgThBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZdxxuE1OTs6OOgAAAIA7luVwu3DhQkVHR6tEiRLKlSuXvLy85OPjo/DwcL311ls6cuTI3agTAAAAuKVMh9u5c+eqTJky6ty5s9zc3DRw4EDNmTNHixcv1qeffqrw8HD9/PPPKlGihJ5//nmdOHHibtYNAAAApGMzxpjMNAwLC9PgwYMVFRUlF5cbZ+LDhw/ro48+UkBAgF544YVsK/RuSkxMlK+vrxISEuTj43NPtmmz3ZPN4B7L3Lsp+9mGcUBZlRnipIMKAHKYzOY1t8yucN26dZlqV7hwYY0aNSqzqwUAAACyDaMlAAAAwDKyHG53796t2bNna9++fZKkH374QfXq1VONGjX01ltvKZO9HAAAAIBsl+luCdLVm8ratGkjFxcX2Ww2TZkyRd27d1f9+vXl4+OjoUOH2m82AwAAAO61LF25feutt/Tyyy/r4sWLmjRpkp5//nmNHDlSCxcu1IIFCzRhwgTFxsbepVIBAACAm8tSuN25c6c6d+4sm82m6OhoXbp0SREREfb5jRs31oEDB7K9SAAAACAzshRuz507p7x5815d0MVFnp6e8vLyss/39PTkG8sAAADgNFkKtzabTbZrBmi9/jkAAADgTFm6ocwYozJlytgDbVJSkqpWrWr/UgdGSgAAAIAzZSncTps27W7VAQAAANyxLIXb6Ojou1UHAAAAcMf4hjIAAABYRqav3Pr5+WnXrl0qWLCg8ufPf9MbyU6dOpUtxQEAAABZkelwO2bMGPswYGPHjr1b9QAAAAC3LdPh9tr+tvS9BQAAQE5En1sAAABYRpbCraura6YembVq1So1b95cwcHBstlsmjdvnsP8jh072r8oIu3RpEkThzanTp1S+/bt5ePjo3z58qlLly5KSkrKym4BAADAIrL8JQ6hoaGKjo5W1apV73jj586dU+XKldW5c2e1atUqwzZNmjRxGF/Xw8PDYX779u119OhRLVmyRJcvX1anTp3UrVs3ffXVV3dcHwAAAO4vWQq3v/32m6ZOnapx48apePHi6ty5s9q3b6/8+fPf1sajoqIUFRV10zYeHh4KDAzMcN727du1aNEibdiwQdWrV5ckffTRR3rsscf03nvvKTg4+LbqAgAAwP0pS90SqlevrkmTJuno0aPq37+/5s6dqyJFiqht27ZasmTJXSlwxYoV8vf31wMPPKAePXron3/+sc9bt26d8uXLZw+2khQRESEXFxetX7/+hutMTk5WYmKiwwMAAAD3v9u6oSx37tx69tlntXTpUv355586fvy4mjRpku3j2zZp0kSff/65li5dqnfeeUcrV65UVFSUUlJSJEnx8fHy9/d3WMbNzU1+fn6Kj4+/4XpHjhwpX19f+yMkJCRb6wYAAIBzZKlbwrX+/vtvxcbGKjY2VufPn9dLL70kHx+f7KxNbdu2tf9csWJFVapUSSVLltSKFSvUqFGj217voEGD1L9/f/vzxMREAi4AAIAFZOnK7aVLl/TNN9+ocePGKl26tDZv3qyxY8fq0KFDGjVqlNzcbjsrZ0qJEiVUsGBB7dmzR5IUGBio48ePO7S5cuWKTp06dcN+utLVfrw+Pj4ODwAAANz/spRGg4KClDdvXkVHR2vixIn2LgHnzp1zaHe3wuLff/+tf/75R0FBQZKksLAwnTlzRps2bVK1atUkScuWLVNqaqpq1ap1V2oAAABAzmUzxpjMNnZx+b8LvTabLd18Y4xsNpu9T+ytJCUl2a/CVq1aVR988IEaNGggPz8/+fn5adiwYWrdurUCAwO1d+9evfzyyzp79qy2bt1qHxIsKipKx44d0+TJk+1DgVWvXj1LQ4ElJibK19dXCQkJ9+wqbgYvHywg8++m7GUbxgFlVWaIkw4qAMhhMpvXsnTldvny5Xdc2LU2btyoBg0a2J+n9YONjo7WpEmTtGXLFk2fPl1nzpxRcHCwGjdurDfffNNhrNsZM2aoV69eatSokVxcXNS6dWt9+OGH2VonAAAA7g9ZunJrVVy5RXbhyi2yG1duAeCqzOa1TN9Qdn2/2uxuDwAAANypTIfbUqVKadSoUTp69OgN2xhjtGTJEkV
FRdE1AAAAAPdcpvvcrlixQq+++qqGDh2qypUrq3r16goODlbu3Ll1+vRp/fXXX1q3bp3c3Nw0aNAgde/e/W7WDQAAAKST6XD7wAMPaPbs2Tp48KBmzZql1atX65dfftGFCxdUsGBBVa1aVZ988omioqLk6up6N2sGAAAAMsQNZeKGMmQfbihDduOGMgC4KttvKAMAAAByOsItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwjNsOt6tXr9azzz6rsLAwHT58WJL0xRdfaM2aNdlWHAAAAJAVtxVuZ8+ercjISHl6eur3339XcnKyJCkhIUFvv/12thYIAAAAZNZthdsRI0Zo8uTJ+uSTT5QrVy779Lp162rz5s3ZVhwAAACQFbcVbnfu3Kl69eqlm+7r66szZ87caU0AAADAbbmtcBsYGKg9e/akm75mzRqVKFHijosCAAAAbsdthduuXbuqb9++Wr9+vWw2m44cOaIZM2boxRdfVI8ePbK7RgAAACBT3G5noVdeeUWpqalq1KiRzp8/r3r16snDw0Mvvviievfund01AgAAAJliM8aY21340qVL2rNnj5KSklS+fHl5e3tnZ233TGJionx9fZWQkCAfH597sk2b7Z5sBvfY7b+b7oxtGAeUVZkhTjqoACCHyWxeu61uCZ9//rm2b98ud3d3lS9fXjVr1pS3t7cuXryozz///LaLBgAAAO7EbYXbjh07qmbNmpo9e7bD9ISEBHXq1ClbCgMAAACy6ra/oWzYsGHq0KGDhg4dmo3lAAAAALfvtsPts88+q2XLlunjjz/Wk08+qQsXLmRnXQAAAECW3Va4tf3/u6Fq166t9evXa8+ePapTp47279+fnbUBAAAAWXJb4fbaARaKFi2qX375RcWKFdOjjz6abYUBAAAAWXVb4XbIkCEOw355eXlp7ty5euGFFzL8Wl4AAADgXrijcW6tgnFukV0Y5xbZjXFuAeCqzOa1TH9D2XfffaeoqCjlypVL33333Q3b2Ww2NW/ePGvVAgAAANkg0+G2ZcuWio+Pl7+/v1q2bHnDdjabTSkpKdlRGwAAAJAlmQ63qampGf4MAAAA5BS3Pc4tAAAAkNNkKdyuW7dOCxYscJj2+eefq3jx4vL391e3bt2UnJycrQUCAAAAmZWlcDt8+HBt27bN/nzr1q3q0qWLIiIi9Morr+j777/XyJEjs71IAAAAIDOyFG7j4uLUqFEj+/Ovv/5atWrV0ieffKL+/fvrww8/1MyZM7O9SAAAACAzshRuT58+rYCAAPvzlStXKioqyv68Ro0aOnToUPZVBwAAAGRBlsJtQECA9u3bJ0m6dOmSNm/erNq1a9vnnz17Vrly5creCgEAAIBMylK4feyxx/TKK69o9erVGjRokLy8vPTII4/Y52/ZskUlS5bM9iIBAACAzMj0OLeS9Oabb6pVq1YKDw+Xt7e3pk+fLnd3d/v8zz77TI0bN872IgEAAIDMyFK4LViwoFatWqWEhAR5e3vL1dXVYf6sWbPk7e2drQUCAAAAmZWlcJvG19c3w+l+fn53VAwAAABwJ/iGMgAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACW4dRwu2rVKjVv3lzBwcGy2WyaN2+ew3xjjN544w0FBQXJ09NTERER2r17t0ObU6dOqX379vLx8VG+fPnUpUsXJSUl3cO9AAAAQE7h1HB77tw5Va5cWRMmTMhw/ujRo/Xhhx9q8uTJWr9+vfLkyaPIyEhdvHjR3qZ9+/batm2blixZogULFmjVqlXq1q3bvdoFAAAA5CA2Y4xxdhGSZLPZNHfuXLVs2VLS1au2wcHBGjBggF588UVJUkJCggICAhQbG6u2bdtq+/btKl++vDZs2KDq1atLkhYtWqTHHntMf//9t4KDgzO17cTERPn6+iohIUE+Pj53Zf+uZ7Pdk83gHnPWu8k2jAPKqsyQHHGKBgCny2xey7F9bvft26f4+HhFRETYp/n6+qpWrVpat26dJGndunXKly+fPdhKUkREhFxcXLR+/fobrjs5OVmJiYkODwAAANz/cmy4jY+PlyQFBAQ4TA8ICLDPi4+Pl7+/v8N8Nzc3+fn52dtkZOTIkfL19bU/QkJCsrl6AAAAOEOODbd306BBg5SQkGB/HDp0yNklAQAAIBvk2HAbGBgoSTp27JjD9GPHjtnnBQYG6vjx4w7zr1y5olOnTtnbZMTDw0M+Pj4ODwAAANz/cmy4LV68uAIDA7V06VL7tMTERK1fv15hYWGSpLCwMJ05c0abNm2yt1m2bJlSU1NVq1ate14zAAAAnMvNmRtPSkrSnj177M/37dunuLg4+fn5qWjRourXr59GjBih0qVLq3jx4nr99dcVHBxsH1GhXLlyatKkibp27arJkyfr8uXL6tWrl9q2bZvpkRIAAABgHU4Ntxs3blSDBg3sz/v37y9Jio6OVmxsrF5++WWdO3dO3bp105kzZ/Twww9r0aJFyp07t32ZGTNmqFevXmrUqJFcXFzUunVrffjhh/d8XwAAAOB8OWacW2dinFtkF8a5RXZjnFsAuOq+H+cWAAAAyCrCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACzDzdkFAABylmG2Yc4uAXfJEDPE2SUAdx1XbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYRo4Ot0OHDpXNZnN4lC1b1j7/4sWLiomJUYECBeTt7a3WrVvr2LFjTqwYAAAAzpSjw60kPfjggzp69Kj9sWbNGvu8F154Qd9//71mzZqllStX6s
iRI2rVqpUTqwUAAIAzuTm7gFtxc3NTYGBguukJCQmaOnWqvvrqKzVs2FCSNG3aNJUrV06//vqrateufa9LBQAAgJPl+Cu3u3fvVnBwsEqUKKH27dvr4MGDkqRNmzbp8uXLioiIsLctW7asihYtqnXr1t10ncnJyUpMTHR4AAAA4P6Xo8NtrVq1FBsbq0WLFmnSpEnat2+fHnnkEZ09e1bx8fFyd3dXvnz5HJYJCAhQfHz8Tdc7cuRI+fr62h8hISF3cS8AAABwr+TobglRUVH2nytVqqRatWopNDRUM2fOlKen522vd9CgQerfv7/9eWJiIgEXAADAAnL0ldvr5cuXT2XKlNGePXsUGBioS5cu6cyZMw5tjh07lmEf3Wt5eHjIx8fH4QEAAID7330VbpOSkrR3714FBQWpWrVqypUrl5YuXWqfv3PnTh08eFBhYWFOrBIAAADOkqO7Jbz44otq3ry5QkNDdeTIEQ0ZMkSurq5q166dfH191aVLF/Xv319+fn7y8fFR7969FRYWxkgJAAAA/1I5Otz+/fffateunf755x8VKlRIDz/8sH799VcVKlRIkjRmzBi5uLiodevWSk5OVmRkpCZOnOjkqgEAAOAsOTrcfv311zednzt3bk2YMEETJky4RxUBAAAgJ7uv+twCAAAAN0O4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZbs4uAAAAWJzN5uwKcDcY4+wKMsSVWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFiGZcLthAkTVKxYMeXOnVu1atXSb7/95uySAAAAcI9ZItx+88036t+/v4YMGaLNmzercuXKioyM1PHjx51dGgAAAO4hS4TbDz74QF27dlWnTp1Uvnx5TZ48WV5eXvrss8+cXRoAAADuITdnF3CnLl26pE2bNmnQoEH2aS4uLoqIiNC6desyXCY5OVnJycn25wkJCZKkxMTEu1ssLM9ph9BFJ20Xd50zzksXOaAsi79zyFb3+HhKO36NMTdtd9+H25MnTyolJUUBAQEO0wMCArRjx44Mlxk5cqSGDRuWbnpISMhdqRH/Hr6+zq4AVuM7ioMK2WeU7yhnlwArcdIfvbNnz8r3Jtu+78Pt7Rg0aJD69+9vf56amqpTp06pQIECstlsTqzMehITExUSEqJDhw7Jx8fH2eXgPsfxhOzGMYXsxPF0dxljdPbsWQUHB9+03X0fbgsWLChXV1cdO3bMYfqxY8cUGBiY4TIeHh7y8PBwmJYvX767VSIk+fj48EZHtuF4QnbjmEJ24ni6e252xTbNfX9Dmbu7u6pVq6alS5fap6Wmpmrp0qUKCwtzYmUAAAC41+77K7eS1L9/f0VHR6t69eqqWbOmxo4dq3PnzqlTp07OLg0AAAD3kCXC7dNPP60TJ07ojTfeUHx8vKpUqaJFixalu8kM956Hh4eGDBmSrhsIcDs4npDdOKaQnTiecgabudV4CgAAAMB94r7vcwsAAACkIdwCAADAMgi3AAAAsAzCLe6IMUbdunWTn5+fbDab4uLibrmMzWbTvHnzJEn79+/P9HJwvmLFimns2LGZbp9Tf78dO3ZUy5YtnV2GJGno0KGqUqWKs8vIEW7nfCJxTrmfcU7JfpxTCLe4Q4sWLVJsbKwWLFigo0ePqkKFCs4uCXfRhg0b1K1bt2xdZ2xsbLZ8iUpW/kiOGzdOsbGxd7xNZK/74XxSsWJFPf/88xnO++KLL+Th4aGTJ09KuhrWp0yZolq1asnb21v58uVT9erVNXbsWJ0/f/5elp1jcU65v2zatEk2m02//vprhvMbNWqkVq1a2Z/Hx8erd+/eKlGihDw8PBQSEqLmzZs7fDfB3UC4xR3Zu3evgoKCVKdOHQUGBsrNzRKjy+EGChUqJC8vL2eXcdtSUlKUmpoqX19fvpUwB3LW+aRYsWJasWJFptp26dJFX3/9tS5cuJBu3rRp09SiRQsVLFhQktShQwf169dPjz/+uJYvX664uDi9/vrrmj9/vn766afs3IX7FucU51uxYoWKFSuWqbbVqlVT5cqV9dlnn6Wbt3//fi1fvlxdunSxP69WrZqWLVumd999V1u3btWiRYvUoEEDxcTEZOcupGeA2xQdHW0k2R+hoaEmNDTUjBkzxqFd5cqVzZAhQ+zPJZm5c+caY4zZt2+fkWR+//33e1b3v8n3339vfH19zZUrV4wxxvz+++9Gkhk4cKC9TZcuXUz79u2NMcasXr3aPPzwwyZ37tymSJEipnfv3iYpKcne9vrf7/bt203dunWNh4eHKVeunFmyZEmGv9/Zs2eb+vXrG09PT1OpUiXzyy+/GGOMWb58ucMxJMnhWLlWamqqGTJkiAkJCTHu7u4mKCjI9O7d2xhjTHh4eLr1GGPMtGnTjK+vr5k/f74pV66ccXV1Nfv27TPR0dHm8ccft687PDzcxMTEmJiYGOPj42MKFChgBg8ebFJTUx32ffjw4aZt27bGy8vLBAcHm/HjxzvUePr0adOlSxdTsGBBkzdvXtOgQQMTFxfn0GbkyJHG39/feHt7m86dO5uBAweaypUr3+I3aX0ZnU+MSX/MGZP955TQ0FCzfPnyTLU9ceKEcXd3N1988YXD9P/973/GZrOZhQsXGmOM+eabb4wkM2/evHTrSE1NNWfOnMl0fTlFVs8nxnBOuR/OKcuXL7e/3zLjww8/ND4+PubcuXMO04cMGWKCg4Ptx0dUVJQpXLiww+/72v26m7hyi9s2btw4DR8+XEWKFNHRo0e1YcMGZ5eE6zzyyCM6e/asfv/9d0nSypUrVbBgQYerVCtXrlT9+vW1d+9eNWnSRK1bt9aWLVv0zTffaM2aNerVq1eG605JSVHLli3l5eWl9evXa8qUKXrttdcybPvaa6/pxRdfVFxcnMqUKaN27drpypUrqlOnjsaOHSsfHx8dPXpUR48e1YsvvpjhOmbPnq0xY8bo448/1u7duzVv3jxVrFhRkjRnzhwVKVJEw4cPt68nzfnz5/XOO+/o0
08/1bZt2+Tv75/h+qdPny43Nzf99ttvGjdunD744AN9+umnDm3effddVa5cWb///rteeeUV9e3bV0uWLLHPf+qpp3T8+HEtXLhQmzZt0kMPPaRGjRrp1KlTkqSZM2dq6NChevvtt7Vx40YFBQVp4sSJGdbzb3O/nE8KFiyoxx9/PN2Vq9jYWBUpUkSNGzeWJM2YMUMPPPCAHn/88XTrsNls8vX1vSf1ZqesnE8kcU6x6Dmlffv2Sk5O1rfffmufZozR9OnT1bFjR7m6uurUqVNatGiRYmJilCdPnnTruOtXue9qdIbljRkzxuE/Pq7c5jwPPfSQeffdd40xxrRs2dK89dZbxt3d3Zw9e9b8/fffRpLZtWuX6dKli+nWrZvDsqtXrzYuLi7mwoULxhjH3+/ChQuNm5ubOXr0qL39ja6yfPrpp/Y227ZtM5LM9u3bjTH/dyXkVt5//31TpkwZc+nSpQznZ3TsTZs2zUhKd6Ujo6ss5cqVc7iqMnDgQFOuXDmH9Tdp0sRhPU8//bSJiooyxlx9rXx8fMzFixcd2pQsWdJ8/PHHxhhjwsLCTM+ePR3m16pViyu3/9/15xNjct6VW2OMWbRokbHZbOZ///ufMebqFcDQ0FAzePBge5ty5cqZFi1aZHqd94vMnk+MMZxT7pNzSlav3BpjTNu2bU14eLj9+dKlS40ks3v3bmOMMevXrzeSzJw5c7K03uzClVvA4sLDw7VixQoZY7R69Wq1atVK5cqV05o1a7Ry5UoFBwerdOnS+uOPPxQbGytvb2/7IzIyUqmpqdq3b1+69e7cuVMhISEKDAy0T6tZs2aGNVSqVMn+c1BQkCTp+PHjN6z57bffdqjj4MGDeuqpp3ThwgWVKFFCXbt21dy5c3XlypVb7r+7u7vD9m+kdu3astls9udhYWHavXu3UlJSHKZdKywsTNu3b5ck/fHHH0pKSlKBAgUcat+3b5/27t0rSdq+fbtq1aqVbh24t55//vl0x1dUVJTDtJt59NFHVaRIEU2bNk2StHTpUh08eFCdOnWytzEW/fLPzJ5PJHFOycHnlGvXFxUVpYMHDzpMu9FNk2k6d+6sVatW2ev47LPPFB4erlKlSkly/vHP3T/IVi4uLukO6suXLzupGkhS/fr19dlnn+mPP/5Qrly5VLZsWdWvX18rVqzQ6dOnFR4eLklKSkpS9+7d1adPn3TrKFq06B3VkCtXLvvPaSf71NTUG7Z//vnn1aZNG/vz4OBgubm5aefOnfr555+1ZMkS9ezZU++++65WrlzpsP7reXp6OvyBuVuSkpIUFBSU4Y1J9+uNJjnB3TinDB8+3OGj6vr16+udd95JFxJuVlPHjh01ffp0DR06VNOmTVODBg1UokQJe5syZcpox44dd1RnTpTZ84nEOeVO3c1zyrVDqa1fv14DBw502I6Pj89Nl2/UqJGKFi2q2NhYvfTSS5ozZ44+/vhj+/zSpUvLZrM57T1AuEW2KlSokEPfpMTExAz/Q8e9k9ZPbsyYMfY/PPXr19eoUaN0+vRpDRgwQJL00EMP6a+//rL/530rDzzwgA4dOqRjx44pICBAkm6rn6S7u7vDlQxJ8vPzk5+fX7q2np6eat68uZo3b66YmBiVLVtWW7du1UMPPZTherJi/fr1Ds9//fVXlS5dWq6urg7Trm9Trlw5SVdfv/j4eLm5ud3wzuNy5cpp/fr1eu655264Tji6G+cUf39/h36Sbm5uKly4cKaPfUnq1KmTRowYoTlz5mju3Lnp+lI+88wzatu2rebPn5+u360xRomJifd1v9tbnU8kzik5+Zxy7e/k77//lpubW5aOfxcXF3Xq1ElTp05V4cKF5e7urieffNI+38/PT5GRkZowYYL69OmTrt/tmTNn7uo//XRLQLZq2LChvvjiC61evVpbt25VdHS0wxsZ917+/PlVqVIlzZgxw36jR7169bR582bt2rXL/gdq4MCB+uWXX9SrVy/FxcVp9+7dmj9//g1v/nj00UdVsmRJRUdHa8uWLVq7dq0GDx4sSVm6qlGsWDElJSVp6dKlOnny5A3H/4yNjdXUqVP1559/6n//+5++/PJLeXp6KjQ01L6eVatW6fDhw/ZxRrPi4MGD6t+/v3bu3Kn//ve/+uijj9S3b1+HNmvXrtXo0aO1a9cuTZgwQbNmzbK3iYiIUFhYmFq2bKmffvpJ+/fv1y+//KLXXntNGzdulCT17dtXn332maZNm6Zdu3ZpyJAh2rZtW5Zr/TfJqeeU4sWLq2HDhurWrZs8PDwcxvaUpDZt2ujpp59Wu3bt7Df7HDhwQAsWLFBERISWL1/upMrvTGbPJxLnFKufUzp16qTDhw/r1VdfVbt27eTp6ekwf8KECUpJSVHNmjU1e/Zs7d69W9u3b9eHH35417tjEW6RrQYNGqTw8HA1a9ZMTZs2VcuWLVWyZElnl/WvFx4erpSUFPsfIz8/P5UvX16BgYF64IEHJF3tw7Zy5Urt2rVLjzzyiKpWrao33nhDwcHBGa7T1dVV8+bNU1JSkmrUqKH//Oc/9jubc+fOnena6tSpo+eff15PP/20ChUqpNGjR2fYLl++fPrkk09Ut25dVapUST///LO+//57FShQQNLVj5r379+vkiVLqlChQpnefprnnntOFy5cUM2aNRUTE6O+ffumG1x+wIAB2rhxo6pWraoRI0bogw8+UGRkpKSrf3x//PFH1atXT506dVKZMmXUtm1bHThwwH4V6umnn9brr7+ul19+WdWqVdOBAwfUo0ePLNf6b5KTzyldunTR6dOn9cwzz6Q75m02m7766it98MEHmjdvnsLDw1WpUiUNHTpUjz/+uP24uR9l5nwicU6x+jmlaNGiioiI0OnTp9W5c+d080uUKKHNmzerQYMGGjBggCpUqKBHH31US5cu1aRJk+5qbTbj7F6/ACxj7dq1evjhh7Vnz54cE0Ayo379+qpSpcpNv42oWLFi6tevn/r163fP6gL+7Tin4HbQ5xbAbZs7d668vb1VunRp7dmzR3379lXdunXvqz9CAHIOzinIDoRbALft7NmzGjhwoA4ePKiCBQsqIiJC77//vrPLAnCf4pyC7EC3BAAAAFgGN5QBAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AHCN2NhYh+88Hzp0qKpUqXLTZTp27KiWLVve1boyY8qUKQoJCZGLi4vGjh2bqdqdbf/+/bLZbIqLi3N2KQAsgnALwBI6duwom81mfxQoUEBNmjTRli1bsrSep59+Wrt27bpLVd49iYmJ6tWrlwYOHKjDhw+rW7duevHFF7V06VJnl2aX0T8BISEhOnr0qCpUqOCcogBYDuEWgGU0adJER48e1dGjR7V06VK5ubmpWbNmWVqHp6en/P3971KFd8/Bgwd1+fJlNW3aVEFBQfLy8pK3t7cKFChw17d9+fLl217W1dVVgYGBcnPjO4UAZA/CLQDL8PDwUGBgoAIDA1WlShW98sorOnTokE6cOCFJWrFihWw2m86cOWNfJi4uTjab
Tfv375eUvlvC9VJSUtS/f3/ly5dPBQoU0Msvv6zMfBfO2rVrVb9+fXl5eSl//vyKjIzU6dOnJUnJycnq06eP/P39lTt3bj388MPasGGDfdm0upcuXarq1avLy8tLderU0c6dO+01V6xYUZJUokQJ+/5c3y3hypUr6tOnj732gQMHKjo62uFqarFixTR27FiH2qtUqaKhQ4fan9tsNk2aNEktWrRQnjx59NZbbyklJUVdunRR8eLF5enpqQceeEDjxo2zLzN06FBNnz5d8+fPt19dX7FiRYbdElauXKmaNWvKw8NDQUFBeuWVV3TlyhX7/Pr166tPnz56+eWX5efnp8DAQIf6jDEaOnSoihYtKg8PDwUHB6tPnz63/B0BsAbCLQBLSkpK0pdffqlSpUpl69XL999/X7Gxsfrss8+0Zs0anTp1SnPnzr3pMnFxcWrUqJHKly+vdevWac2aNWrevLlSUlIkSS+//LJmz56t6dOna/PmzSpVqpQiIyN16tQph/W89tprev/997Vx40a5ubmpc+fOkq52pfj5558lSb/99puOHj2qkJCQdHW88847mjFjhqZNm6a1a9cqMTFR8+bNu63XYejQoXriiSe0detWde7cWampqSpSpIhmzZqlv/76S2+88YZeffVVzZw5U5L04osvqk2bNg5X1+vUqZNuvYcPH9Zjjz2mGjVq6I8//tCkSZM0depUjRgxwqHd9OnTlSdPHq1fv16jR4/W8OHDtWTJEknS7NmzNWbMGH388cfavXu35s2bZw//AKyPz4EAWMaCBQvk7e0tSTp37pyCgoK0YMECubhk3//xY8eO1aBBg9SqVStJ0uTJk7V48eKbLjN69GhVr15dEydOtE978MEH7XVOmjRJsbGxioqKkiR98sknWrJkiaZOnaqXXnrJvsxbb72l8PBwSdIrr7yipk2b6uLFi/L09LQH+EKFCikwMDDDOj766CMNGjRITzzxhCRp/Pjx+vHHH2/nZdAzzzyjTp06OUwbNmyY/efixYtr3bp1mjlzptq0aSNvb295enoqOTn5hvVJ0sSJExUSEqLx48fLZrOpbNmyOnLkiAYOHKg33njD/rusVKmShgwZIkkqXbq0xo8fr6VLl+rRRx/VwYMHFRgYqIiICOXKlUtFixZVzZo1b2s/Adx/uHILwDIaNGiguLg4xcXF6bffflNkZKSioqJ04MCBbFl/QkKCjh49qlq1atmnubm5qXr16jddLu3KbUb27t2ry5cvq27duvZpuXLlUs2aNbV9+3aHtpUqVbL/HBQUJEk6fvx4pms/duyYQ8hzdXVVtWrVMrX89TLa5wkTJqhatWoqVKiQvL29NWXKFB08eDBL692+fbvCwsJks9ns0+rWraukpCT9/fff9mnXvhbS1dcj7bV46qmndOHCBZUoUUJdu3bV3LlzHbo1ALA2wi0Ay8iTJ49KlSqlUqVKqUaNGvr000917tw5ffLJJ5Jkv+p3bR/ZO7kZKrM8PT2zZT25cuWy/5wW/lJTU7Nl3WlcXFzS9SHO6DXKkyePw/Ovv/5aL774orp06aKffvpJcXFx6tSpky5dupSt9aW59rWQrr4eaa9FSEiIdu7cqYkTJ8rT01M9e/ZUvXr17snvGoDzEW4BWJbNZpOLi4suXLgg6epH9pJ09OhRe5usjK/q6+uroKAgrV+/3j7typUr2rRp002Xq1Sp0g2H5CpZsqTc3d21du1a+7TLly9rw4YNKl++fKZruxVfX18FBAQ43KiWkpKizZs3O7QrVKiQw+uTmJioffv23XL9a9euVZ06ddSzZ09VrVpVpUqV0t69ex3auLu72/sZ30i5cuW0bt06h4C9du1a5c2bV0WKFLllHWk8PT3VvHlzffjhh1qxYoXWrVunrVu3Znp5APcvwi0Ay0hOTlZ8fLzi4+O1fft29e7dW0lJSWrevLkkqVSpUgoJCdHQoUO1e/du/fDDD3r//feztI2+fftq1KhRmjdvnnbs2KGePXs6jL6QkUGDBmnDhg3q2bOntmzZoh07dmjSpEk6efKk8uTJox49euill17SokWL9Ndff6lr1646f/68unTpcrsvRYZ69+6tkSNHav78+dq5c6f69u2r06dPO3QBaNiwob744gutXr1aW7duVXR0tFxdXW+57tKlS2vjxo1avHixdu3apddff90hSEtXR2LYsmWLdu7cqZMnT2Z4JbVnz546dOiQevfurR07dmj+/PkaMmSI+vfvn+m+07GxsZo6dar+/PNP/e9//9OXX34pT09PhYaGZmp5APc3bigDYBmLFi2y90XNmzevypYtq1mzZql+/fqSrn6U/d///lc9evRQpUqVVKNGDY0YMUJPPfVUprcxYMAAHT16VNHR0XJxcVHnzp31xBNPKCEh4YbLlClTRj/99JNeffVV1axZU56enqpVq5batWsnSRo1apRSU1PVoUMHnT17VtWrV9fixYuVP3/+238xMjBw4EDFx8frueeek6urq7p166bIyEiH8Dpo0CDt27dPzZo1k6+vr958881MXbnt3r27fv/9dz399NOy2Wxq166devbsqYULF9rbdO3aVStWrFD16tWVlJSk5cuXq1ixYg7rKVy4sH788Ue99NJLqly5svz8/NSlSxcNHjw40/uZL18+jRo1Sv3791dKSooqVqyo77///p6M+QvA+WwmMwM0AgAsJzU1VeXKlVObNm305ptvOrscAMgWXLkFgH+JAwcO6KefflJ4eLiSk5M1fvx47du3T88884yzSwOAbEOfWwD4l3BxcVFsbKxq1KihunXrauvWrfr5559Vrlw5Z5cGANmGbgkAAACwDK7cAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAy/h/D4ohnS+PFxQAAAAASUVORK5CYII=", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAArcAAAIjCAYAAAAZajMiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABRJklEQVR4nO3de3zP9f//8ft7ZjNmY9iJmXPIMcdRNofMckiUSJrDB2VOUUkphxTp4JBTSqbSgZxKITmTJKwk5+YQ5hA2cxi25+8Pv72/3jZsbN7z6na9XN6Xy16v1/P1ej1e773er933ej/fz7fNGGMEAAAAWICLswsAAAAAsgrhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWAbhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWAbhFriHDRs2TDab7a7sKywsTGFhYfbpVatWyWaz6Ztvvrkr+7/XdO7cWZ6ens4uI9tER0fLZrNp//79zi7lP2X//v2y2WyKjo52dilAjkW4BXKI1LCQ+siTJ48CAwMVHh6uCRMm6OzZs1mynyNHjmjYsGGKiYnJku1lpeyqLTUQpD5y5cql4sWL67HHHsuRz4MzXbp0SePHj1f16tXl5eWlAgUK6P7771ePHj20c+dOZ5cnSYqJidHTTz+toKAgubu7y8fHR02aNNGMGTOUnJzs7PIAOJmrswsA4GjEiBEqWbKkLl++rLi4OK1atUr9+/fX+++/r2+//VZVqlSxtx0yZIhefvnlTG3/yJEjGj58uEqUKKFq1apleL0ff/wxU/u5HbdbW0Z16NBBjzzyiJKTk7Vjxw5NmTJFixcv1i+//JIt+7sXtW3bVosXL1aHDh3UvXt3Xb58WTt37tSiRYtUr149lS9fXpLUqVMntW/fXu7u7ne1vo8//ljPPvus/Pz81KlTJ5UtW1Znz57V8uXL1a1bNx09elSvvPLKXa3pbgoODtaFCxeUO3duZ5cC5FiEWyCHiYiIUM2aNe3TgwcP1ooVK9SiRQu1atVKO3bskIeHhyTJ1dVVrq7Z+zI+f/688ubNKzc3t2zdz93wwAMP6Omnn7ZP169fX61atdKUKVP04YcfOrGynGHTpk1atGiR3nzzzTQBceLEiTpz5ox9OleuXMqVK9ddre+XX37Rs88+q5CQEP3www/Knz+/fVn//v3122+/6c8//7yrNd0tV65cUUpKitzc3JQnTx5nlwPkaHRLAO4BjRo10muvvaYDBw7o888/t89Pr8/tsmXL9OCDD6pAgQLy9PTUfffdZw8qq1atUq1atSRJXbp0sb9Nn9p/LywsTJUqVdLmzZvVoEED5c2b177u9X1uUyUnJ+uVV16Rv7+/8uXLp1atWunQoUMObUqUKKHOnTunWffabd6qNknauHGjmjVrJm9vb+XNm1ehoaFav359hp/H6zVq1EiSFBsbK0lauHChmjdvrsDAQLm7u6t06dJ644030n2re+PGjXrkkUdUsGBB5cuXT1WqVNH48eNvur+YmBgVKVJEYWFhSkxMlCS9++67qlevngoVKiQPDw/VqFEj3X7MFy5cUN++fVW4cGHlz59frVq10uHDh2Wz2TRs2DCHtocPH1bXrl3l5+cnd3d33X///frkk09u+Xzs27dP0tXQf71cuXKpUKFC9unr+9ymnovpPa793aekpGjcuHG6//77lSdPHvn5+alnz546ffr0LesbPny4bDabZs2a5RBsU9WsWdNhX+fOndPAgQPt3Rfuu+8+vfvuuzLGOKxns9nUu3dvzZkzRxUrVpSHh4dCQkK0bds2SdKHH36oMmXKKE+ePAoLC0vTz/ja1029evXk4eGhkiVLaurUqQ7tLl26pNdff101atSQt7e38uXLp4ceekgrV650aJfajebdd9/VuHHjVLp0abm7u+uvv/5Kt89tXFycunTpomLFisnd3V0BAQF69NFH09Q5efJk3X///XJ3d1dgYKCioqIc/mG59lj++usvNWzYUHnz5lXRokU1ZsyYm/xmgJyFO7fAPaJTp0565ZVX9OOPP6p79+7pttm+fbtatGihKlWqaMSIEXJ3d9fevXvtAbBChQoaMWKEXn/9dfXo0UMPPfSQJKlevXr2bfz777+KiIhQ+/bt9fTTT8vPz++mdb355puy2WwaNGiQjh8/rnHjxqlJkyaKiYmx32HOiFvVtmLFCkVERKhGjRoaOnSoXFxcNGPGDDVq1Ehr165V7dq1M7yvVKlhLjW0RUdHy9PTUwMGDJCnp6dWrFih119/XQkJCXrnnXfs6y1btkwtWrRQQECA+vXrJ39/f+3YsUOLFi1Sv3790t3Xpk2bFB4erpo1a2rhwoX252b8+PFq1aqVOnbsqEuXLumrr77SE088oUWLFql58+b29Tt37qzZs2erU6dOqlu3rlavXu2wPNWxY8dUt25de2ArUqSIFi9erG7duikhIUH9+/e/4fMRHBwsSZo1a5bq16+fqXcF2rRpozJlyjjM27x5s8aNGydfX1/7vJ49eyo6OlpdunRR3759FRsbq4kTJ2rr1q1av379Dd9uP3/+vJYvX64GDRqoePHit6zHGKNWrVpp5cqV6tatm6pVq6alS5fqxRdf1OHDhzV27FiH9mvXrtW3336rqKgoSdKoUaPUokULvfTSS5o8ebJ69eql06dPa8yYMeratatWrFjhsP7p06f1yCOPqF27durQoYNmz56t5557Tm5uburataskKSEhQR9//LG9y8fZs2c1ffp0hYeH69dff03TNWbGjBm6ePGievToYe9bnJKSkuZY27Ztq+3bt6tPnz4qUaKEjh8/rmXLlungwYMqUaKEpKv/fAwfPlxNmjTRc889p127dmnKlCnatGlTmuf99OnTatasmdq0aaN27drpm2++0aBBg1S5cmVFRETc8rkHnM4AyBFmzJhhJJlNmzbdsI23t7epXr26fXro0KHm2pfx2LFjjSRz4sSJG25j06ZNRpKZMWNGmmWhoaFGkpk6dWq6y0JDQ+3TK1euNJJM0aJFTUJCgn3+7NmzjSQzfvx4+7zg4GATGRl5y23eqLaUlBRTtmxZEx4eblJSUuzzz58/b0qWLGkefvjhGx6vMcbExsYaSWb48OHmxIkTJi4uzqxatcpUr17dSDJz5861b+96PXv2NHnz5jUXL140xhhz5coVU7JkSRMcHGxOnz6dps5UkZGRJl++fMYYY9atW2e8vLxM8+bN7du59hiudenSJVOpUiXTqFEj+7zNmzcbSaZ///4ObTt37mwkmaFDh9rndevWzQQEBJiTJ086tG3fvr3x9vZO9xivrT/1HPDz8zMdOnQwkyZNMgcOHEjTNvV8jY2NTXdbJ06cMMWLFzeVK1c2iYmJxhhj1q5daySZWbNmObRdsmRJuvOv9fvvvxtJpl+/fjdsc60FCxYYSWbkyJEO8x9//HFjs9nM3r177fMkGXd3d4dj+fDDD40k4+/v73B+Dx48OM1xpz5n7733nn1eUlKSqVatmvH19TWXLl0yxlw9d5KSkhzqOX36tPHz8zNdu3a1z0
s9X728vMzx48cd2qcuS32NnD592kgy77zzzg2fi+PHjxs3NzfTtGlTk5ycbJ8/ceJEI8l88sknaY7l008/dTgWf39/07Zt2xvuA8hJ6JYA3EM8PT1vOmpCgQIFJF19ez29OzwZ4e7uri5dumS4/TPPPOPwFvHjjz+ugIAA/fDDD7e1//TExMRoz549euqpp/Tvv//q5MmTOnnypM6dO6fGjRtrzZo1GTreoUOHqkiRIvL391dYWJj27dunt99+W23atJEkhzvNZ8+e1cmTJ/XQQw/p/Pnz9pECtm7dqtjYWPXv39/+fKdKb1i2lStXKjw8XI0bN9a8efPSfADr2n2ePn1a8fHxeuihh7Rlyxb7/CVLlkiSevXq5bBunz59HKaNMZo7d65atmwpY4z9eTp58qTCw8MVHx/vsN3r2Ww2LV26VCNHjlTBggX15ZdfKioqSsHBwXryySfTvIV9I8nJyerQoYPOnj2r+fPnK1++fJKkOXPmyNvbWw8//LBDbTVq1JCnp2eat+evlZCQIEnpdkdIzw8//KBcuXKpb9++DvMHDhwoY4wWL17sML9x48b2u5ySVKdOHUlX74peu8/U+X///bfD+q6ururZs6d92s3NTT179tTx48e1efNmSVe7dqT2XU9JSdGpU6d05coV1axZM93fS9u2bVWkSJGbHqeHh4fc3Ny0atWqG3bt+Omnn3Tp0iX1799fLi7/92e/e/fu8vLy0vfff+/Q3tPT06Fvupubm2rXrp3mmIGcim4JwD0kMTHR4S3e6z355JP6+OOP9b///U8vv/yyGjdurDZt2ujxxx93+KN2M0WLFs3Uh8fKli3rMG2z2VSmTJksHf90z549kqTIyMgbtomPj1fBggVvup0ePXroiSeekIuLi32Iq2vD5vbt2zVkyBCtWLHCHqau3b70f10ZKlWqdMu6L168qObNm6tGjRqaPXt2um/zL1q0SCNHjlRMTIySkpLs868NygcOHJCLi4tKlizpsO713QBOnDihM2fOaNq0aZo2bVq6NR0/fvymNbu7u+vVV1/Vq6++qqNHj2r16tUaP368Zs+erdy5czv0+b6R1Ofw+++/V+nSpe3z9+zZo/j4+BuewzerzcvLS5IyPCTegQMHFBgYmCYMV6hQwb78Wtd3dfD29pYkBQUFpTv/+iAZGBhoD/GpypUrJ+lqH9q6detKkmbOnKn33ntPO3fu1OXLl+1tr//d3mje9dzd3fX2229r4MCB8vPzU926ddWiRQs988wz8vf3dzjW++67z2FdNzc3lSpVKs1zUaxYsTT/qBUsWFB//PHHLesBcgLCLXCP+OeffxQfH58m0FzLw8NDa9as0cqVK/X9999ryZIl+vrrr9WoUSP9+OOPGfp0e2b6yWbUjb5oIjk5OUM1pd6Vfeedd244ZFdGvjChbNmyatKkSbrLzpw5o9DQUHl5eWnEiBEqXbq08uTJoy1btmjQoEG3dSfc3d1djzzyiBYuXKglS5aoRYsWDsvXrl2rVq1aqUGDBpo8ebICAgKUO3duzZgxQ1988UWm95da49NPP33DfwSuHUruVgICAtS+fXu1bdtW999/v2bPnq3o6Oib9sVdsGCB3n77bb3xxhtq1qxZmvp8fX01a9asdNe92V3KMmXKyNXV1f4hr6x2o/PwRvPNdR9Ky4jPP/9cnTt3VuvWrfXiiy/K19dXuXLl0qhRo+z/NF0ro6/F/v37q2XLllqwYIGWLl2q1157TaNGjdKKFStUvXr1TNeZlccMOAPhFrhHfPbZZ5Kk8PDwm7ZzcXFR48aN1bhxY73//vt666239Oqrr2rlypVq0qRJln+jWepd1VTGGO3du9chRBUsWDDdt7QPHDigUqVK2advVFvq3T8vL68bhtM7tWrVKv3777+aN2+eGjRoYJ+fOpLC9bX8+eeft6wl9ZP9jz76qJ544gktXrzYYcSJuXPnKk+ePFq6dKnDHeQZM2Y4bCc4OFgpKSmKjY11uFO+d+9eh3ZFihRR/vz5lZycnKXPU+7cuVWlShXt2bNHJ0+etN8RvN7u3bsVGRmp1q1bpzvWbOnSpfXTTz+pfv36mf4nKm/evGrUqJFWrFihQ4cOpbmjer3g4GD99NNPOnv2rMPd29TuJakfnssqR44c0blz5xzu3u7evVuS7N0dvvnmG5UqVUrz5s1zONeHDh16x/svXbq0Bg4cqIEDB2rPnj2qVq2a3nvvPX3++ef2Y921a5fD6+3SpUuKjY3NttcU4Cz0uQXuAStWrNAbb7yhkiVLqmPHjjdsd+rUqTTzUu90pr7lnfrHN6P9J2/l008/dXir+JtvvtHRo0cdPlVdunRp/fLLL7p06ZJ93qJFi9IMGXaj2mrUqKHSpUvr3XfftQ+hda0TJ07c8XGk3q269u7UpUuXNHnyZId2DzzwgEqWLKlx48alqTO9O1tubm6aN2+eatWqpZYtW+rXX3912KfNZnMYamz//v1asGCBwzZS/6G5vpYPPvggzTG0bdtWc+fOTXe811s9T3v27NHBgwfTzD9z5ow2bNigggUL3vDuamJioh577DEVLVpUM2fOTPcflXbt2ik5OVlvvPFGmmVXrly55Tk5dOhQGWPUqVOndM+DzZs3a+bMmZJk/7KOiRMnOrQZO3asbDZbln/q/8qVKw5jJV+6dEkffvihihQpoho1akhK/xzbuHGjNmzYcNv7PX/+vC5evOgwr3Tp0sqfP7/9Nd+kSRO5ublpwoQJDvuePn264uPj0x11A7iXcecWyGEWL16snTt36sqVKzp27JhWrFihZcuWKTg4WN9+++1NB3AfMWKE1qxZo+bNmys4OFjHjx/X5MmTVaxYMT344IOSrv7hK1CggKZOnar8+fMrX758qlOnTob696XHx8dHDz74oLp06aJjx45p3LhxKlOmjMNwZf/73//0zTffqFmzZmrXrp327dunzz//3KE/5q1q+/jjjxUREaH7779fXbp0UdGiRXX48GGtXLlSXl5e+u67726r/lT16tVTwYIFFRkZqb59+8pms+mzzz5LE1hdXFw0ZcoUtWzZUtWqVVOXLl0UEBCgnTt3avv27Vq6dGmabXt4eGjRokVq1KiRIiIitHr1alWqVEnNmzfX+++/r2bNmumpp57S8ePHNWnSJJUpU8ahf2ONGjXUtm1bjRs3Tv/++699KLDUO4PXBsnRo0dr5cqVqlOnjrp3766KFSvq1KlT2rJli3766ad0/wFK9fvvv+upp55SRESEHnroIfn4+Ojw4cOaOXOmjhw5onHjxt3wLevhw4frr7/+0pAhQ7Rw4UKHZaVLl1ZISIhCQ0PVs2dPjRo1SjExMWratKly586tPXv2aM6cORo/frwef/zxm/6OJk2apF69eql8+fIO31C2atUqffvttxo5cqQkqWXLlmrYsKFeffVV7d+/X1WrVtWPP/6ohQsXqn///mnOvTsVGBiot99+W/v371e5cuX09ddfKyYmRtOmTbMPs9WiRQvNmzdPjz32mJo3b67Y2FhNnTpVFStWTDesZ8Tu3bvVu
HFjtWvXThUrVpSrq6vmz5+vY8eOqX379pKu3tEfPHiwhg8frmbNmqlVq1batWuXJk+erFq1ajl8eAywBOcM0gDgeqlDK6U+3NzcjL+/v3n44YfN+PHjHYYjSnX9UGDLly83jz76qAkMDDRubm4mMDDQdOjQwezevdthvYULF5qKFSsaV1dXh2GFQkNDzf33359ufTcaCuzLL780gwcPNr6+vsbDw8M0b9483aGj3nvvPVO0aFHj7u5u6tevb3777bc027xZbcYYs3XrVtOmTRtTqFAh4+7uboKDg027du3M8uXLb/rcpg6fdLPhkowxZv369aZu3brGw8PDBAYGmpdeesksXbrUSDIrV650aLtu3Trz8MMPm/z585t8+fKZKlWqmA8++MC+/NqhwFKdPHnSVKxY0fj7+5s9e/YYY4yZPn26KVu2rHF3dzfly5c3M2bMSPN7NcaYc+fOmaioKOPj42M8PT1N69atza5du4wkM3r0aIe2x44dM1FRUSYoKMjkzp3b+Pv7m8aNG5tp06bd9PiPHTtmRo8ebUJDQ01AQIBxdXU1BQsWNI0aNTLffPONQ9vrhwKLjIx0OH+vfVw/DNy0adNMjRo1jIeHh8mfP7+pXLmyeemll8yRI0duWl+qzZs3m6eeesoEBgaa3Llzm4IFC5rGjRubmTNnOgx1dfbsWfP888/b25UtW9a88847DkO2GXN1KLCoqCiHeTc6Z1LP+zlz5tjnpb5ufvvtNxMSEmLy5MljgoODzcSJEx3WTUlJMW+99ZYJDg427u7upnr16mbRokUmMjLSBAcH33Lf1y5LfV2cPHnSREVFmfLly5t8+fIZb29vU6dOHTN79uw0606cONGUL1/e5M6d2/j5+ZnnnnsuzXB2N7oGXF8jkJPZjKGHOADci2JiYlS9enV9/vnnN+2uguwVFhamkydPWvarf4F7DX1uAeAecOHChTTzxo0bJxcXF4cPwAHAfx19bgHgHjBmzBht3rxZDRs2lKurqxYvXqzFixerR48etxw5AAD+Swi3AHAPqFevnpYtW6Y33nhDiYmJKl68uIYNG6ZXX33V2aUBQI5Cn1sAAABYBn1uAQAAYBmEWwAAAFgGfW519fvOjxw5ovz582f5V5MCAADgzhljdPbsWQUGBsrF5cb3Zwm3uvqd4HzaGAAAIOc7dOiQihUrdsPlhFtJ+fPnl3T1yfLy8nJyNQAAALheQkKCgoKC7LntRgi3+r/vZffy8iLcAgAA5GC36kLKB8oAAABgGYRbAAAAWAbhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWAbhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWAbhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWAbhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWAbhFgAAAJbh6uwC/qtsNmdXgOxgjLMrAADgv407twAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMp4bbUaNGqVatWsqfP798fX3VunVr7dq1y6FNWFiYbDabw+PZZ591aHPw4EE1b95cefPmla+vr1588UVduXLlbh4KAAAAcgBXZ+589erVioqKUq1atXTlyhW98soratq0qf766y/ly5fP3q579+4aMWKEfTpv3rz2n5OTk9W8eXP5+/vr559/1tGjR/XMM88od+7ceuutt+7q8QAAAMC5bMYY4+wiUp04cUK+vr5avXq1GjRoIOnqndtq1app3Lhx6a6zePFitWjRQkeOHJGfn58kaerUqRo0aJBOnDghNze3W+43ISFB3t7eio+Pl5eXV5Ydz83YbHdlN7jLcs6rCQAAa8loXstRfW7j4+MlST4+Pg7zZ82apcKFC6tSpUoaPHiwzp8/b1+2YcMGVa5c2R5sJSk8PFwJCQnavn17uvtJSkpSQkKCwwMAAAD3Pqd2S7hWSkqK+vfvr/r166tSpUr2+U899ZSCg4MVGBioP/74Q4MGDdKuXbs0b948SVJcXJxDsJVkn46Li0t3X6NGjdLw4cOz6UgAAADgLDkm3EZFRenPP//UunXrHOb36NHD/nPlypUVEBCgxo0ba9++fSpduvRt7Wvw4MEaMGCAfTohIUFBQUG3VzgAAAByjBzRLaF3795atGiRVq5cqWLFit20bZ06dSRJe/fulST5+/vr2LFjDm1Sp/39/dPdhru7u7y8vBweAAAAuPc5NdwaY9S7d2/Nnz9fK1asUMmSJW+5TkxMjCQpICBAkhQSEqJt27bp+PHj9jbLli2Tl5eXKlasmC11AwAAIGdyareEqKgoffHFF1q4cKHy589v7yPr7e0tDw8P7du3T1988YUeeeQRFSpUSH/88Yeef/55NWjQQFWqVJEkNW3aVBUrVlSnTp00ZswYxcXFaciQIYqKipK7u7szDw8AAAB3mVOHArPdYDysGTNmqHPnzjp06JCefvpp/fnnnzp37pyCgoL02GOPaciQIQ5dCQ4cOKDnnntOq1atUr58+RQZGanRo0fL1TVj2Z2hwJBVGAoMAIDskdG8lqPGuXUWwi2yCq8mAACyxz05zi0AAABwJwi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLINwCAADAMgi3AAAAsAzCLQAAACyDcAsAAADLcHV2AQDunG24zdklIJuYocbZJQDAPYU7twAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDKcGm5HjRqlWrVqKX/+/PL19VXr1q21a9cuhzYXL15UVFSUChUqJE9PT7Vt21bHjh1zaHPw4EE1b95cefPm
la+vr1588UVduXLlbh4KAAAAcgCnhtvVq1crKipKv/zyi5YtW6bLly+radOmOnfunL3N888/r++++05z5szR6tWrdeTIEbVp08a+PDk5Wc2bN9elS5f0888/a+bMmYqOjtbrr7/ujEMCAACAE9mMMcbZRaQ6ceKEfH19tXr1ajVo0EDx8fEqUqSIvvjiCz3++OOSpJ07d6pChQrasGGD6tatq8WLF6tFixY6cuSI/Pz8JElTp07VoEGDdOLECbm5ud1yvwkJCfL29lZ8fLy8vLyy9RhT2Wx3ZTe4y5z1arIN54SyKjM0x1yiAcCpMprXclSf2/j4eEmSj4+PJGnz5s26fPmymjRpYm9Tvnx5FS9eXBs2bJAkbdiwQZUrV7YHW0kKDw9XQkKCtm/fnu5+kpKSlJCQ4PAAAADAvS/HhNuUlBT1799f9evXV6VKlSRJcXFxcnNzU4ECBRza+vn5KS4uzt7m2mCbujx1WXpGjRolb29v+yMoKCiLjwYAAADOkGPCbVRUlP7880999dVX2b6vwYMHKz4+3v44dOhQtu8TAAAA2c/V2QVIUu/evbVo0SKtWbNGxYoVs8/39/fXpUuXdObMGYe7t8eOHZO/v7+9za+//uqwvdTRFFLbXM/d3V3u7u5ZfBQAAABwNqfeuTXGqHfv3po/f75WrFihkiVLOiyvUaOGcufOreXLl9vn7dq1SwcPHlRISIgkKSQkRNu2bdPx48ftbZYtWyYvLy9VrFjx7hwIAAAAcgSn3rmNiorSF198oYULFyp//vz2PrLe3t7y8PCQt7e3unXrpgEDBsjHx0deXl7q06ePQkJCVLduXUlS06ZNVbFiRXXq1EljxoxRXFychgwZoqioKO7OAgAA/Mc4NdxOmTJFkhQWFuYwf8aMGercubMkaezYsXJxcVHbtm2VlJSk8PBwTZ482d42V65cWrRokZ577jmFhIQoX758ioyM1IgRI+7WYQAAACCHyFHj3DoL49wiqzDOLbIa49wCwFX35Di3AAAAwJ0g3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwjDsOt0lJSVlRBwAAAHDHMh1uFy9erMjISJUqVUq5c+dW3rx55eXlpdDQUL355ps6cuRIdtQJAAAA3FKGw+38+fNVrlw5de3aVa6urho0aJDmzZunpUuX6uOPP1ZoaKh++uknlSpVSs8++6xOnDiRnXUDAAAAadiMMSYjDUNCQjRkyBBFRETIxeXGmfjw4cP64IMP5Ofnp+effz7LCs1OCQkJ8vb2Vnx8vLy8vO7KPm22u7Ib3GUZezVlPdtwTiirMkOddFIBQA6T0bzmmtENbtiwIUPtihYtqtGjR2d0swAAAECWYbQEAAAAWEamw+2ePXs0d+5cxcbGSpK+//57NWjQQLVq1dKbb76pDPZyAAAAALJchrslSFc/VNauXTu5uLjIZrNp2rRp6tmzp8LCwuTl5aVhw4bZP2wGAAAA3G2ZunP75ptv6qWXXtLFixc1ZcoUPfvssxo1apQWL16sRYsWadKkSYqOjs6mUgEAAICby1S43bVrl7p27SqbzabIyEhdunRJTZo0sS9v2rSpDhw4kOVFAgAAABmRqXB77tw55c+f/+qKLi7y8PBQ3rx57cs9PDz4xjIAAAA4TabCrc1mk+2aAVqvnwYAAACcKVMfKDPGqFy5cvZAm5iYqOrVq9u/1IGREgAAAOBMmQq3M2bMyK46AAAAgDuWqXAbGRmZXXUAAAAAd4xvKAMAAIBlZPjOrY+Pj3bv3q3ChQurYMGCN/0g2alTp7KkOAAAACAzMhxux44dax8GbNy4cdlVDwAAAHDbMhxur+1vS99bAAAA5ERO7XO7Zs0atWzZUoGBgbLZbFqwYIHD8s6dO9vH0k19NGvWzKHNqVOn1LFjR3l5ealAgQLq1q2bEhMT7+JRAAAAIKfIVLjNlStXhh4Zde7cOVWtWlWTJk26YZtmzZrp6NGj9seXX37psLxjx47avn27li1bpkWLFmnNmjXq0aNHZg4LAAAAFpHpL3EIDg5WZGSkqlevfsc7j4iIUERExE3buLu7y9/fP91lO3bs0JIlS7Rp0ybVrFlTkvTBBx/okUce0bvvvqvAwMA7rhEAAAD3jkyF219//VXTp0/X+PHjVbJkSXXt2lUdO3ZUwYIFs6s+rVq1Sr6+vipYsKAaNWqkkSNHqlChQpKkDRs2qECBAvZgK0lNmjSRi4uLNm7cqMceeyzdbSYlJSkpKck+nZCQkG31AwAA4O7JVLeEmjVrasqUKTp69KgGDBig+fPnq1ixYmrfvr2WLVuW5cU1a9ZMn376qZYvX663335bq1evVkREhJKTkyVJcXFx8vX1dVjH1dVVPj4+iouLu+F2R40aJW9vb/sjKCgoy2sHAADA3XdbHyjLkyePnn76aS1fvlx//vmnjh8/rmbNmmX5+Lbt27dXq1atVLlyZbVu3VqLFi3Spk2btGrVqjva7uDBgxUfH29/HDp0KGsKBgAAgFNlqlvCtf755x9FR0crOjpa58+f14svvigvL6+srC2NUqVKqXDhwtq7d68aN24sf39/HT9+3KHNlStXdOrUqRv205Wu9uN1d3fP1loBAABw92Xqzu2lS5f09ddfq2nTpipbtqy2bNmicePG6dChQxo9erRcXW87K2fIP//8o3///VcBAQGSpJCQEJ05c0abN2+2t1mxYoVSUlJUp06dbK0FAAAAOU+m0mhAQIDy58+vyMhITZ482d7f9dy5cw7tMnoHNzExUXv37rVPx8bGKiYmRj4+PvLx8dHw4cPVtm1b+fv7a9++fXrppZdUpkwZhYeHS5IqVKigZs2aqXv37po6daouX76s3r17q3379oyUAAAA8B9kM8aYjDZ2cfm/G702my3NcmOMbDab/QNft7Jq1So1bNgwzfzIyEhNmTJFrVu31tatW3XmzBkFBgaqadOmeuONN+Tn52dve+rUKfXu3VvfffedXFxc1LZtW02YMEGenp4ZPSwlJCTI29tb8fHx2d61IlU6Tx8sIOOvpqxlG84JZVVmqJNOKgDIYTKa1zJ153blypV3XNi1wsLCdLNsvXTp0ltuw8fHR1988UVWlgUAAIB7VKbCbWhoaHbVAQAAANy
xDH+g7Pp+tVndHgAAALhTGQ63ZcqU0ejRo3X06NEbtjHGaNmyZYqIiNCECROypEAAAAAgozLcLWHVqlV65ZVXNGzYMFWtWlU1a9ZUYGCg8uTJo9OnT+uvv/7Shg0b5OrqqsGDB6tnz57ZWTcAAACQRobD7X333ae5c+fq4MGDmjNnjtauXauff/5ZFy5cUOHChVW9enV99NFHioiIUK5cubKzZgAAACBdmRoKzKoYCgxZhaHAkNUYCgwArspoXsvUN5QBAAAAORnhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWMZth9u1a9fq6aefVkhIiA4fPixJ+uyzz7Ru3bosKw4AAADIjNsKt3PnzlV4eLg8PDy0detWJSUlSZLi4+P11ltvZWmBAAAAQEbdVrgdOXKkpk6dqo8++ki5c+e2z69fv762bNmSZcUBAAAAmXFb4XbXrl1q0KBBmvne3t46c+bMndYEAAAA3JbbCrf+/v7au3dvmvnr1q1TqVKl7rgoAAAA4HbcVrjt3r27+vXrp40bN8pms+nIkSOaNWuWXnjhBT333HNZXSMAAACQIa63s9LLL7+slJQUNW7cWOfPn1eDBg3k7u6uF154QX369MnqGgEAAIAMsRljzO2ufOnSJe3du1eJiYmqWLGiPD09s7K2uyYhIUHe3t6Kj4+Xl5fXXdmnzXZXdoO77PZfTXfGNpwTyqrMUCedVACQw2Q0r91Wt4RPP/1UO3bskJubmypWrKjatWvL09NTFy9e1KeffnrbRQMAAAB34rbCbefOnVW7dm3NnTvXYX58fLy6dOmSJYUBAAAAmXXb31A2fPhwderUScOGDcvCcgAAAIDbd9vh9umnn9aKFSv04Ycf6vHHH9eFCxeysi4AAAAg024r3Nr+/6eh6tatq40bN2rv3r2qV6+e9u/fn5W1AQAAAJlyW+H22gEWihcvrp9//lklSpTQww8/nGWFAQAAAJl1W+F26NChDsN+5c2bV/Pnz9fzzz+f7tfyAgAAAHfDHY1zaxWMc4uswji3yGqMcwsAV2U0r2X4G8q+/fZbRUREKHfu3Pr2229v2M5ms6lly5aZqxYAAADIAhkOt61bt1ZcXJx8fX3VunXrG7az2WxKTk7OitoAAACATMlwuE1JSUn3ZwAAACCnuO1xbgEAAICcJlPhdsOGDVq0aJHDvE8//VQlS5aUr6+vevTooaSkpCwtEAAAAMioTIXbESNGaPv27fbpbdu2qVu3bmrSpIlefvllfffddxo1alSWFwkAAABkRKbCbUxMjBo3bmyf/uqrr1SnTh199NFHGjBggCZMmKDZs2dneZEAAABARmQq3J4+fVp+fn726dWrVysiIsI+XatWLR06dCjrqgMAAAAyIVPh1s/PT7GxsZKkS5cuacuWLapbt659+dmzZ5U7d+6srRAAAADIoEyF20ceeUQvv/yy1q5dq8GDBytv3rx66KGH7Mv/+OMPlS5dOsuLBAAAADIiw+PcStIbb7yhNm3aKDQ0VJ6enpo5c6bc3Nzsyz/55BM1bdo0y4sEAAAAMiJT4bZw4cJas2aN4uPj5enpqVy5cjksnzNnjjw9PbO0QAAAACCjMhVuU3l7e6c738fH546KAQAAAO4E31AGAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAy3BquF2zZo1atmypwMBA2Ww2LViwwGG5MUavv/66AgIC5OHhoSZNmmjPnj0ObU6dOqWOHTvKy8tLBQoUULdu3ZSYmHgXjwIAAAA5hVPD7blz51S1alVNmjQp3eVjxozRhAkTNHXqVG3cuFH58uVTeHi4Ll68aG/TsWNHbd++XcuWLdOiRYu0Zs0a9ejR424dAgAAAHIQmzHGOLsISbLZbJo/f75at24t6epd28DAQA0cOFAvvPCCJCk+Pl5+fn6Kjo5W+/bttWPHDlWsWFGbNm1SzZo1JUlLlizRI488on/++UeBgYEZ2ndCQoK8vb0VHx8vLy+vbDm+69lsd2U3uMuc9WqyDeeEsiozNEdcogHA6TKa13Jsn9vY2FjFxcWpSZMm9nne3t6qU6eONmzYIEnasGGDChQoYA+2ktSkSRO5uLho48aNN9x2UlKSEhISHB4AAAC49+XYcBsXFydJ8vPzc5jv5+dnXxYXFydfX1+H5a6urvLx8bG3Sc+oUaPk7e1tfwQFBWVx9QAAAHCGHBtus9PgwYMVHx9vfxw6dMjZJQEAACAL5Nhw6+/vL0k6duyYw/xjx47Zl/n7++v48eMOy69cuaJTp07Z26TH3d1dXl5eDg8AAADc+3JsuC1ZsqT8/f21fPly+7yEhARt3LhRISEhkqSQkBCdOXNGmzdvtrdZsWKFUlJSVKdOnbteMwAAAJzL1Zk7T0xM1N69e+3TsbGxiomJkY+Pj4oXL67+/ftr5MiRKlu2rEqWLKnXXntNgYGB9hEVKlSooGbNmql79+6aOnWqLl++rN69e6t9+/YZHikBAAAA1uHUcPvbb7+pYcOG9ukBAwZIkiIjIxUdHa2XXnpJ586dU48ePXTmzBk9+OCDWrJkifLkyWNfZ9asWerdu7caN24sFxcXtW3bVhMmTLjrxwIAAADnyzHj3DoT49wiqzDOLbIa49wCwFX3/Di3AAAAQGYRbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYBuEWAAAAlkG4BQAAgGUQbgEAAGAZhFsAAABYhquzCwAA5CzDbcOdXQKyyVAz1NklANmOO7cAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwDMItAAAALINwCwAAAMsg3AIAAMAyCLcAAACwjBwdbocNGyabzebwKF++vH35xYsXFRUVpUKFCsnT01Nt27bVsWPHnF
gxAAAAnClHh1tJuv/++3X06FH7Y926dfZlzz//vL777jvNmTNHq1ev1pEjR9SmTRsnVgsAAABncnV2Abfi6uoqf3//NPPj4+M1ffp0ffHFF2rUqJEkacaMGapQoYJ++eUX1a1b926XCgAAACfL8Xdu9+zZo8DAQJUqVUodO3bUwYMHJUmbN2/W5cuX1aRJE3vb8uXLq3jx4tqwYcNNt5mUlKSEhASHBwAAAO59OTrc1qlTR9HR0VqyZImmTJmi2NhYPfTQQzp79qzi4uLk5uamAgUKOKzj5+enuLi4m2531KhR8vb2tj+CgoKy8SgAAABwt+TobgkRERH2n6tUqaI6deooODhYs2fPloeHx21vd/DgwRowYIB9OiEhgYALAABgATn6zu31ChQooHLlymnv3r3y9/fXpUuXdObMGYc2x44dS7eP7rXc3d3l5eXl8AAAAMC9754Kt4mJidq3b58CAgJUo0YN5c6dW8uXL7cv37Vrlw4ePKiQkBAnVgkAAABnydHdEl544QW1bNlSwcHBOnLkiIYOHapcuXKpQ4cO8vb2Vrdu3TRgwAD5+PjIy8tLffr0UUhICCMlAAAA/Efl6HD7zz//qEOHDvr3339VpEgRPfjgg/rll19UpEgRSdLYsWPl4uKitm3bKikpSeHh4Zo8ebKTqwYAAICz5Ohw+9VXX910eZ48eTRp0iRNmjTpLlUEAACAnOye6nMLAAAA3AzhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWAbhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWAbhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWAbhFgAAAJZBuAUAAIBlEG4BAABgGYRbAAAAWIarswsAAAAWZ7M5uwJkB2OcXUG6uHMLAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAsg3ALAAAAyyDcAgAAwDIItwAAALAMwi0AAAAswzLhdtKkSSpRooTy5MmjOnXq6Ndff3V2SQAAALjLLBFuv/76aw0YMEBDhw7Vli1bVLVqVYWHh+v48ePOLg0AAAB3kSXC7fvvv6/u3burS5cuqlixoqZOnaq8efPqk08+cXZpAAAAuItcnV3Anbp06ZI2b96swYMH2+e5uLioSZMm2rBhQ7rrJCUlKSkpyT4dHx8vSUpISMjeYmF5TjuFLjppv8h2zrguXeSEsiz+ziFL3eXzKfX8NcbctN09H25Pnjyp5ORk+fn5Ocz38/PTzp07011n1KhRGj58eJr5QUFB2VIj/ju8vZ1dAazGezQnFbLOaO/Rzi4BVuKkP3pnz56V9032fc+H29sxePBgDRgwwD6dkpKiU6dOqVChQrLZbE6szHoSEhIUFBSkQ4cOycvLy9nl4B7H+YSsxjmFrMT5lL2MMTp79qwCAwNv2u6eD7eFCxdWrly5dOzYMYf5x44dk7+/f7rruLu7y93d3WFegQIFsqtESPLy8uKFjizD+YSsxjmFrMT5lH1udsc21T3/gTI3NzfVqFFDy5cvt89LSUnR8uXLFRIS4sTKAAAAcLfd83duJWnAgAGKjIxUzZo1Vbt2bY0bN07nzp1Tly5dnF0aAAAA7iJLhNsnn3xSJ06c0Ouvv664uDhVq1ZNS5YsSfMhM9x97u7uGjp0aJpuIMDt4HxCVuOcQlbifMoZbOZW4ykAAAAA94h7vs8tAAAAkIpwCwAAAMsg3AIAAMAyCLe4I8YY9ejRQz4+PrLZbIqJibnlOjabTQsWLJAk7d+/P8PrwflKlCihcePGZbh9Tv39du7cWa1bt3Z2GZKkYcOGqVq1as4uI0e4neuJxDXlXsY1JetxTSHc4g4tWbJE0dHRWrRokY4ePapKlSo5uyRko02bNqlHjx5Zus3o6Ogs+RKVzPyRHD9+vKKjo+94n8ha98L1pHLlynr22WfTXfbZZ5/J3d1dJ0+elHQ1rE+bNk116tSRp6enChQooJo1a2rcuHE6f/783Sw7x+Kacm/ZvHmzbDabfvnll3SXN27cWG3atLFPx8XFqU+fPipVqpTc3d0VFBSkli1bOnw3QXYg3OKO7Nu3TwEBAapXr578/f3l6mqJ0eVwA0WKFFHevHmdXcZtS05OVkpKiry9vflWwhzIWdeTEiVKaNWqVRlq261bN3311Ve6cOFCmmUzZsxQq1atVLhwYUlSp06d1L9/fz366KNauXKlYmJi9Nprr2nhwoX68ccfs/IQ7llcU5xv1apVKlGiRIba1qhRQ1WrVtUnn3ySZtn+/fu1cuVKdevWzT5do0YNrVixQu+88462bdumJUuWqGHDhoqKisrKQ0jLALcpMjLSSLI/goODTXBwsBk7dqxDu6pVq5qhQ4fapyWZ+fPnG2OMiY2NNZLM1q1b71rd/yXfffed8fb2NleuXDHGGLN161YjyQwaNMjeplu3bqZjx47GGGPWrl1rHnzwQZMnTx5TrFgx06dPH5OYmGhve/3vd8eOHaZ+/frG3d3dVKhQwSxbtizd3+/cuXNNWFiY8fDwMFWqVDE///yzMcaYlStXOpxDkhzOlWulpKSYoUOHmqCgIOPm5mYCAgJMnz59jDHGhIaGptmOMcbMmDHDeHt7m4ULF5oKFSqYXLlymdjYWBMZGWkeffRR+7ZDQ0NNVFSUiYqKMl5eXqZQoUJmyJAhJiUlxeHYR4wYYdq3b2/y5s1rAgMDzcSJEx1qPH36tOnWrZspXLiwyZ8/v2nYsKGJiYlxaDNq1Cjj6+trPD09TdeuXc2gQYNM1apVb/GbtL70rifGpD3njMn6a0pwcLBZuXJlhtqeOHHCuLm5mc8++8xh/t9//21sNptZvHixMcaYr7/+2kgyCxYsSLONlJQUc+bMmQzXl1Nk9npiDNeUe+GasnLlSvvrLSMmTJhgvLy8zLlz5xzmDx061AQGBtrPj4iICFO0aFGH3/e1x5WduHOL2zZ+/HiNGDFCxYoV09GjR7Vp0yZnl4TrPPTQQzp79qy2bt0qSVq9erUKFy7scJdq9erVCgsL0759+9SsWTO1bdtWf/zxh77++mutW7dOvXv3TnfbycnJat26tfLmzauNGzdq2rRpevXVV9Nt++qrr+qFF15QTEyMypUrpw4dOujKlSuqV6+exo0bJy8vLx09elRHjx7VCy+8kO425
s6dq7Fjx+rDDz/Unj17tGDBAlWuXFmSNG/ePBUrVkwjRoywbyfV+fPn9fbbb+vjjz/W9u3b5evrm+72Z86cKVdXV/36668aP3683n//fX388ccObd555x1VrVpVW7du1csvv6x+/fpp2bJl9uVPPPGEjh8/rsWLF2vz5s164IEH1LhxY506dUqSNHv2bA0bNkxvvfWWfvvtNwUEBGjy5Mnp1vNfc69cTwoXLqxHH300zZ2r6OhoFStWTE2bNpUkzZo1S/fdd58effTRNNuw2Wzy9va+K/VmpcxcTyRxTbHoNaVjx45KSkrSN998Y59njNHMmTPVuXNn5cqVS6dOndKSJUsUFRWlfPnypdlGtt/lztboDMsbO3asw3983LnNeR544AHzzjvvGGOMad26tXnzzTeNm5ubOXv2rPnnn3+MJLN7927TrVs306NHD4d1165da1xcXMyFCxeMMY6/38WLFxtXV1dz9OhRe/sb3WX5+OOP7W22b99uJJkdO3YYY/7vTsitvPfee6ZcuXLm0qVL6S5P79ybMWOGkZTmTkd6d1kqVKjgcFdl0KBBpkKFCg7bb9asmcN2nnzySRMREWGMufpceXl5mYsXLzq0KV26tPnwww+NMcaEhISYXr16OSyvU6cOd27/v+uvJ8bkvDu3xhizZMkSY7PZzN9//22MuXoHMDg42AwZMsTepkKFCqZVq1YZ3ua9IqPXE2MM15R75JqS2Tu3xhjTvn17Exoaap9evny5kWT27NljjDFm48aNRpKZN29eprabVbhzC1hcaGioVq1aJWOM1q5dqzZt2qhChQpat26dVq9ercDAQJUtW1a///67oqOj5enpaX+Eh4crJSVFsbGxaba7a9cuBQUFyd/f3z6vdu3a6dZQpUoV+88BAQGSpOPHj9+w5rfeesuhjoMHD+qJJ57QhQsXVKpUKXXv3l3z58/XlStXbnn8bm5uDvu/kbp168pms9mnQ0JCtGfPHiUnJzvMu1ZISIh27NghSfr999+VmJioQoUKOdQeGxurffv2SZJ27NihOnXqpNkG7q5nn302zfkVERHhMO9mHn74YRUrVkwzZsyQJC1fvlwHDx5Uly5d7G2MRb/8M6PXE0lcU3LwNeXa7UVEROjgwYMO8270oclUXbt21Zo1a+x1fPLJJwoNDVWZMmUkOf/859M/yFIuLi5pTurLly87qRpIUlhYmD755BP9/vvvyp07t8qXL6+wsDCtWrVKp0+fVmhoqCQpMTFRPXv2VN++fdNso3jx4ndUQ+7cue0/p17sU1JSbtj+2WefVbt27ezTgYGBcnV11a5du/TTTz9p2bJl6tWrl9555x2tXr3aYfvX8/DwcPgDk10SExMVEBCQ7geT7tUPmuQE2XFNGTFihMNb1WFhYXr77bfThISb1dS5c2fNnDlTw4YN04wZM9SwYUOVKlXK3qZcuXLauXPnHdWZE2X0eiJxTblT2XlNuXYotY0bN2rQoEEO+/Hy8rrp+o0bN1bx4sUVHR2tF198UfPmzdOHH35oX162bFnZbDanvQYIt8hSRYoUceiblJCQkO5/6Lh7UvvJjR071v6HJywsTKNHj9bp06c1cOBASdIDDzygv/76y/6f963cd999OnTokI4dOyY/Pz9Juq1+km5ubg53MiTJx8dHPj4+adp6eHioZcuWatmypaKiolS+fHlt27ZNDzzwQLrbyYyNGzc6TP/yyy8qW7ascuXK5TDv+jYVKlSQdPX5i4uLk6ur6w0/eVyhQgVt3LhRzzzzzA23CUfZcU3x9fV16Cfp6uqqokWLZvjcl6QuXbpo5MiRmjdvnubPn5+mL+VTTz2l9u3ba+HChWn63RpjlJCQcE/3u73V9UTimpKTrynX/k7++ecfubq6Zur8d3FxUZcuXTR9+nQVLVpUbm5uevzxx+3LfXx8FB4erkmTJqlv375p+t2eOXMmW//pp1sCslSjRo302Wefae3atdq2bZsiIyMdXsi4+woWLKgqVapo1qxZ9g96NGjQQFu2bNHu3bvtf6AGDRqkn3/+Wb1791ZMTIz27NmjhQsX3vDDHw8//LBKly6tyMhI/fHHH1q/fr2GDBkiSZm6q1GiRAklJiZq+fLlOnny5A3H/4yOjtb06dP1559/6u+//9bnn38uDw8PBQcH27ezZs0aHT582D7OaGYcPHhQAwYM0K5du/Tll1/qgw8+UL9+/RzarF+/XmPGjNHu3bs1adIkzZkzx96mSZMmCgkJUevWrfXjjz9q//79+vnnn/Xqq6/qt99+kyT169dPn3zyiWbMmKHdu3dr6NCh2r59e6Zr/S/JqdeUkiVLqlGjRurRo4fc3d0dxvaUpHbt2unJJ59Uhw4d7B/2OXDggBYtWqQmTZpo5cqVTqr8zmT0eiJxTbH6NaVLly46fPiwXnnlFXXo0EEeHh4OyydNmqTk5GTVrl1bc+fO1Z49e7Rjxw5NmDAh27tjEW6RpQYPHqzQ0FC1aNFCzZs3V+vWrVW6dGlnl/WfFxoaquTkZPsfIx8fH1WsWFH+/v667777JF3tw7Z69Wrt3r1bDz30kKpXr67XX39dgYGB6W4zV65cWrBggRITE1WrVi3973//s3+yOU+ePBmurV69enr22Wf15JNPqkiRIhozZky67QoUKKCPPvpI9evXV5UqVfTTTz/pu+++U6FChSRdfat5//79Kl26tIoUKZLh/ad65plndOHCBdWuXVtRUVHq169fmsHlBw4cqN9++03Vq1fXyJEj9f777ys8PFzS1T++P/zwgxo0aKAuXbqoXLlyat++vQ4cOGC/C/Xkk0/qtdde00svvaQaNWrowIEDeu655zJd639JTr6mdOvWTadPn9ZTTz2V5py32Wz64osv9P7772vBggUKDQ1VlSpVNGzYMD366KP28+ZelJHricQ1xerXlOLFi6tJkyY6ffq0unbtmmZ5qVKltGXLFjVs2FADBw5UpUqV9PDDD2v58uWaMmVKttZmM87u9QvAMtavX68HH3xQe/fuzTEBJCPCwsJUrVq1m34bUYkSJdS/f3/179//rtUF/NdxTcHtoM8tgNs2f/58eXp6qmzZstq7d6/69eun+vXr31N/hADkHFxTkBUItwBu29mzZzVo0CAdPHhQhQsXVpMmTfTee+85uywA9yiuKcgKdEsAAACAZfCBMgAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYArhEdHe3wnefDhg1TtWrVbrpO586d1bp162ytKyOmTZumoKAgubi4aNy4cRmq3dn2798vm82mmJgYZ5cCwCIItwAsoXPnzrLZbPZHoUKF1KxZM/3xxx+Z2s6TTz6p3bt3Z1OV2SchIUG9e/fWoEGDdPjwYfXo0UMvvPCCli9f7uzS7NL7JyAoKEhHjx5VpUqVnFMUAMsh3AKwjGbNmuno0aM6evSoli9fLldXV7Vo0SJT2/Dw8JCvr282VZh9Dh48qMuXL6t58+YKCAhQ3rx55enpqUKFCmX7vi9fvnzb6+bKlUv+/v5ydeU7hQBkDcIt
AMtwd3eXv7+//P39Va1aNb388ss6dOiQTpw4IUlatWqVbDabzpw5Y18nJiZGNptN+/fvl5S2W8L1kpOTNWDAABUoUECFChXSSy+9pIx8F8769esVFhamvHnzqmDBggoPD9fp06clSUlJSerbt698fX2VJ08ePfjgg9q0aZN93dS6ly9frpo1aypv3ryqV6+edu3aZa+5cuXKkqRSpUrZj+f6bglXrlxR37597bUPGjRIkZGRDndTS5QooXHjxjnUXq1aNQ0bNsw+bbPZNGXKFLVq1Ur58uXTm2++qeTkZHXr1k0lS5aUh4eH7rvvPo0fP96+zrBhwzRz5kwtXLjQfnd91apV6XZLWL16tWrXri13d3cFBATo5Zdf1pUrV+zLw8LC1LdvX7300kvy8fGRv7+/Q33GGA0bNkzFixeXu7u7AgMD1bdv31v+jgBYA+EWgCUlJibq888/V5kyZbL07uV7772n6OhoffLJJ1q3bp1OnTql+fPn33SdmJgYNW7cWBUrVtSGDRu0bt06tWzZUsnJyZKkl156SXPnztXMmTO1ZcsWlSlTRuHh4Tp16pTDdl599VW99957+u233+Tq6qquXbtKutqV4qeffpIk/frrrzp69KiCgoLS1PH2229r1qxZmjFjhtavX6+EhAQtWLDgtp6HYcOG6bHHHtO2bdvUtWtXpaSkqFixYpozZ47++usvvf7663rllVc0e/ZsSdILL7ygdu3aOdxdr1evXprtHj58WI888ohq1aql33//XVOmTNH06dM1cuRIh3YzZ85Uvnz5tHHjRo0ZM0YjRozQsmXLJElz587V2LFj9eGHH2rPnj1asGCBPfwDsD7eBwJgGYsWLZKnp6ck6dy5cwoICNCiRYvk4pJ1/8ePGzdOgwcPVps2bSRJU6dO1dKlS2+6zpgxY1SzZk1NnjzZPu/++++31zllyhRFR0crIiJCkvTRRx9p2bJlmj59ul588UX7Om+++aZCQ0MlSS+//LKaN2+uixcvysPDwx7gixQpIn9//3Tr+OCDDzR48GA99thjkqSJEyfqhx9+uJ2nQU899ZS6dOniMG/48OH2n0uWLKkNGzZo9uzZateunTw9PeXh4aGkpKQb1idJkydPVlBQkCZOnCibzaby5cvryJEjGjRokF5//XX777JKlSoaOnSoJKls2bKaOHGili9frocfflgHDx6Uv7+/mjRpoty5c6t48eKqXbv2bR0ngHsPd24BWEbDhg0VExOjmJgY/frrrwoPD1dERIQOHDiQJduPj4/X0aNHVadOHfs8V1dX1axZ86brpd65Tc++fft0+fJl1a9f3z4vd+7cql27tnbs2OHQtkqVKvafAwICJEnHjx/PcO3Hjh1zCHm5cuVSjRo1MrT+9dI75kmTJqlGjRoqUqSIPD09NW3aNB08eDBT292xY4dCQkJks9ns8+rXr6/ExET9888/9nnXPhfS1ecj9bl44okndOHCBZUqVUrdu3fX/PnzHbo1ALA2wi0Ay8iXL5/KlCmjMmXKqFatWvr444917tw5ffTRR5Jkv+t3bR/ZO/kwVEZ5eHhkyXZy585t/zk1/KWkpGTJtlO5uLik6UOc3nOUL18+h+mvvvpKL7zwgrp166Yff/xRMTEx6tKliy5dupSl9aW69rmQrj4fqc9FUFCQdu3apcmTJ8vDw0O9evVSgwYN7srvGoDzEW4BWJbNZpOLi4suXLgg6epb9pJ09OhRe5vMjK/q7e2tgIAAbdy40T7vypUr2rx5803Xq1Klyg2H5CpdurTc3Ny0fv16+7zLly9r06ZNqlixYoZruxVvb2/5+fk5fFAtOTlZW7ZscWhXpEgRh+cnISFBsbGxt9z++vXrVa9ePfXq1UvVq1dXmTJltG/fPoc2bm5u9n7GN1KhQgVt2LDBIWCvX79e+fPnV7FixW5ZRyoPDw+1bNlSEyZM0KpVq7RhwwZt27Ytw+sDuHcRbgFYRlJSkuLi4hQXF6cdO3aoT58+SkxMVMuWLSVJZcqUUVBQkIYNG6Y9e/bo+++/13vvvZepffTr10+jR4/WggULtHPnTvXq1cth9IX0DB48WJs2bVKvXr30xx9/aOfOnZoyZYpOnjypfPny6bnnntOLL76oJUuW6K+//lL37t11/vx5devW7XafinT16dNHo0aN0sKFC7Vr1y7169dPp0+fdugC0KhRI3322Wdau3attm3bpsjISOXKleuW2y5btqx+++03LV26VLt379Zrr73mEKSlqyMx/PHHH9q1a5dOnjyZ7p3UXr166dChQ+rTp4927typhQsXaujQoRowYECG+05HR0dr+vTp+vPPP/X333/r888/l4eHh4KDgzO0PoB7Gx8oA2AZS5YssfdFzZ8/v8qXL685c+YoLCxM0tW3sr/88ks999xzqlKlimrVqqWRI0fqiSeeyPA+Bg4cqKNHjyoyMlIuLi7q2rWrHnvsMcXHx99wnXLlyunHH3/UK6+8otq1a8vDw0N16tRRhw4dJEmjR49WSkqKOnXqpLNnz6pmzZpaunSpChYsePtPRjoGDRqkuLg4PfPMM8qVK5d69Oih8PBwh/A6ePBgxcbGqkWLFvL29tYbb7yRoTu3PXv21NatW/Xkk0/KZrOpQ4cO6tWrlxYvXmxv0717d61atUo1a9ZUYmKiVq5cqRIlSjhsp2jRovrhhx/04osvqmrVqvLx8VG3bt00ZMiQDB9ngQIFNHr0aA0YMEDJycmqXLmyvvvuu7sy5i8A57OZjAzQCACwnJSUFFWoUEHt2rXTG2+84exyACBLcOcWAP4jDhw4oB9//FGhoaFKSkrSxIkTFRsbq6eeesrZpQFAlqHPLQD8R7i4uCg6Olq1atVS/fr1tW3bNv3000+qUKGCs0sDgCxDtwQAAABYBnduAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZRBuAQAAYBmEWwAAAFgG4RYAAACWQbgFAACAZfw/BMsbm9B7E3gAAAAASUVORK5CYII=", "text/plain": [ "
" ] diff --git a/samples/python/sample_weight_stripping/requirements.txt b/samples/python/sample_weight_stripping/requirements.txt index a9ca7fb0..eaf6990e 100644 --- a/samples/python/sample_weight_stripping/requirements.txt +++ b/samples/python/sample_weight_stripping/requirements.txt @@ -3,5 +3,5 @@ cuda-python==12.2.0 pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/simple_progress_monitor/requirements.txt b/samples/python/simple_progress_monitor/requirements.txt index a9ca7fb0..eaf6990e 100644 --- a/samples/python/simple_progress_monitor/requirements.txt +++ b/samples/python/simple_progress_monitor/requirements.txt @@ -3,5 +3,5 @@ cuda-python==12.2.0 pywin32; platform_system == "Windows" pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/tensorflow_object_detection_api/README.md b/samples/python/tensorflow_object_detection_api/README.md index 47ce0385..b25f1ebf 100644 --- a/samples/python/tensorflow_object_detection_api/README.md +++ b/samples/python/tensorflow_object_detection_api/README.md @@ -7,7 +7,7 @@ Support for [TensorFlow Object Detection (TFOD) API](https://github.com/tensorfl ### TensorFlow and TensorRT Environment -In order for scripts to work we suggest an environment with TensorRT >= 8.0.1 and TensorFlow 2.12.0. +In order for scripts to work we suggest an environment with TensorRT >= 8.0.1 and TensorFlow 2.13.1. Install TensorRT as per the [TensorRT Install Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). You will need to make sure the Python bindings for TensorRT are also installed correctly, these are available by installing the `python3-libnvinfer` and `python3-libnvinfer-dev` packages on your TensorRT download. @@ -317,6 +317,9 @@ If you run this on COCO val2017 images, you may also add the parameter `--annota # Changelog +May 2024: + - Update TensorFlow version support to 2.13.1. + August 2023: - Removed support for Python versions < 3.8. 
- Update ONNX version support to 1.14.0 diff --git a/samples/python/tensorflow_object_detection_api/requirements.txt b/samples/python/tensorflow_object_detection_api/requirements.txt index 13971563..e7ee8eaf 100644 --- a/samples/python/tensorflow_object_detection_api/requirements.txt +++ b/samples/python/tensorflow_object_detection_api/requirements.txt @@ -1,4 +1,4 @@ -onnx==1.14.0 +onnx==1.16.0 onnxruntime==1.15.1 Pillow>=10.0.0 tf2onnx==1.15.0 @@ -9,5 +9,5 @@ pywin32; platform_system == "Windows" Cython<3.0 pyyaml==5.3.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 numpy==1.24.4 diff --git a/samples/python/yolov3_onnx/requirements.txt b/samples/python/yolov3_onnx/requirements.txt index 877da733..31bdfbf0 100644 --- a/samples/python/yolov3_onnx/requirements.txt +++ b/samples/python/yolov3_onnx/requirements.txt @@ -1,9 +1,9 @@ cuda-python==12.2.0 pywin32; platform_system == "Windows" numpy==1.24.4 -onnx==1.14.0 +onnx==1.16.0 Pillow>=10.0.0 protobuf==3.20.3 pyyaml==6.0.1 requests==2.31.0 -tqdm==4.66.1 +tqdm==4.66.4 diff --git a/samples/sampleCharRNN/sampleCharRNN.cpp b/samples/sampleCharRNN/sampleCharRNN.cpp index 8ddbb2ac..9b6721fb 100644 --- a/samples/sampleCharRNN/sampleCharRNN.cpp +++ b/samples/sampleCharRNN/sampleCharRNN.cpp @@ -726,6 +726,13 @@ void SampleCharRNNBase::constructNetwork(SampleUniquePtr& bu network->markOutput(*pred->getOutput(1)); pred->getOutput(1)->setType(nvinfer1::DataType::kINT32); + SampleUniquePtr timingCache{}; + if (!mParams.timingCacheFile.empty()) + { + timingCache = samplesCommon::buildTimingCacheFromFile( + sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile, sample::gLogError); + } + sample::gLogInfo << "Done constructing network..." << std::endl; SampleUniquePtr plan{builder->buildSerializedNetwork(*network, *config)}; @@ -734,6 +741,12 @@ void SampleCharRNNBase::constructNetwork(SampleUniquePtr& bu return; } + if (timingCache != nullptr && !mParams.timingCacheFile.empty()) + { + samplesCommon::updateTimingCacheFile( + sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); + } + mRuntime = std::shared_ptr(createInferRuntime(sample::gLogger.getTRTLogger())); if (!mRuntime) { @@ -924,6 +937,7 @@ SampleCharRNNParams initializeSampleParams(const samplesCommon::Args& args) params.weightFileName = locateFile("char-rnn.wts", params.dataDirs); params.saveEngine = args.saveEngine; params.loadEngine = args.loadEngine; + params.timingCacheFile = args.timingCacheFile; // Input strings and their respective expected output strings const std::vector inS{ @@ -963,15 +977,16 @@ SampleCharRNNParams initializeSampleParams(const samplesCommon::Args& args) void printHelpInfo() { std::cout << "Usage: ./sample_char_rnn [-h or --help] [-d or --datadir=]\n"; - std::cout << "--help Display help information\n"; - std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " + std::cout << "--help Display help information\n"; + std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " "multiple times to add multiple directories. If no data directories are given, the default is to use " "data/samples/char-rnn/ and data/char-rnn/" << std::endl; - std::cout << "--loadEngine Specify path from which to load the engine. When this option is provided, engine " - "building is skipped." + std::cout << "--loadEngine Specify path from which to load the engine. 
When this option is provided, engine " << std::endl; - std::cout << "--saveEngine Specify path at which to save the engine." << std::endl; + std::cout << "--saveEngine Specify path at which to save the engine." << std::endl; + std::cout << "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be " + << "created." << std::endl; } //! diff --git a/samples/sampleDynamicReshape/sampleDynamicReshape.cpp b/samples/sampleDynamicReshape/sampleDynamicReshape.cpp index 0f880509..df7bd1c5 100644 --- a/samples/sampleDynamicReshape/sampleDynamicReshape.cpp +++ b/samples/sampleDynamicReshape/sampleDynamicReshape.cpp @@ -214,16 +214,31 @@ bool SampleDynamicReshape::buildPreprocessorEngine(const SampleUniquePtrsetInt8Calibrator(calibrator.get()); } - SampleUniquePtr preprocessorPlan = makeUnique( - builder->buildSerializedNetwork(*preprocessorNetwork, *preprocessorConfig)); + SampleUniquePtr timingCache{}; + + // Load timing cache + if (!mParams.timingCacheFile.empty()) + { + timingCache = samplesCommon::buildTimingCacheFromFile( + sample::gLogger.getTRTLogger(), *preprocessorConfig, mParams.timingCacheFile, sample::gLogError); + } + + SampleUniquePtr preprocessorPlan + = makeUnique(builder->buildSerializedNetwork(*preprocessorNetwork, *preprocessorConfig)); if (!preprocessorPlan) { sample::gLogError << "Preprocessor serialized engine build failed." << std::endl; return false; } - mPreprocessorEngine = makeUnique( - runtime->deserializeCudaEngine(preprocessorPlan->data(), preprocessorPlan->size())); + if (timingCache != nullptr && !mParams.timingCacheFile.empty()) + { + samplesCommon::updateTimingCacheFile( + sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); + } + + mPreprocessorEngine + = makeUnique(runtime->deserializeCudaEngine(preprocessorPlan->data(), preprocessorPlan->size())); if (!mPreprocessorEngine) { sample::gLogError << "Preprocessor engine deserialization failed." << std::endl; @@ -322,15 +337,31 @@ bool SampleDynamicReshape::buildPredictionEngine(const SampleUniquePtrsetInt8Calibrator(calibrator.get()); } // Build the prediciton engine. - SampleUniquePtr predictionPlan = makeUnique(builder->buildSerializedNetwork(*network, *config)); + SampleUniquePtr timingCache{}; + + // Load timing cache + if (!mParams.timingCacheFile.empty()) + { + timingCache = samplesCommon::buildTimingCacheFromFile( + sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile, sample::gLogError); + } + + // Build the prediction engine. + SampleUniquePtr predictionPlan + = makeUnique(builder->buildSerializedNetwork(*network, *config)); if (!predictionPlan) { sample::gLogError << "Prediction serialized engine build failed." << std::endl; return false; } - mPredictionEngine = makeUnique( - runtime->deserializeCudaEngine(predictionPlan->data(), predictionPlan->size())); + if (timingCache != nullptr && !mParams.timingCacheFile.empty()) + { + samplesCommon::updateTimingCacheFile( + sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); + } + + mPredictionEngine = makeUnique(runtime->deserializeCudaEngine(predictionPlan->data(), predictionPlan->size())); if (!mPredictionEngine) { sample::gLogError << "Prediction engine deserialization failed." 
<< std::endl; @@ -504,6 +535,7 @@ samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args params.int8 = args.runInInt8; params.fp16 = args.runInFp16; params.bf16 = args.runInBf16; + params.timingCacheFile = args.timingCacheFile; return params; } @@ -512,16 +544,19 @@ samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args //! void printHelpInfo() { - std::cout << "Usage: ./sample_dynamic_reshape [-h or --help] [-d or --datadir=]" + std::cout << "Usage: ./sample_dynamic_reshape [-h or --help] [-d or --datadir=] " + "[--timingCacheFile=]" << std::endl; - std::cout << "--help, -h Display help information" << std::endl; - std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " + std::cout << "--help, -h Display help information" << std::endl; + std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " "multiple times to add multiple directories. If no data directories are given, the default is to use " "(data/samples/mnist/, data/mnist/)" << std::endl; - std::cout << "--int8 Run in Int8 mode." << std::endl; - std::cout << "--fp16 Run in FP16 mode." << std::endl; - std::cout << "--bf16 Run in BF16 mode." << std::endl; + std::cout << "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be " + << "created." << std::endl; + std::cout << "--int8 Run in Int8 mode." << std::endl; + std::cout << "--fp16 Run in FP16 mode." << std::endl; + std::cout << "--bf16 Run in BF16 mode." << std::endl; } int main(int argc, char** argv) diff --git a/samples/sampleINT8API/README.md b/samples/sampleINT8API/README.md index 644cab8b..173ea20d 100644 --- a/samples/sampleINT8API/README.md +++ b/samples/sampleINT8API/README.md @@ -75,8 +75,8 @@ Specifically, this sample performs the following steps: 3. Set the dynamic range for per layer tensors: ``` - string tensor_name = network->getLayer(i)->getOutput(j)->getName(); - network->getLayer(i)->getOutput(j)->setDynamicRange(-tensorMap.at(tensor_name), tensorMap.at(tensor_name)); + string tensor_name = network->getLayer(i)->getOutput(j)->getName(); + network->getLayer(i)->getOutput(j)->setDynamicRange(-tensorMap.at(name), tensorMap.at(name)); ``` 4. Optional: This sample also showcases using layer precision APIs. Using these APIs, you can selectively choose to run the layer with user configurable precision and type constraints. It may not result in optimal inference performance, but can be helpful while debugging mixed precision inference. diff --git a/samples/sampleINT8API/sampleINT8API.cpp b/samples/sampleINT8API/sampleINT8API.cpp index 7cf6e819..97742a7f 100644 --- a/samples/sampleINT8API/sampleINT8API.cpp +++ b/samples/sampleINT8API/sampleINT8API.cpp @@ -72,6 +72,7 @@ struct SampleINT8APIParams std::string imageFileName; std::string referenceFileName; std::string networkTensorsFileName; + std::string timingCacheFile; }; //! 
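// Context for the member added above: as the hunks below show, when timingCacheFile is
// non-empty, SampleINT8API::build() attaches the cache to the builder config through
// samplesCommon::buildTimingCacheFromFile() before buildSerializedNetwork(), then
// persists the updated cache with samplesCommon::updateTimingCacheFile().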
@@ -84,7 +85,7 @@ class SampleINT8API { private: template - using SampleUniquePtr = std::unique_ptr; + using SampleUniquePtr = std::unique_ptr; public: SampleINT8API(const SampleINT8APIParams& params) @@ -386,10 +387,11 @@ bool SampleINT8API::setDynamicRange(SampleUniquePtr(wts.values)[wb]; break; case DataType::kINT32: val = static_cast(wts.values)[wb]; break; case DataType::kUINT8: val = static_cast(wts.values)[wb]; break; - case DataType::kFP8: ASSERT(!"FP8 is not supported"); break; + case DataType::kFP8: case DataType::kBF16: case DataType::kINT4: - case DataType::kINT64: ASSERT(false && "Unsupported data type"); + case DataType::kINT64: + ASSERT(false && "Unsupported data type"); } max = std::max(max, std::abs(val)); } @@ -578,6 +580,13 @@ sample::Logger::TestResult SampleINT8API::build() } config->setProfileStream(*profileStream); + SampleUniquePtr timingCache; + if (!mParams.timingCacheFile.empty()) + { + timingCache = samplesCommon::buildTimingCacheFromFile( + sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile, sample::gLogError); + } + SampleUniquePtr plan{builder->buildSerializedNetwork(*network, *config)}; if (!plan) { @@ -585,6 +594,12 @@ sample::Logger::TestResult SampleINT8API::build() return sample::Logger::TestResult::kFAILED; } + if (timingCache != nullptr && !mParams.timingCacheFile.empty()) + { + samplesCommon::updateTimingCacheFile( + sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); + } + if (!mRuntime) { mRuntime = SampleUniquePtr(createInferRuntime(sample::gLogger.getTRTLogger())); @@ -745,6 +760,10 @@ bool parseSampleINT8APIArgs(SampleINT8APIArgs& args, int argc, char* argv[]) } args.dataDirs.push_back(dirPath); } + else if (!strncmp(argv[i], "--timingCacheFile=", 18)) + { + args.timingCacheFile = (argv[i] + 18); + } else if (!strncmp(argv[i], "--verbose", 9) || !strncmp(argv[i], "-v", 2)) { args.verbose = true; @@ -807,6 +826,7 @@ SampleINT8APIParams initializeSampleParams(SampleINT8APIArgs args) params.dlaCore = args.useDLACore; params.writeNetworkTensors = args.writeNetworkTensors; params.networkTensorsFileName = args.networkTensorsFileName; + params.timingCacheFile = args.timingCacheFile; validateInputParams(params); return params; } @@ -818,7 +838,8 @@ void printHelpInfo() { std::cout << "Usage: ./sample_int8_api [-h or --help] [--model=model_file] " "[--ranges=per_tensor_dynamic_range_file] [--image=image_file] [--reference=reference_file] " - "[--data=/path/to/data/dir] [--useDLACore=] [-v or --verbose]\n"; + "[--data=/path/to/data/dir] [--useDLACore=] [-v or --verbose] " + "[--timingCacheFile=timing_cache_file]\n"; std::cout << "-h or --help. Display This help information" << std::endl; std::cout << "--model=model_file.onnx or /absolute/path/to/model_file.onnx. Generate model file using README.md in " "case it does not exists. Default to resnet50.onnx" @@ -846,6 +867,9 @@ void printHelpInfo() std::cout << "--useDLACore=N. Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, " "where n is the number of DLA engines on the platform." << std::endl; + std::cout << "--timingCacheFile=functional.cache or /absolute/path/to/functional.cache. Specify path for timing " + "cache file. If it does not already exist, it will be created. Defaults to not using a timing cache." + << std::endl; std::cout << "--verbose. 
Outputs per-tensor dynamic range and layer precision info for the network" << std::endl; } diff --git a/samples/sampleIOFormats/sampleIOFormats.cpp b/samples/sampleIOFormats/sampleIOFormats.cpp index 9e167134..b42d2cd2 100644 --- a/samples/sampleIOFormats/sampleIOFormats.cpp +++ b/samples/sampleIOFormats/sampleIOFormats.cpp @@ -413,12 +413,27 @@ bool SampleIOFormats::build(int32_t dataWidth) } config->setProfileStream(*profileStream); + SampleUniquePtr timingCache{}; + + // Load timing cache + if (!mParams.timingCacheFile.empty()) + { + timingCache = samplesCommon::buildTimingCacheFromFile( + sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile, sample::gLogError); + } + SampleUniquePtr plan{builder->buildSerializedNetwork(*network, *config)}; if (!plan) { return false; } + if (timingCache != nullptr && !mParams.timingCacheFile.empty()) + { + samplesCommon::updateTimingCacheFile( + sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); + } + if (!mRuntime) { mRuntime = SampleUniquePtr(createInferRuntime(sample::gLogger.getTRTLogger())); @@ -541,6 +556,7 @@ samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const } params.onnxFileName = "mnist.onnx"; params.dlaCore = args.useDLACore; + params.timingCacheFile = args.timingCacheFile; return params; } @@ -550,16 +566,18 @@ samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const void printHelpInfo() { std::cout - << "Usage: ./sample_onnx_mnist [-h or --help] [-d or --datadir=] [--useDLACore=]" - << std::endl; - std::cout << "--help Display help information" << std::endl; - std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " + << "Usage: ./sample_onnx_mnist [-h or --help] [-d or --datadir=] [--useDLACore=] " + << "[-t or --timingCacheFile=]" << std::endl; + std::cout << "--help Display help information" << std::endl; + std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " "multiple times to add multiple directories. If no data directories are given, the default is to use " "(data/samples/mnist/, data/mnist/)" << std::endl; - std::cout << "--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, " + std::cout << "--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, " "where n is the number of DLA engines on the platform." << std::endl; + std::cout << "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be " + << "created." << std::endl; } //! //! 
\brief Used to run the engine build and inference/reference functions diff --git a/samples/sampleNamedDimensions/sampleNamedDimensions.cpp b/samples/sampleNamedDimensions/sampleNamedDimensions.cpp index 11e04841..3dc5c9e5 100644 --- a/samples/sampleNamedDimensions/sampleNamedDimensions.cpp +++ b/samples/sampleNamedDimensions/sampleNamedDimensions.cpp @@ -181,12 +181,27 @@ bool SampleNamedDimensions::build() addOptimizationProfile(config, builder); + SampleUniquePtr timingCache{}; + + // Load timing cache + if (!mParams.timingCacheFile.empty()) + { + timingCache = samplesCommon::buildTimingCacheFromFile( + sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile, sample::gLogError); + } + SampleUniquePtr plan{builder->buildSerializedNetwork(*network, *config)}; if (!plan) { return false; } + if (timingCache != nullptr && !mParams.timingCacheFile.empty()) + { + samplesCommon::updateTimingCacheFile( + sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); + } + if (!mRuntime) { mRuntime = SampleUniquePtr(createInferRuntime(sample::gLogger.getTRTLogger())); @@ -387,6 +402,7 @@ samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const params.inputTensorNames.push_back("input0"); params.inputTensorNames.push_back("input1"); params.outputTensorNames.push_back("output"); + params.timingCacheFile = args.timingCacheFile; return params; } @@ -396,14 +412,15 @@ samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const //! void printHelpInfo() { - std::cout - << "Usage: ./sample_named_dimensions [-h or --help] [-d or --datadir=]" - << std::endl; - std::cout << "--help Display help information" << std::endl; - std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " + std::cout << "Usage: ./sample_named_dimensions [-h or --help] [-d or --datadir=] " + << "[--timingCacheFile=]" << std::endl; + std::cout << "--help Display help information" << std::endl; + std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " "multiple times to add multiple directories. If no data directories are given, the default is to use " "(trt/samples/sampleNamedDimensions)" << std::endl; + std::cout << "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be " + << "created." << std::endl; } int32_t main(int32_t argc, char** argv) diff --git a/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp b/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp index 9dfd67c8..1945f6e6 100644 --- a/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp +++ b/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp @@ -85,7 +85,7 @@ class SampleOnnxMNIST //! bool constructNetwork(SampleUniquePtr& builder, SampleUniquePtr& network, SampleUniquePtr& config, - SampleUniquePtr& parser); + SampleUniquePtr& parser, SampleUniquePtr& timingCache); //! //!
\brief Reads the input and stores the result in a managed buffer @@ -133,7 +133,9 @@ bool SampleOnnxMNIST::build() return false; } - auto constructed = constructNetwork(builder, network, config, parser); + auto timingCache = SampleUniquePtr(); + + auto constructed = constructNetwork(builder, network, config, parser, timingCache); if (!constructed) { return false; @@ -153,6 +155,12 @@ bool SampleOnnxMNIST::build() return false; } + if (timingCache != nullptr && !mParams.timingCacheFile.empty()) + { + samplesCommon::updateTimingCacheFile( + sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); + } + mRuntime = std::shared_ptr(createInferRuntime(sample::gLogger.getTRTLogger())); if (!mRuntime) { @@ -187,7 +195,7 @@ bool SampleOnnxMNIST::build() //! bool SampleOnnxMNIST::constructNetwork(SampleUniquePtr& builder, SampleUniquePtr& network, SampleUniquePtr& config, - SampleUniquePtr& parser) + SampleUniquePtr& parser, SampleUniquePtr& timingCache) { auto parsed = parser->parseFromFile(locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), static_cast(sample::gLogger.getReportableSeverity())); @@ -209,6 +217,11 @@ bool SampleOnnxMNIST::constructNetwork(SampleUniquePtr& buil config->setFlag(BuilderFlag::kINT8); samplesCommon::setAllDynamicRanges(network.get(), 127.0F, 127.0F); } + if (mParams.timingCacheFile.size()) + { + timingCache = samplesCommon::buildTimingCacheFromFile( + sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile, sample::gLogError); + } samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore); @@ -359,6 +372,7 @@ samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args params.int8 = args.runInInt8; params.fp16 = args.runInFp16; params.bf16 = args.runInBf16; + params.timingCacheFile = args.timingCacheFile; return params; } @@ -370,18 +384,20 @@ void printHelpInfo() { std::cout << "Usage: ./sample_onnx_mnist [-h or --help] [-d or --datadir=] [--useDLACore=]" - << std::endl; - std::cout << "--help Display help information" << std::endl; - std::cout << "--datadir Specify path to a data directory, overriding the default. 
This option can be used " + << "[-t or --timingCacheFile=(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); if (!builder) { @@ -154,12 +155,27 @@ bool SampleOnnxMnistCoordConvAC::build() } config->setProfileStream(*profileStream); + SampleUniquePtr timingCache{}; + + // Load timing cache + if (!mParams.timingCacheFile.empty()) + { + timingCache = samplesCommon::buildTimingCacheFromFile( + sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile, sample::gLogError); + } + SampleUniquePtr plan{builder->buildSerializedNetwork(*network, *config)}; if (!plan) { return false; } + if (timingCache != nullptr && !mParams.timingCacheFile.empty()) + { + samplesCommon::updateTimingCacheFile( + sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); + } + if (!mRuntime) { mRuntime = SampleUniquePtr(createInferRuntime(sample::gLogger.getTRTLogger())); @@ -365,6 +381,7 @@ samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args params.dlaCore = args.useDLACore; params.int8 = args.runInInt8; params.fp16 = args.runInFp16; + params.timingCacheFile = args.timingCacheFile; return params; } @@ -375,18 +392,20 @@ samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args void printHelpInfo() { std::cout << "Usage: ./sample_onnx_mnist_coord_conv_ac [-h or --help] [-d or --datadir=] " - "[--useDLACore=]" + "[--useDLACore=] [--timingCacheFile=]" << std::endl; - std::cout << "--help Display help information" << std::endl; - std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " + std::cout << "--help Display help information" << std::endl; + std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " "multiple times to add multiple directories. If no data directories are given, the default is to use " "(data/samples/mnist/, data/mnist/)" << std::endl; - std::cout << "--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, " + std::cout << "--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, " "where n is the number of DLA engines on the platform." << std::endl; - std::cout << "--int8 Run in Int8 mode." << std::endl; - std::cout << "--fp16 Run in FP16 mode." << std::endl; + std::cout << "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be " + << "created." << std::endl; + std::cout << "--int8 Run in Int8 mode." << std::endl; + std::cout << "--fp16 Run in FP16 mode." 
<< std::endl; } int main(int argc, char** argv) diff --git a/samples/sampleProgressMonitor/sampleProgressMonitor.cpp b/samples/sampleProgressMonitor/sampleProgressMonitor.cpp index 393dc617..84cfa8db 100644 --- a/samples/sampleProgressMonitor/sampleProgressMonitor.cpp +++ b/samples/sampleProgressMonitor/sampleProgressMonitor.cpp @@ -303,12 +303,27 @@ bool SampleProgressMonitor::build(IProgressMonitor* monitor) } config->setProfileStream(*profileStream); + SampleUniquePtr timingCache{}; + + // Load timing cache + if (!mParams.timingCacheFile.empty()) + { + timingCache = samplesCommon::buildTimingCacheFromFile( + sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile, sample::gLogError); + } + SampleUniquePtr plan{builder->buildSerializedNetwork(*network, *config)}; if (!plan) { return false; } + if (timingCache != nullptr && !mParams.timingCacheFile.empty()) + { + samplesCommon::updateTimingCacheFile( + sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder); + } + mEngine = std::shared_ptr( mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter()); if (!mEngine) @@ -510,6 +525,7 @@ samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const params.onnxFileName = "mnist.onnx"; params.inputTensorNames.push_back("Input3"); params.outputTensorNames.push_back("Plus214_Output_0"); + params.timingCacheFile = args.timingCacheFile; return params; } @@ -520,7 +536,7 @@ samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const void printHelpInfo() { std::cout << "Usage: ./sample_progress_monitor [-h or --help] [-d or --datadir=] " - "[--useDLACore=]\n"; + "[--useDLACore=] [--timingCacheFile=]\n"; std::cout << "--help Display help information\n"; std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used " "multiple times to add multiple directories. If no data directories are given, the default is to use " @@ -529,6 +545,8 @@ void printHelpInfo() std::cout << "--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, " "where n is the number of DLA engines on the platform." << std::endl; + std::cout << "--timingCacheFile Specify path to a timing cache file. If it does not already exist, it will be " + << "created." << std::endl; std::cout << "--int8 Run in Int8 mode.\n"; std::cout << "--fp16 Run in FP16 mode.\n"; } diff --git a/samples/trtexec/trtexec.cpp b/samples/trtexec/trtexec.cpp index f3f72a1f..3f128173 100644 --- a/samples/trtexec/trtexec.cpp +++ b/samples/trtexec/trtexec.cpp @@ -31,7 +31,9 @@ #include #include "NvInfer.h" +#if !TRT_WINML #include "NvInferPlugin.h" +#endif #include "buffers.h" #include "common.h" @@ -278,7 +280,7 @@ int main(int argc, char** argv) // Record specified runtime gUseRuntime = options.build.useRuntime; - +#if !TRT_WINML #if !TRT_STATIC LibraryPtr nvinferPluginLib{}; #endif @@ -305,7 +307,7 @@ int main(int argc, char** argv) { throw std::runtime_error("TRT-18412: Plugins require --useRuntime=full."); } - +#endif // !TRT_WINML if (options.build.safe && !sample::hasSafeRuntime()) { sample::gLogError << "Safety is not supported because safety runtime library is unavailable." 
<< std::endl; @@ -336,9 +338,10 @@ int main(int argc, char** argv) return sample::gLogger.reportPass(sampleTest); } +#if !TRT_WINML // dynamicPlugins may have been updated by getEngineBuildEnv above bEnv->engine.setDynamicPlugins(options.system.dynamicPlugins); - +#endif if (!options.build.safe && !options.build.buildDLAStandalone && options.build.refittable) { auto* engine = bEnv->engine.get(); @@ -401,9 +404,12 @@ int main(int argc, char** argv) bool const profilerEnabled = options.reporting.profile || !options.reporting.exportProfile.empty(); - if (iEnv->safe && profilerEnabled) + bool const layerInfoEnabled = options.reporting.layerInfo || !options.reporting.exportLayerInfo.empty(); + + if (iEnv->safe && (profilerEnabled || layerInfoEnabled)) { - sample::gLogError << "Safe runtime does not support --dumpProfile or --exportProfile=, please use " + sample::gLogError << "Safe runtime does not support --dumpProfile or --exportProfile= or " + "--dumpLayerInfo or --exportLayerInfo=, please use " "--verbose to print profiling info." << std::endl; return sample::gLogger.reportFail(sampleTest); diff --git a/samples/utils/timingCache.cpp b/samples/utils/timingCache.cpp index aec9674e..18e85ba4 100644 --- a/samples/utils/timingCache.cpp +++ b/samples/utils/timingCache.cpp @@ -18,6 +18,7 @@ #include "timingCache.h" #include "NvInfer.h" #include "fileLock.h" +#include "sampleUtils.h" #include #include #include @@ -61,6 +62,19 @@ std::vector loadTimingCacheFile(ILogger& logger, std::string const& inFile return {}; } +std::unique_ptr buildTimingCacheFromFile( + ILogger& logger, IBuilderConfig& config, std::string const& timingCacheFile, std::ostream& err) +{ + std::unique_ptr timingCache{}; + auto timingCacheContents = loadTimingCacheFile(logger, timingCacheFile); + timingCache.reset(config.createTimingCache(timingCacheContents.data(), timingCacheContents.size())); + SMP_RETVAL_IF_FALSE(timingCache != nullptr, "TimingCache creation failed", nullptr, err); + config.clearFlag(BuilderFlag::kDISABLE_TIMING_CACHE); + SMP_RETVAL_IF_FALSE( + config.setTimingCache(*timingCache, true), "IBuilderConfig setTimingCache failed", nullptr, err); + return timingCache; +} + void saveTimingCacheFile(ILogger& logger, std::string const& outFileName, IHostMemory const* blob) { try diff --git a/samples/utils/timingCache.h b/samples/utils/timingCache.h index c8ffbd97..c4c76e37 100644 --- a/samples/utils/timingCache.h +++ b/samples/utils/timingCache.h @@ -27,6 +27,8 @@ namespace nvinfer1 namespace utils { std::vector loadTimingCacheFile(nvinfer1::ILogger& logger, std::string const& inFileName); +std::unique_ptr buildTimingCacheFromFile( + ILogger& logger, IBuilderConfig& config, std::string const& timingCacheFile, std::ostream& err); void saveTimingCacheFile(nvinfer1::ILogger& logger, std::string const& outFileName, nvinfer1::IHostMemory const* blob); void updateTimingCacheFile(nvinfer1::ILogger& logger, std::string const& fileName, nvinfer1::ITimingCache const* timingCache, nvinfer1::IBuilder& builder); diff --git a/tools/Polygraphy/CHANGELOG.md b/tools/Polygraphy/CHANGELOG.md index c3fb0151..50cd8e72 100644 --- a/tools/Polygraphy/CHANGELOG.md +++ b/tools/Polygraphy/CHANGELOG.md @@ -3,6 +3,20 @@ Dates are in YYYY-MM-DD format. +## v0.49.12 (2024-05-28) +### Added +- Added `runtime_platform` to `CreateConfig` for TensorRT and corresponding `--runtime-platform` command-line option. + + +## v0.49.11 (2024-05-09) +### Added +- Added TensorRT 10.1 weight streaming V2 APIs. +- Added TensorRT 10.1 runtime device memory V2 APIs. 
+ +### Changed +- Changed the meaning of the `TrtRunner`'s weight streaming budget argument. + + ## v0.49.10 (2024-04-19) ### Added - Added an `EngineFromPath` loader to deserialize an engine directly from disk. This will save CPU memory when weight streaming is enabled. diff --git a/tools/Polygraphy/docs/_templates/footer.html b/tools/Polygraphy/docs/_templates/footer.html new file mode 100644 index 00000000..164c30ce --- /dev/null +++ b/tools/Polygraphy/docs/_templates/footer.html @@ -0,0 +1,29 @@ +{% extends "!footer.html" %} +{%- block contentinfo %} +{{ super }} + + + +{% endblock %} diff --git a/tools/Polygraphy/polygraphy/__init__.py b/tools/Polygraphy/polygraphy/__init__.py index 2feb03ed..de070e0d 100644 --- a/tools/Polygraphy/polygraphy/__init__.py +++ b/tools/Polygraphy/polygraphy/__init__.py @@ -1,3 +1,3 @@ import polygraphy.config -__version__ = "0.49.10" +__version__ = "0.49.12" diff --git a/tools/Polygraphy/polygraphy/backend/trt/config.py b/tools/Polygraphy/polygraphy/backend/trt/config.py index 317deec0..34b7810e 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/config.py +++ b/tools/Polygraphy/polygraphy/backend/trt/config.py @@ -66,6 +66,7 @@ def __init__( disable_compilation_cache=None, progress_monitor=None, weight_streaming=None, + runtime_platform=None, ): """ Creates a TensorRT IBuilderConfig that can be used by EngineFromNetwork. @@ -194,6 +195,10 @@ def __init__( A progress monitor. Allow users to view engine building progress through CLI. weight_streaming (bool): TWhether to enable weight streaming for the TensorRT Engine. + runtime_platform (trt.RuntimePlatform): + Describes the intended runtime platform (operating system and CPU architecture) for the execution of the TensorRT engine. + TensorRT provides support for cross-platform engine compatibility when the target runtime platform is different from the build platform. + Defaults to TensorRT's default runtime platform. """ self.tf32 = util.default(tf32, False) self.fp16 = util.default(fp16, False) @@ -229,6 +234,7 @@ def __init__( self.disable_compilation_cache = util.default(disable_compilation_cache, False) self.progress_monitor = progress_monitor self.weight_streaming = weight_streaming + self.runtime_platform = runtime_platform if self.calibrator is not None and not self.int8: G_LOGGER.warning( @@ -505,6 +511,13 @@ def set_progress_monitor(): if self.weight_streaming: try_set_flag("WEIGHT_STREAMING") + + if self.runtime_platform is not None: + + def set_runtime_platform(): + config.runtime_platform = self.runtime_platform + + try_run(set_runtime_platform, "runtime_platform") return config diff --git a/tools/Polygraphy/polygraphy/backend/trt/runner.py b/tools/Polygraphy/polygraphy/backend/trt/runner.py index ef77aa79..e232f680 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/runner.py +++ b/tools/Polygraphy/polygraphy/backend/trt/runner.py @@ -168,14 +168,14 @@ def __init__( - "profile": Allocate device memory enough for the current profile based on profile max shapes. - "runtime": Allocate device meomry enough for the current input shapes. weight_streaming_budget (int): - The amount of GPU memory that TensorRT can use for weights at runtime. Tt can take on the following values: - None or 0: Disables weight streaming at runtime. + The amount of GPU memory that TensorRT can use for weights at runtime. It can take on the following values: + None or -2: Disables weight streaming at runtime. -1: TensorRT will decide the streaming budget automatically. 
- > 0: The maximum amount of GPU memory TensorRT is allowed to use for weights in bytes. + >= 0: The maximum amount of GPU memory TensorRT is allowed to use for weights in bytes. weight_streaming_percent (float): - The percentage of weights that TRT will stream from CPU to GPU. It can take on the following values: - None or 0: Disables weight streaming at runtime. - [0 to 100]: The percentage of weights TRT will stream. 100 will stream the maximum number of weights. + The percentage of weights that TRT will keep on the GPU. It can take on the following values: + None or 100%: Disables weight streaming at runtime. + [0 to 100]: The percentage of weights TRT will stream. 0 will stream the maximum number of weights. """ super().__init__(name=name, prefix="trt-runner") self._engine_or_context = engine @@ -190,35 +190,7 @@ def activate_impl(self): if isinstance(engine_or_context, trt.ICudaEngine): self.engine = engine_or_context - - # Setup weight streaming if applicable - if self.weight_streaming_budget != None and self.weight_streaming_percent != None: - G_LOGGER.critical(f"Cannot specify the weight streaming budget both in bytes and percentage.") - - budget_bytes = None - if self.weight_streaming_budget is not None: - assert self.weight_streaming_budget == -1 or self.weight_streaming_budget >= 0 - budget_bytes = self.weight_streaming_budget - elif self.weight_streaming_percent is not None: - assert 0 <= self.weight_streaming_percent <= 100 - if self.weight_streaming_percent == 0: - budget_bytes = 0 # Disable weight streaming - else: - min_budget = self.engine.minimum_weight_streaming_budget - max_budget = self.engine.streamable_weights_size - budget_bytes = (1 - self.weight_streaming_percent / 100.0) * (max_budget - min_budget) + min_budget - if budget_bytes is not None: - budget_bytes = int(budget_bytes) - self.engine.weight_streaming_budget = budget_bytes - if self.engine.weight_streaming_budget != budget_bytes: - G_LOGGER.critical(f"Failed to set weight streaming budget to {budget_bytes}!") - if budget_bytes == 0: - G_LOGGER.info(f"Weight streaming is disabled.") - elif budget_bytes == -1: - G_LOGGER.info(f"Weight streaming is enabled with TensorRT automatically determiing the budget.") - else: - G_LOGGER.info(f"Weight streaming is enabled with a memory budget of {budget_bytes} bytes.") - + self._set_weight_streaming_budget() allocation_strategy = util.default(self.allocation_strategy, "static") if allocation_strategy == "static": self.context = self.engine.create_execution_context() @@ -352,9 +324,15 @@ def get_io(mode): if self.allocation_strategy in ["profile", "runtime"]: if self.allocation_strategy == "profile": # Perform per-profile allocation. - size_to_allocate = self.engine.get_device_memory_size_for_profile( - self.context.active_optimization_profile - ) + size_to_allocate = 0 + if mod.version(trt.__version__) >= mod.version("10.1"): + size_to_allocate = self.engine.get_device_memory_size_for_profile_v2( + self.context.active_optimization_profile + ) + else: + size_to_allocate = self.engine.get_device_memory_size_for_profile( + self.context.active_optimization_profile + ) elif self.allocation_strategy == "runtime": # Perform runtime allocation. 
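# Context for the hunk below: update_device_memory_size_for_shapes() (TensorRT 10.1+)
# reports the activation-memory size required by the input shapes currently set on the
# context. The scratch DeviceArray is resized to that value and attached with
# set_device_memory(ptr, nbytes) on TRT >= 10.1, or via the older context.device_memory
# property on earlier versions.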
size_to_allocate = self.context.update_device_memory_size_for_shapes() @@ -363,7 +341,10 @@ def get_io(mode): self.context_memory_buffer = cuda.DeviceArray.raw((size_to_allocate,)) self.context_memory_buffer.resize((size_to_allocate,)) - self.context.device_memory = self.context_memory_buffer.ptr + if mod.version(trt.__version__) >= mod.version("10.1"): + self.context.set_device_memory(self.context_memory_buffer.ptr, self.context_memory_buffer.allocated_nbytes) + else: + self.context.device_memory = self.context_memory_buffer.ptr if not self.context.execute_async_v3(self.stream.ptr): G_LOGGER.critical("`execute_async_v3()` failed. Please see the logging output above for details.") @@ -467,3 +448,75 @@ def deactivate_impl(self): self.context_memory_buffer, self.output_allocator, ) + + def _set_weight_streaming_budget(self): + # Setup weight streaming if applicable + if self.weight_streaming_budget != None and self.weight_streaming_percent != None: + G_LOGGER.warning(f"Cannot specify the weight streaming budget both in bytes and percentage. Prioritizing the bytes value.") + + if self.weight_streaming_budget is not None: + assert self.weight_streaming_budget == -2 or self.weight_streaming_budget == -1 or self.weight_streaming_budget >= 0 + + if mod.version(trt.__version__) >= mod.version("10.1"): + self._set_weight_streaming_budget_v2() + else: + self._set_weight_streaming_budget_v1() + + def _set_weight_streaming_budget_v1(self): + budget_bytes = None + if self.weight_streaming_budget is not None: + if self.weight_streaming_budget == -2: + budget_bytes = 0 + else: + budget_bytes = self.weight_streaming_budget + + elif self.weight_streaming_percent is not None: + assert 0 <= self.weight_streaming_percent <= 100 + if self.weight_streaming_percent == 0: + budget_bytes = 0 # Disable weight streaming + else: + min_budget = self.engine.minimum_weight_streaming_budget + max_budget = self.engine.streamable_weights_size + budget_bytes = (1 - self.weight_streaming_percent / 100.0) * (max_budget - min_budget) + min_budget + + if budget_bytes is not None: + budget_bytes = int(budget_bytes) + self.engine.weight_streaming_budget = budget_bytes + if self.engine.weight_streaming_budget != budget_bytes: + G_LOGGER.critical(f"Failed to set weight streaming budget to {budget_bytes}!") + if budget_bytes == 0: + G_LOGGER.info(f"Weight streaming is disabled.") + elif budget_bytes == -1: + G_LOGGER.info(f"Weight streaming is enabled with TensorRT automatically determiing the budget.") + else: + G_LOGGER.info(f"Weight streaming is enabled with a memory budget of {budget_bytes} bytes.") + + + def _set_weight_streaming_budget_v2(self): + budget_bytes = None + if self.weight_streaming_budget is not None: + # use V2 path + assert self.weight_streaming_budget == -2 or self.weight_streaming_budget == -1 or self.weight_streaming_budget >= 0 + if self.weight_streaming_budget == -2: + budget_bytes = self.engine.streamable_weights_size + elif self.weight_streaming_budget == -1: + budget_bytes = self.engine.get_weight_streaming_automatic_budget() + else: + budget_bytes = self.weight_streaming_budget + + elif self.weight_streaming_percent is not None: + assert 0 <= self.weight_streaming_percent <= 100 + if self.weight_streaming_percent == 100: + budget_bytes = self.engine.streamable_weights_size + else: + budget_bytes = self.weight_streaming_percent / 100.0 * (self.engine.streamable_weights_size) + + if budget_bytes is not None: + budget_bytes = int(budget_bytes) + self.engine.weight_streaming_budget_v2 = budget_bytes + if 
self.engine.weight_streaming_budget_v2 != budget_bytes: + G_LOGGER.critical(f"Failed to set weight streaming budget to {budget_bytes}!") + if budget_bytes == self.engine.streamable_weights_size: + G_LOGGER.info(f"Weight streaming is disabled.") + else: + G_LOGGER.info(f"Weight streaming is enabled with a memory budget of {budget_bytes} bytes.") diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/trt/config.py b/tools/Polygraphy/polygraphy/tools/args/backend/trt/config.py index 10aea683..e3ecb6fa 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/trt/config.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/trt/config.py @@ -452,6 +452,15 @@ def add_parser_args_impl(self): default=None, ) + self.group.add_argument( + "--runtime-platform", + help="The target runtime platform (operating system and CPU architecture) for the execution of the TensorRT engine. " + "TensorRT provides support for cross-platform engine compatibility when the target runtime platform is different from the build platform. " + "Values come from the names of values in the `trt.RuntimePlatform` enum and are case-insensitive. " + "For example, `--runtime-platform same_as_build`, `--runtime-platform windows_amd64` ", + default=None, + ) + if self._allow_engine_capability: self.group.add_argument( "--engine-capability", @@ -512,6 +521,7 @@ def parse_impl(self, args): error_on_timing_cache_miss (bool): Whether to emit error when a tactic being timed is not present in the timing cache. disable_compilation_cache (bool): Whether to disable caching JIT-compiled code. weight_streaming (bool): Whether to enable weight streaming for the TensorRT Engine. + runtime_platform (str): A string representing the target runtime platform enum value. """ trt_min_shapes = args_util.get(args, "trt_min_shapes", default=[]) @@ -637,6 +647,15 @@ def parse_impl(self, args): "HardwareCompatibilityLevel", hardware_compatibility_level ) + self.runtime_platform = None + runtime_platform = args_util.get( + args, "runtime_platform" + ) + if runtime_platform is not None: + self.runtime_platform = make_trt_enum_val( + "RuntimePlatform", runtime_platform + ) + self.profiling_verbosity = None profiling_verbosity = args_util.get(args, "profiling_verbosity") if profiling_verbosity is not None: @@ -757,6 +776,7 @@ def add_to_script_impl(self, script): self.engine_capability, self.profiling_verbosity, self.hardware_compatibility_level, + self.runtime_platform, self.quantization_flags, ] ): @@ -805,6 +825,7 @@ def add_to_script_impl(self, script): error_on_timing_cache_miss=self.error_on_timing_cache_miss, disable_compilation_cache=self.disable_compilation_cache, weight_streaming=self.weight_streaming, + runtime_platform=self.runtime_platform, ) if config_loader_str is not None: script.add_import( diff --git a/tools/Polygraphy/polygraphy/tools/args/backend/trt/runner.py b/tools/Polygraphy/polygraphy/tools/args/backend/trt/runner.py index 29ef1129..edfceda2 100644 --- a/tools/Polygraphy/polygraphy/tools/args/backend/trt/runner.py +++ b/tools/Polygraphy/polygraphy/tools/args/backend/trt/runner.py @@ -56,10 +56,10 @@ def add_parser_args_impl(self): self.group.add_argument( "--weight-streaming-budget", help="The amount of GPU memory in bytes that TensorRT can use for weights at runtime. The engine must be built with weight streaming enabled. It can take on the following values: " - "None or 0: Disables weight streaming at runtime. " + "None or -2: Disables weight streaming at runtime. 
" "-1: TensorRT will decide the streaming budget automatically. " - "0 to 100%%: The percentage of weights TRT will stream. 100%% will stream the maximum number of weights. " - ">0B: The exact amount of streamable weights that reside on the GPU (unit suffixes are supported).", + "0 to 100%%: The percentage of weights that TRT keeps on the GPU. 0%% will stream the maximum number of weights." + ">=0B: The exact amount of streamable weights that reside on the GPU (unit suffixes are supported).", type=str, default=None, ) @@ -71,8 +71,8 @@ def parse_impl(self, args): Attributes: optimization_profile (int): The index of the optimization profile to initialize the runner with. allocation_strategy (str): The way activation memory is allocated. - weight_streaming_budget (int): The weight streaming budget in bytes. - weight_streaming_percent (float): The percentage of weights streamed. + weight_streaming_budget (int): The size of the weights on the GPU in bytes. + weight_streaming_percent (float): The percentage of weights on the GPU. """ self.optimization_profile = args_util.get(args, "optimization_profile") self.allocation_strategy = args_util.get(args, "allocation_strategy") @@ -89,7 +89,7 @@ def parse_impl(self, args): elif ws_arg: budget = args_util.parse_num_bytes(ws_arg) assert ( - budget == -1 or budget >= 0 + budget == -2 or budget == -1 or budget >= 0 ), "Invalid amount for --weight-streaming-budget!" self.weight_streaming_budget = budget diff --git a/tools/Polygraphy/tests/backend/trt/test_config.py b/tools/Polygraphy/tests/backend/trt/test_config.py index 381db3db..0d6ea2d7 100644 --- a/tools/Polygraphy/tests/backend/trt/test_config.py +++ b/tools/Polygraphy/tests/backend/trt/test_config.py @@ -51,6 +51,11 @@ def test_defaults(self, identity_builder_network): config.hardware_compatibility_level == trt.HardwareCompatibilityLevel.NONE ) + if mod.version(trt.__version__) >= mod.version("10.2"): + assert ( + config.runtime_platform + == trt.RuntimePlatform.SAME_AS_BUILD + ) assert config.num_optimization_profiles == 1 assert config.int8_calibrator is None with contextlib.suppress(AttributeError): @@ -399,6 +404,21 @@ def test_hardware_compatibility_level(self, identity_builder_network, level): with loader(builder, network) as config: assert config.hardware_compatibility_level == level + if mod.version(trt.__version__) >= mod.version("10.2"): + + @pytest.mark.parametrize( + "platform", + [ + trt.RuntimePlatform.SAME_AS_BUILD, + trt.RuntimePlatform.WINDOWS_AMD64, + ], + ) + def test_runtime_platform(self, identity_builder_network, platform): + builder, network = identity_builder_network + loader = CreateConfig(runtime_platform=platform) + with loader(builder, network) as config: + assert config.runtime_platform == platform + @pytest.mark.skipif( mod.version(trt.__version__) < mod.version("8.6"), reason="Unsupported for TRT versions prior to 8.6", diff --git a/tools/Polygraphy/tests/backend/trt/test_runner.py b/tools/Polygraphy/tests/backend/trt/test_runner.py index 601aff24..dc3fe0e0 100644 --- a/tools/Polygraphy/tests/backend/trt/test_runner.py +++ b/tools/Polygraphy/tests/backend/trt/test_runner.py @@ -415,7 +415,7 @@ def test_get_array_on_cpu(self, use_torch): mod.version(trt.__version__) < mod.version("10.0"), reason="Feature not present before 10.0", ) - @pytest.mark.parametrize("budget", [None, 0, 0.5, 0.99, 1000, np.inf]) + @pytest.mark.parametrize("budget", [None, -2, -1, 0, 0.5, 0.99, 1.0, 1000, np.inf]) def test_weight_streaming(self, budget): model = ONNX_MODELS["matmul_2layer"] 
network_loader = NetworkFromOnnxBytes(model.loader, strongly_typed=True) diff --git a/tools/Polygraphy/tests/tools/args/backend/trt/test_config.py b/tools/Polygraphy/tests/tools/args/backend/trt/test_config.py index ca002f86..0d0fc9d4 100644 --- a/tools/Polygraphy/tests/tools/args/backend/trt/test_config.py +++ b/tools/Polygraphy/tests/tools/args/backend/trt/test_config.py @@ -566,6 +566,30 @@ def test_hardware_compatibility_level(self, trt_config_args, level, expected): ) as config: assert config.hardware_compatibility_level == expected + if mod.version(trt.__version__) >= mod.version("10.2"): + + @pytest.mark.parametrize( + "platform, expected", + [ + ("same_as_build", trt.RuntimePlatform.SAME_AS_BUILD), + ("windows_amd64", trt.RuntimePlatform.WINDOWS_AMD64), + ("Windows_AMD64", trt.RuntimePlatform.WINDOWS_AMD64), + ], + ) + def test_runtime_platform(self, trt_config_args, platform, expected): + trt_config_args.parse_args(["--runtime-platform", str(platform)]) + assert ( + str(trt_config_args.runtime_platform) + == f"trt.RuntimePlatform.{expected.name}" + ) + + builder, network = create_network() + + with builder, network, trt_config_args.create_config( + builder, network=network + ) as config: + assert config.runtime_platform == expected + @pytest.mark.skipif( mod.version(trt.__version__) < mod.version("8.6"), reason="Unsupported for TRT versions prior to 8.6", diff --git a/tools/experimental/trt-engine-explorer/.vscode/launch.json b/tools/experimental/trt-engine-explorer/.vscode/launch.json new file mode 100644 index 00000000..dda1db9a --- /dev/null +++ b/tools/experimental/trt-engine-explorer/.vscode/launch.json @@ -0,0 +1,31 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
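+    // The three configurations below debug, respectively: the currently open
+    // file, the test entry point (tests/test.py), and utils/draw_engine.py on a
+    // sample engine; the last is roughly equivalent to running
+    //     python3 utils/draw_engine.py tests/inputs/mobilenet.qat.onnx.engine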
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: current file", + "type": "python", + "request": "launch", + "justMyCode":false, + "program": "${file}", + "console": "integratedTerminal", + }, + { + "name": "Python: tests/test.py", + "type": "python", + "request": "launch", + "program": "tests/test.py", + "console": "integratedTerminal", + }, + { + "name": "draw_engine", + "type": "python", + "request": "launch", + "program": "utils/draw_engine.py", + "console": "integratedTerminal", + "args": ["tests/inputs/mobilenet.qat.onnx.engine"], + } + ] +} \ No newline at end of file diff --git a/tools/experimental/trt-engine-explorer/trex/graphing.py b/tools/experimental/trt-engine-explorer/trex/graphing.py index 9f1fc7e4..18c31c3f 100644 --- a/tools/experimental/trt-engine-explorer/trex/graphing.py +++ b/tools/experimental/trt-engine-explorer/trex/graphing.py @@ -308,7 +308,7 @@ def __add_memory_nodes(self, plan): constants_outputs = [const.outputs[0].name for const in plan.constants] constants_producers = {const.outputs[0].name + ".0": const for const in plan.constants} for region in self.regions: - is_myelin_const = len(region.writers()) == 0 and region.name not in plan.bindings + is_myelin_const = len(region.writers()) == 0 is_constant = region.name in constants_outputs if (is_constant or is_myelin_const) and not self.include_constants: continue @@ -735,7 +735,6 @@ def __add_dot_layer_nodes(self, plan, plan_graph, node_name_2_node_id): for layer_node in plan_graph.layer_nodes: layer = layer_node.layer latency = _get_latency(plan, layer, self.latency_type) - if not layer.type == 'Constant' or plan_graph.include_constants: dot_id = _get_dot_id(layer.name) node_name_2_node_id[layer.name] = dot_id diff --git a/tools/onnx-graphsurgeon/CHANGELOG.md b/tools/onnx-graphsurgeon/CHANGELOG.md index 7f555d53..7eaed393 100644 --- a/tools/onnx-graphsurgeon/CHANGELOG.md +++ b/tools/onnx-graphsurgeon/CHANGELOG.md @@ -2,11 +2,16 @@ Dates are in YYYY-MM-DD format. -## v0.5.2 (2024-04-01) +## v0.5.3 (TBD) ### Added - Added `export_dtype` field to `gs.Constant` to allow numpy-unsupported dtypes such as BFloat16. +## v0.5.2 (2024-04-11) +### Fixed +- Fixed a bug in `setup.py` where the format of the long description was not specified. + + ## v0.5.1 (2024-02-23) ### Changed - Removed dependency on `typing_extensions` package. diff --git a/tools/onnx-graphsurgeon/examples/12_using_bf16/README.md b/tools/onnx-graphsurgeon/examples/12_using_bf16/README.md deleted file mode 100644 index 3b8f2571..00000000 --- a/tools/onnx-graphsurgeon/examples/12_using_bf16/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# BFloat16 - -## Introduction - -This example generates a model with bf16 weights. - -Numpy currently doesn't support bf16 natively so data values are stored as float32 and the conversion happens prior to onnx export. -```python -tensor = gs.Constant(name="weight", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32), export_dtype=onnx.TensorProto.BFLOAT16) -# or -tensor = gs.Constant(name="weight", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32)) -tensor.export_dtype = onnx.TensorProto.BFLOAT16 - -``` - -## Running the example - -1. 
Generate the model:
-    ```bash
-    python3 generate.py
-    ```
-
-    This creates a model with bfloat16 weights
-
-    ![../resources/12_bf16.onnx.png](../resources/12_bf16.onnx.png)
-
diff --git a/tools/onnx-graphsurgeon/examples/12_using_numpy_unsupported_dtypes/README.md b/tools/onnx-graphsurgeon/examples/12_using_numpy_unsupported_dtypes/README.md
new file mode 100644
index 00000000..2051fa8f
--- /dev/null
+++ b/tools/onnx-graphsurgeon/examples/12_using_numpy_unsupported_dtypes/README.md
@@ -0,0 +1,36 @@
+# Numpy Unsupported Dtypes
+
+## Introduction
+
+This example generates two models with `bfloat16` and `float8` weights, respectively.
+
+Currently, `bfloat16` and `float8` aren't natively supported by numpy, so data values are stored as float32 and converted prior to ONNX export.
+
+## BFloat16
+```python
+tensor = gs.Constant(name="weight", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32), export_dtype=onnx.TensorProto.BFLOAT16)
+# or
+tensor = gs.Constant(name="weight", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32))
+tensor.export_dtype = onnx.TensorProto.BFLOAT16
+```
+
+## Float8E4M3
+```python
+tensor = gs.Constant(name="weight", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32), export_dtype=onnx.TensorProto.FLOAT8E4M3FN)
+# or
+tensor = gs.Constant(name="weight", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32))
+tensor.export_dtype = onnx.TensorProto.FLOAT8E4M3FN
+```
+
+## Running the example
+
+1. Generate the model:
+   ```bash
+   python3 generate.py
+   ```
+
+   This creates two models with bfloat16 and float8 weights, respectively.
+
+   ![../resources/12_bf16.onnx.png](../resources/12_bf16.onnx.png)
+   ![../resources/12_float8.onnx.png](../resources/12_float8.onnx.png)
+
diff --git a/tools/onnx-graphsurgeon/examples/12_using_bf16/generate.py b/tools/onnx-graphsurgeon/examples/12_using_numpy_unsupported_dtypes/generate.py
similarity index 56%
rename from tools/onnx-graphsurgeon/examples/12_using_bf16/generate.py
rename to tools/onnx-graphsurgeon/examples/12_using_numpy_unsupported_dtypes/generate.py
index ebfe67a1..ab9a2a4a 100644
--- a/tools/onnx-graphsurgeon/examples/12_using_bf16/generate.py
+++ b/tools/onnx-graphsurgeon/examples/12_using_numpy_unsupported_dtypes/generate.py
@@ -20,14 +20,19 @@
 import numpy as np
 import onnx
 
-BF16 = onnx.TensorProto.BFLOAT16
-X = gs.Variable(name="X", dtype=BF16, shape=(1, 3, 224, 224))
-W = gs.Constant(name="W", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32) * 0.5, export_dtype=BF16)
-Y = gs.Variable(name="Y", dtype=BF16, shape=(1, 5, 222, 222))
+def generate(dtype, out_path):
+    X = gs.Variable(name="X", dtype=dtype, shape=(1, 3, 224, 224))
+    W = gs.Constant(
+        name="W",
+        values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32) * 0.5,
+        export_dtype=dtype,
+    )
+    Y = gs.Variable(name="Y", dtype=dtype, shape=(1, 5, 222, 222))
+    node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y])
+    graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
+    onnx.save(gs.export_onnx(graph), out_path)
 
-node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y])
-graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
-
-onnx.save(gs.export_onnx(graph), "test_conv_bf16.onnx")
+generate(onnx.TensorProto.BFLOAT16, "test_conv_bf16.onnx")
+generate(onnx.TensorProto.FLOAT8E4M3FN, "test_conv_float8e4m3fn.onnx")
diff --git a/tools/onnx-graphsurgeon/examples/resources/12_float8.onnx.png b/tools/onnx-graphsurgeon/examples/resources/12_float8.onnx.png
new file mode 100644
index 00000000..8b3bfe20
Binary files /dev/null and b/tools/onnx-graphsurgeon/examples/resources/12_float8.onnx.png differ
diff --git a/tools/onnx-graphsurgeon/onnx_graphsurgeon/__init__.py b/tools/onnx-graphsurgeon/onnx_graphsurgeon/__init__.py
index 100229f5..6756daa1 100644
--- a/tools/onnx-graphsurgeon/onnx_graphsurgeon/__init__.py
+++ b/tools/onnx-graphsurgeon/onnx_graphsurgeon/__init__.py
@@ -7,4 +7,4 @@
 from onnx_graphsurgeon.ir.tensor import Constant, Tensor, Variable
 from onnx_graphsurgeon.util.exception import OnnxGraphSurgeonException
 
-__version__ = "0.5.1"
+__version__ = "0.5.2"
diff --git a/tools/onnx-graphsurgeon/onnx_graphsurgeon/exporters/onnx_exporter.py b/tools/onnx-graphsurgeon/onnx_graphsurgeon/exporters/onnx_exporter.py
index 1aa64f3e..a727319c 100644
--- a/tools/onnx-graphsurgeon/onnx_graphsurgeon/exporters/onnx_exporter.py
+++ b/tools/onnx-graphsurgeon/onnx_graphsurgeon/exporters/onnx_exporter.py
@@ -82,28 +82,62 @@ def update_import_domains(graph):
     DEFAULT_CUSTOM_OPSET_VERSION = 1
     for used_domain in all_used_domains:
         if used_domain not in current_domains:
-            graph.import_domains.append(onnx.helper.make_opsetid(used_domain, DEFAULT_CUSTOM_OPSET_VERSION))
+            graph.import_domains.append(
+                onnx.helper.make_opsetid(used_domain, DEFAULT_CUSTOM_OPSET_VERSION)
+            )
             current_domains.add(used_domain)
 
     return graph.import_domains
 
 
-# Converts a fp32 gs.Constant to a bf16 onnx.TensorProto
-def tensor_to_onnx_bf16(tensor: Constant):
+class NumpyArrayConverter(object):
+    def __init__(self, container, scalar_converter):
+        self.container = container
+        self.scalar_converter = scalar_converter
 
-    # Converts the fp32 numpy array to bf16 values and store in a uint16 numpy array
-    def np_float32_to_bf16_as_uint16(arr):
-        new_arr = np.empty(arr.size, dtype=np.uint16)
+    def __call__(self, arr):
+        new_arr = np.empty(arr.size, dtype=self.container)
         flatten = arr.flatten()
         for i in range(arr.size):
-            new_arr[i] = onnx.helper.float32_to_bfloat16(flatten[i])
+            new_arr[i] = self.scalar_converter(flatten[i])
         return new_arr.reshape(arr.shape)
 
-    arr_bf16_as_uint16 = np_float32_to_bf16_as_uint16(tensor.values)
-    onnx_tensor = onnx.TensorProto()
-    onnx_tensor.data_type = onnx.TensorProto.BFLOAT16
-    onnx_tensor.dims.extend(arr_bf16_as_uint16.shape)
-    onnx_tensor.raw_data = arr_bf16_as_uint16.tobytes()
 
+_NUMPY_ARRAY_CONVERTERS = {
+    onnx.TensorProto.BFLOAT16: NumpyArrayConverter(
+        np.uint16, onnx.helper.float32_to_bfloat16
+    ),
+    # FP8 in TensorRT supports negative zeros, no infinities
+    # See https://onnx.ai/onnx/technical/float8.html#papers
+    onnx.TensorProto.FLOAT8E4M3FN: NumpyArrayConverter(
+        np.uint8, lambda x: onnx.helper.float32_to_float8e4m3(x, fn=True, uz=False)
+    ),
+}
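The converters above wrap scalar helpers from `onnx.helper` in a generic per-element loop. As a quick end-to-end check, here is a small bf16 round trip; a sketch that mirrors the `test_export_numpy_unsupported_dtypes_accuracy` test added near the end of this diff (the 0.02 tolerance is taken from that test) and assumes `onnx-graphsurgeon` from this source tree is importable:

```python
import numpy as np
import onnx
import onnx_graphsurgeon as gs
from onnx_graphsurgeon.exporters.onnx_exporter import constant_to_onnx_tensor

# Values are stored as float32; the bf16 conversion happens at export time.
values = np.random.random_sample(size=(3, 4)).astype(np.float32)
tensor = gs.Constant("w", values=values, export_dtype=onnx.TensorProto.BFLOAT16)

onnx_tensor = constant_to_onnx_tensor(tensor)
# bf16 payloads are carried as raw uint16 words in the TensorProto.
words = np.frombuffer(onnx_tensor.raw_data, dtype=np.uint16)
decoded = onnx.numpy_helper.bfloat16_to_float32(words, dims=values.shape)

# bf16 keeps only 8 bits of mantissa, so expect some rounding error.
assert np.max(np.abs(decoded - values)) <= 0.02
```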
" + "Only float32 to {_NUMPY_ARRAY_CONVERTERS.keys()} is supported" + ) + arr = _NUMPY_ARRAY_CONVERTERS[target_dtype](tensor.values) + + onnx_tensor = onnx.TensorProto() + onnx_tensor.data_type = target_dtype + onnx_tensor.dims.extend(arr.shape) + onnx_tensor.raw_data = arr.tobytes() return onnx_tensor @@ -116,18 +150,7 @@ def export_tensor_proto(tensor: Constant) -> onnx.TensorProto: if isinstance(tensor._values, LazyValues): onnx_tensor = tensor._values.tensor else: - if dtype_to_onnx(tensor.dtype) != dtype_to_onnx(tensor.export_dtype): - assert tensor.dtype == np.float32, ( - f"Cannot convert onnx dtype {dtype_to_onnx(tensor.dtype)} to {dtype_to_onnx(tensor.export_dtype)}." - "Only float32 to bfloat16 is supported" - ) - assert tensor.export_dtype == onnx.TensorProto.BFLOAT16, ( - f"Cannot convert onnx dtype {dtype_to_onnx(tensor.dtype)} to {dtype_to_onnx(tensor.export_dtype)}." - "Only float32 to bfloat16 is supported" - ) - onnx_tensor = tensor_to_onnx_bf16(tensor) - else: - onnx_tensor = onnx.numpy_helper.from_array(tensor.values) + onnx_tensor = constant_to_onnx_tensor(tensor) if tensor.data_location is not None: onnx_tensor.data_location = tensor.data_location @@ -139,7 +162,9 @@ def export_sparse_tensor_proto(tensor: Constant) -> onnx.SparseTensorProto: return tensor._values.tensor @staticmethod - def export_value_info_proto(tensor: Tensor, do_type_check: bool) -> onnx.ValueInfoProto: + def export_value_info_proto( + tensor: Tensor, do_type_check: bool + ) -> onnx.ValueInfoProto: if do_type_check and tensor.dtype is None: G_LOGGER.critical( "Graph input and output tensors must include dtype information. Please set the dtype attribute for: {:}".format( @@ -149,7 +174,9 @@ def export_value_info_proto(tensor: Tensor, do_type_check: bool) -> onnx.ValueIn if tensor.dtype is not None: if isinstance(tensor, Constant) or tensor.type == "tensor_type": - onnx_tensor = onnx.helper.make_tensor_value_info(tensor.name, dtype_to_onnx(tensor.dtype), tensor.shape) + onnx_tensor = onnx.helper.make_tensor_value_info( + tensor.name, dtype_to_onnx(tensor.dtype), tensor.shape + ) elif tensor.type == "sequence_type": onnx_tensor = onnx.helper.make_tensor_sequence_value_info( tensor.name, dtype_to_onnx(tensor.dtype), tensor.shape @@ -179,7 +206,9 @@ def export_attributes(attrs: dict) -> List[onnx.AttributeProto]: # Netron has a bug which makes it crash if a Tensor attribute has no tensor data. # So provide some meaningless tensor data for Netron to read. if val.type == Tensor: - tensor_proto = OnnxExporter.export_tensor_proto(Constant("", np.array([0], dtype=np.float32))) + tensor_proto = OnnxExporter.export_tensor_proto( + Constant("", np.array([0], dtype=np.float32)) + ) onnx_attr.t.CopyFrom(tensor_proto) onnx_attr.ref_attr_name = val.name @@ -223,7 +252,9 @@ def export_function(func: Function) -> onnx.FunctionProto: for tensor in func.tensors().values(): if isinstance(tensor, Constant): # Copying the tensor prevents the new node from appearing in the Constant tensor's inputs. - new_const_nodes.append(Node("Constant", attrs={"value": tensor}, outputs=[tensor.copy()])) + new_const_nodes.append( + Node("Constant", attrs={"value": tensor}, outputs=[tensor.copy()]) + ) # Const nodes have no inputs, so this maintains a topological ordering. 
func_nodes = new_const_nodes + func_nodes @@ -270,8 +301,14 @@ def export_graph(graph: Graph, do_type_check=True) -> onnx.GraphProto: """ check_duplicate_node_names(graph.nodes, level=G_LOGGER.WARNING) nodes = [OnnxExporter.export_node(node) for node in graph.nodes] - inputs = [OnnxExporter.export_value_info_proto(inp, do_type_check) for inp in graph.inputs] - outputs = [OnnxExporter.export_value_info_proto(out, do_type_check) for out in graph.outputs] + inputs = [ + OnnxExporter.export_value_info_proto(inp, do_type_check) + for inp in graph.inputs + ] + outputs = [ + OnnxExporter.export_value_info_proto(out, do_type_check) + for out in graph.outputs + ] tensor_map = graph.tensors() initializer = [ OnnxExporter.export_tensor_proto(tensor) @@ -292,7 +329,9 @@ def export_graph(graph: Graph, do_type_check=True) -> onnx.GraphProto: # Omit tensors from value_info if we don't know their shape/dtype def has_value_info(tensor): - return isinstance(tensor, Variable) and (tensor.dtype is not None or tensor.shape is not None) + return isinstance(tensor, Variable) and ( + tensor.dtype is not None or tensor.shape is not None + ) value_info = [ OnnxExporter.export_value_info_proto(tensor, do_type_check) diff --git a/tools/onnx-graphsurgeon/setup.py b/tools/onnx-graphsurgeon/setup.py index df0b5188..458cb6f7 100644 --- a/tools/onnx-graphsurgeon/setup.py +++ b/tools/onnx-graphsurgeon/setup.py @@ -40,6 +40,7 @@ def main(): version=onnx_graphsurgeon.__version__, description="ONNX GraphSurgeon", long_description=open("README.md", "r", encoding="utf-8").read(), + long_description_content_type="text/markdown", license="Apache 2.0", url="https://github.com/NVIDIA/TensorRT/tree/main/tools/onnx-graphsurgeon", author="NVIDIA", @@ -47,11 +48,6 @@ def main(): classifiers=[ "Intended Audience :: Developers", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", ], install_requires=REQUIRED_PACKAGES, packages=find_packages(), diff --git a/tools/onnx-graphsurgeon/tests/test_exporters.py b/tools/onnx-graphsurgeon/tests/test_exporters.py index 6c0834ab..f05292c6 100644 --- a/tools/onnx-graphsurgeon/tests/test_exporters.py +++ b/tools/onnx-graphsurgeon/tests/test_exporters.py @@ -21,7 +21,10 @@ import onnx import onnx.numpy_helper import pytest -from onnx_graphsurgeon.exporters.onnx_exporter import OnnxExporter +from onnx_graphsurgeon.exporters.onnx_exporter import ( + OnnxExporter, + constant_to_onnx_tensor, +) from onnx_graphsurgeon.importers.onnx_importer import OnnxImporter from onnx_graphsurgeon.ir.node import Node from onnx_graphsurgeon.ir.function import Function @@ -79,6 +82,37 @@ def test_export_constant_tensor_to_value_info_proto(self): onnx_shape.append(dim.dim_value) assert tuple(onnx_shape) == shape + @pytest.mark.parametrize( + "export_dtype, container_dtype, threshold, onnx_to_numpy_converter", + [ + ( + onnx.TensorProto.BFLOAT16, + np.uint16, + 0.02, + onnx.numpy_helper.bfloat16_to_float32, + ), + ( + onnx.TensorProto.FLOAT8E4M3FN, + np.uint8, + 0.35, + lambda x, dims: onnx.numpy_helper.float8e4m3_to_float32(x, dims, fn=True, uz=False), + ), + ], + ) + def test_export_numpy_unsupported_dtypes_accuracy( + self, export_dtype, container_dtype, threshold, onnx_to_numpy_converter + ): + name = "constant_tensor" + shape = (3, 224, 224) + values = np.random.random_sample(size=shape).astype(np.float32) + + tensor = 
Constant(name=name, values=values, export_dtype=export_dtype) + onnx_tensor = constant_to_onnx_tensor(tensor) + np_arr = np.frombuffer(onnx_tensor.raw_data, dtype=container_dtype) + np_arr_fp32 = onnx_to_numpy_converter(np_arr, dims=values.shape) + + assert np.max(np.abs(np_arr_fp32 - values)) <= threshold + @pytest.mark.parametrize( "dtype, expected_type", [