# TT-Buda-Demos Updates for TT-Buda Release (8/30) #123

Merged: 14 commits, Sep 24, 2024
`first_5_steps/1_install_tt_buda.md` (35 changes: 11 additions & 24 deletions)
```diff
@@ -28,6 +28,7 @@ Once you have identified the release version you would like to install, you can
     3. [Device Firmware Update](#device-firmware-update)
     4. [Backend Compiler Dependencies](#backend-compiler-dependencies)
     5. [TT-SMI](#tt-smi)
+    6. [Topology (TT-LoudBox/TT-QuietBox Only)](#tt-topology-tt-loudboxtt-quietbox-systems-only)
 2. [PyBuda Installation](#pybuda-installation)
     1. [Python Environment Installation](#python-environment-installation)
     2. [Docker Container Installation](#docker-container-installation)
```
````diff
@@ -44,29 +45,11 @@ If you would like to run PyBuda in a Python virtualenv, then follow the instructions

 ### Setup HugePages

-1. Download latest [setup_hugepages.py](https://github.com/tenstorrent/tt-metal/blob/main/infra/machine_setup/scripts/setup_hugepages.py) script.
-
-   ```sh
-   wget https://raw.githubusercontent.com/tenstorrent/tt-metal/main/infra/machine_setup/scripts/setup_hugepages.py
-   ```
-
-2. Run first setup script.
-
-   ```sh
-   sudo -E python3 setup_hugepages.py first_pass
-   ```
-
-3. Reboot
-
-   ```sh
-   sudo reboot now
-   ```
-
-4. Run second setup script & check setup.
-
-   ```sh
-   sudo -E python3 setup_hugepages.py enable && sudo -E python3 setup_hugepages.py check
-   ```
+```bash
+git clone https://github.com/tenstorrent/tt-system-tools.git
+cd tt-system-tools
+sudo ./hugepages-setup.sh
+```
````
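Not part of the diff, but a quick way to verify the result of either setup path: on Linux, hugepage allocation is reported in `/proc/meminfo`, so after running the setup script and rebooting you can inspect it directly (a sketch; the expected values depend on your system and card count):

```shell
# Show hugepage configuration; after a successful setup,
# HugePages_Total should be nonzero
grep -i hugepages /proc/meminfo
```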

### PCI Driver Installation

```diff
@@ -111,6 +94,10 @@ rm libyaml-cpp-dev_0.6.2-4ubuntu1_amd64.deb libyaml-cpp0.6_0.6.2-4ubuntu1_amd64.deb

 Please navigate to [tt-smi](https://github.com/tenstorrent/tt-smi) homepage and follow instructions within the README.

+### TT-Topology (TT-LoudBox/TT-QuietBox Systems Only)
+
+If you are running on a TT-LoudBox or TT-QuietBox system, please navigate to [tt-topology](https://github.com/tenstorrent/tt-topology) homepage and follow instructions within the README.
```

## PyBuda Installation

There are two ways to install PyBuda within the host environment: using a Python virtual environment or a Docker container.
```diff
@@ -187,7 +174,7 @@ For example, to run on an Ubuntu version 20.04 on a Grayskull device, use this command:

 sudo docker pull ghcr.io/tenstorrent/tt-buda/ubuntu-20-04-amd64/gs:<TAG>

-where `<TAG>` is the version number i.e. `v0.12.3`.
-where `<TAG>` is the release version number from: <https://github.com/tenstorrent/tt-buda/tags>
```
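For instance, using the `v0.12.3` tag cited in the old wording of this line, the full image reference would be assembled like this (a sketch; substitute whichever tag the tags page currently lists):

```shell
TAG=v0.12.3  # example tag; take the current one from the tt-buda tags page
IMAGE="ghcr.io/tenstorrent/tt-buda/ubuntu-20-04-amd64/gs:${TAG}"
echo "${IMAGE}"
# sudo docker pull "${IMAGE}"   # requires Docker and network access
```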

#### Step 2. Run the container

`model_demos/README.md` (120 changes: 61 additions & 59 deletions)

Large diffs are not rendered by default.

```diff
@@ -14,7 +14,7 @@
 from pybuda._C.backend_api import BackendDevice


-def run_efficientnet_lite0_1x1():
+def run_efficientnet_lite0_1x1(batch_size=1):

     # Device specific configurations
     available_devices = pybuda.detect_available_devices()
@@ -50,7 +50,7 @@ def run_efficientnet_lite0_1x1():
     tt_model = TFLiteModule("tflite_efficientnet_lite0", tflite_path)

     # Run inference on Tenstorrent device
-    input_shape = (1, 224, 224, 3)
+    input_shape = (batch_size, 224, 224, 3)
     input_tensor = torch.rand(input_shape)

     output_q = pybuda.run_inference(tt_model, inputs=([input_tensor]))
```
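The pattern in this change, threading `batch_size` into the leading dimension of `input_shape`, recurs across the demo scripts. A minimal standalone sketch using only `torch` (no Tenstorrent hardware required; the `make_input` helper is hypothetical, not part of the demos):

```python
import torch

def make_input(batch_size=1, height=224, width=224, channels=3):
    # Mirrors the updated demos: batch size is the leading tensor dimension
    return torch.rand((batch_size, height, width, channels))

single = make_input()              # default batch_size=1, as before the change
batched = make_input(batch_size=8)
print(single.shape)   # torch.Size([1, 224, 224, 3])
print(batched.shape)  # torch.Size([8, 224, 224, 3])
```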
```diff
@@ -14,7 +14,7 @@
 from pybuda._C.backend_api import BackendDevice


-def run_efficientnet_lite4_1x1():
+def run_efficientnet_lite4_1x1(batch_size=1):

     # Device specific configurations
     available_devices = pybuda.detect_available_devices()
@@ -50,7 +50,7 @@ def run_efficientnet_lite4_1x1():
     tt_model = TFLiteModule("tflite_efficientnet_lite4", tflite_path)

     # STEP 3: Run inference on Tenstorrent device
-    input_shape = (1, 320, 320, 3)
+    input_shape = (batch_size, 320, 320, 3)
     input_tensor = torch.rand(input_shape)

     output_q = pybuda.run_inference(tt_model, inputs=([input_tensor]))
```
`model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py` (5 changes: 2 additions & 3 deletions)

```diff
@@ -44,10 +44,9 @@ def run_hand_landmark_lite_1x1(batch_size=1):
     tt_model = TFLiteModule("tflite_hand_landmark_lite", tflite_path)

     # Run inference on Tenstorrent device
-    input_shape = (1, 224, 224, 3)
+    input_shape = (batch_size, 224, 224, 3)
     input_tensor = torch.rand(input_shape)
-    batch_tensor = torch.cat([input_tensor] * batch_size, dim=0)
-    output_q = pybuda.run_inference(tt_model, inputs=([batch_tensor]))
+    output_q = pybuda.run_inference(tt_model, inputs=([input_tensor]))
     output = output_q.get()

     # Combine outputs for data parallel runs
```
`model_demos/cv_demos/landmark/palm_detection_lite_1x1.py` (5 changes: 2 additions & 3 deletions)

```diff
@@ -42,10 +42,9 @@ def run_palm_detection_lite_1x1(batch_size=1):
     tt_model = TFLiteModule("tflite_palm_detection_lite", tflite_path)

     # Run inference on Tenstorrent device
-    input_shape = (1, 192, 192, 3)
+    input_shape = (batch_size, 192, 192, 3)
     input_tensor = torch.rand(input_shape)
-    batch_tensor = torch.cat([input_tensor] * batch_size, dim=0)
-    output_q = pybuda.run_inference(tt_model, inputs=([batch_tensor]))
+    output_q = pybuda.run_inference(tt_model, inputs=([input_tensor]))
     output = output_q.get()

     # Combine outputs for data parallel runs
```
`model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py` (5 changes: 2 additions & 3 deletions)

```diff
@@ -45,10 +45,9 @@ def run_pose_landmark_lite_1x1(batch_size=1):
     tt_model = TFLiteModule("tflite_pose_landmark_light", tflite_path)

     # STEP 3: Run inference on Tenstorrent device
-    input_shape = (1, 256, 256, 3)
+    input_shape = (batch_size, 256, 256, 3)
     input_tensor = torch.rand(input_shape)
-    batch_tensor = torch.cat([input_tensor] * batch_size, dim=0)
-    output_q = pybuda.run_inference(tt_model, inputs=([batch_tensor]))
+    output_q = pybuda.run_inference(tt_model, inputs=([input_tensor]))
     output = output_q.get()

     # Combine outputs for data parallel runs
```
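The landmark demos previously built a batch by concatenating `batch_size` copies of one random tensor; the updated code allocates the batched tensor directly. Both approaches yield identically shaped inputs, as a small standalone `torch` check (independent of PyBuda) can confirm:

```python
import torch

batch_size = 4

# Old approach: replicate a single sample along dim 0
single = torch.rand((1, 256, 256, 3))
batch_tensor = torch.cat([single] * batch_size, dim=0)

# New approach: allocate the batch directly
input_tensor = torch.rand((batch_size, 256, 256, 3))

assert batch_tensor.shape == input_tensor.shape  # both (4, 256, 256, 3)
```

One behavioral difference: the old batch held four identical copies, while the new one holds independent random samples, which is fine for these shape/throughput demos.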
```diff
@@ -50,6 +50,15 @@ def run_segformer_semseg_pytorch(variant="nvidia/segformer-b0-finetuned-ade-512-512"
 ]:
     compiler_cfg.amp_level = 1

+if variant == "nvidia/segformer-b2-finetuned-ade-512-512":
+    compiler_cfg.place_on_new_epoch("concatenate_1098.dc.concatenate.0")
+
+elif variant == "nvidia/segformer-b3-finetuned-ade-512-512":
+    compiler_cfg.place_on_new_epoch("concatenate_1890.dc.concatenate.0")
+
+elif variant == "nvidia/segformer-b4-finetuned-ade-512-512":
+    compiler_cfg.place_on_new_epoch("concatenate_2748.dc.concatenate.0")
+
 # Load the model from HuggingFace
 model = SegformerForSemanticSegmentation.from_pretrained(variant)
 model.eval()
```
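The per-variant overrides added here could equally be expressed as a lookup table. The following is a hypothetical refactor, not the PR's code: the op names come from the diff, while the dict, the `apply_variant_overrides` helper, and the `CompilerCfgStub` stand-in for PyBuda's config object are illustrative only:

```python
# Op names taken from the diff above; everything else is a sketch.
NEW_EPOCH_OPS = {
    "nvidia/segformer-b2-finetuned-ade-512-512": "concatenate_1098.dc.concatenate.0",
    "nvidia/segformer-b3-finetuned-ade-512-512": "concatenate_1890.dc.concatenate.0",
    "nvidia/segformer-b4-finetuned-ade-512-512": "concatenate_2748.dc.concatenate.0",
}

class CompilerCfgStub:
    """Stand-in for the compiler config; records ops placed on new epochs."""
    def __init__(self):
        self.new_epoch_ops = []

    def place_on_new_epoch(self, op_name):
        self.new_epoch_ops.append(op_name)

def apply_variant_overrides(compiler_cfg, variant):
    # Same dispatch as the if/elif chain in the diff
    op = NEW_EPOCH_OPS.get(variant)
    if op is not None:
        compiler_cfg.place_on_new_epoch(op)

cfg = CompilerCfgStub()
apply_variant_overrides(cfg, "nvidia/segformer-b3-finetuned-ade-512-512")
print(cfg.new_epoch_ops)  # ['concatenate_1890.dc.concatenate.0']
```

A table keeps the variant-to-op mapping in one place, so adding a b5 override would be a one-line change rather than another `elif` branch.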