From 4811aa66debf84cd0cf04f5dfef6f7bcf9b7433e Mon Sep 17 00:00:00 2001 From: kamalrajkannan78 <157608228+kamalrajkannan78@users.noreply.github.com> Date: Fri, 5 Jul 2024 13:29:43 +0530 Subject: [PATCH 01/14] Add demo script for YOLOX(n300 & n150)- Pytorch (#110) --- model_demos/cv_demos/yolo_x/pytorch_yolox.py | 151 +++++++++++++++++++ model_demos/pyproject.toml | 1 + model_demos/tests/test_pytorch_yolox.py | 10 ++ 3 files changed, 162 insertions(+) create mode 100644 model_demos/cv_demos/yolo_x/pytorch_yolox.py create mode 100644 model_demos/tests/test_pytorch_yolox.py diff --git a/model_demos/cv_demos/yolo_x/pytorch_yolox.py b/model_demos/cv_demos/yolo_x/pytorch_yolox.py new file mode 100644 index 00000000..9b4024fe --- /dev/null +++ b/model_demos/cv_demos/yolo_x/pytorch_yolox.py @@ -0,0 +1,151 @@ +# yolox demo script + +import subprocess + +subprocess.run(["pip", "install", "yolox==0.3.0", "--no-deps"]) # Install yolox==0.3.0 without installing its dependencies + +import pybuda +import torch +import cv2 +import numpy as np +import requests +import os +from pybuda._C.backend_api import BackendDevice + +torch.multiprocessing.set_sharing_strategy("file_system") +from yolox.exp import get_exp +from yolox.data.data_augment import preproc as preprocess +from yolox.data.datasets import COCO_CLASSES +from yolox.utils import multiclass_nms, demo_postprocess + + +def run_yolox_pytorch(variant): + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + os.environ["PYBUDA_DECOMPOSE_SIGMOID"] = "1" + + # Device specific configurations + available_devices = pybuda.detect_available_devices() + if available_devices: + if available_devices[0] == BackendDevice.Wormhole_B0: + if variant not in ["yolox_nano", "yolox_s"]: + os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1" + os.environ["PYBUDA_FORK_JOIN_EXPAND_OUTPUT_BUFFERS"] = "1" + os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" + + if variant in ["yolox_nano", "yolox_tiny"]: + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 2)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 2)) + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "81920" + if variant == "yolox_nano": + compiler_cfg.balancer_op_override("max_pool2d_630.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (1, 1)) + elif variant == "yolox_tiny": + compiler_cfg.balancer_op_override("max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (1, 1)) + + elif variant == "yolox_s": + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override("conv2d_33.dc.matmul.8", "t_stream_shape", (1, 1)) + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "4096" + compiler_cfg.place_on_new_epoch("concatenate_1163.dc.sparse_matmul.11.lc2") + compiler_cfg.balancer_op_override("max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "grid_shape", (1, 2)) + + elif variant == "yolox_m": + 
compiler_cfg.place_on_new_epoch("conv2d_811.dc.matmul.8") + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 6)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", "t_stream_shape", (5, 1)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.place_on_new_epoch("concatenate_1530.dc.sparse_matmul.11.lc2") + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "4096" + compiler_cfg.balancer_op_override("max_pool2d_671.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (169, 1)) + + elif variant == "yolox_l": + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "245760" + compiler_cfg.place_on_new_epoch("conv2d_1644.dc.matmul.11") + compiler_cfg.place_on_new_epoch("concatenate_1897.dc.sparse_matmul.11.lc2") + + elif variant == "yolox_darknet": + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "245760" + compiler_cfg.place_on_new_epoch("conv2d_1147.dc.matmul.11") + + elif variant == "yolox_x": + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", "t_stream_shape", (5, 1)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.place_on_new_epoch("concatenate_2264.dc.sparse_matmul.11.lc2") + compiler_cfg.balancer_op_override("max_pool2d_1104.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (13, 1)) + + # prepare model + weight_name = f"{variant}.pth" + url = f"https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/{weight_name}" + response = requests.get(url) + with open(f"{weight_name}", "wb") as file: + file.write(response.content) + + if variant == "yolox_darknet": + model_name = "yolov3" + else: + model_name = variant.replace("_", "-") + + exp = get_exp(exp_name=model_name) + model = exp.get_model() + ckpt = torch.load(f"{variant}.pth", map_location="cpu") + model.load_state_dict(ckpt["model"]) + model.eval() + model_name = f"pt_{variant}" + tt_model = pybuda.PyTorchModule(f"pt_{variant}", model) + + # prepare input + if variant in ["yolox_nano", "yolox_tiny"]: + input_shape = (416, 416) + else: + input_shape = (640, 640) + + url = "http://images.cocodataset.org/val2017/000000397133.jpg" + response = requests.get(url) + with open("input.jpg", "wb") as f: + f.write(response.content) + img = cv2.imread("input.jpg") + img, ratio = preprocess(img, input_shape) + img_tensor = torch.from_numpy(img) + img_tensor = img_tensor.unsqueeze(0) + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=[(img_tensor)]) + output = output_q.get() + + # Post-processing + for i in range(len(output)): + output[i] = output[i].value().detach().float().numpy() + + predictions = demo_postprocess(output[0], input_shape)[0] + boxes = predictions[:, :4] + scores = predictions[:, 4:5] * predictions[:, 5:] + boxes_xyxy = 
np.ones_like(boxes) + boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0 + boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0 + boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0 + boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0 + boxes_xyxy /= ratio + dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1) + if dets is not None: + final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5] + for box, score, cls_ind in zip(final_boxes, final_scores, final_cls_inds): + class_name = COCO_CLASSES[int(cls_ind)] + x_min, y_min, x_max, y_max = box + print(f"Class: {class_name}, Confidence: {score}, Coordinates: ({x_min}, {y_min}, {x_max}, {y_max})") + + # remove downloaded weights,image + os.remove(weight_name) + os.remove("input.jpg") + + +if __name__ == "__main__": + run_yolox_pytorch() diff --git a/model_demos/pyproject.toml b/model_demos/pyproject.toml index 334d1b69..bf7c8b4f 100644 --- a/model_demos/pyproject.toml +++ b/model_demos/pyproject.toml @@ -87,4 +87,5 @@ markers = [ "yolov6: tests that involve yolov6", "segformer: tests that involve SegFormer", "monodle: tests that involve Monodle", + "yolox: tests that involve YOLOX", ] diff --git a/model_demos/tests/test_pytorch_yolox.py b/model_demos/tests/test_pytorch_yolox.py new file mode 100644 index 00000000..41cb48e2 --- /dev/null +++ b/model_demos/tests/test_pytorch_yolox.py @@ -0,0 +1,10 @@ +import pytest +from cv_demos.yolo_x.pytorch_yolox import run_yolox_pytorch + +variants = ["yolox_nano", "yolox_tiny", "yolox_s", "yolox_m", "yolox_l", "yolox_darknet", "yolox_x"] + + +@pytest.mark.parametrize("variant", variants) +@pytest.mark.yolox +def test_yolox_pytorch(variant, clear_pybuda, test_device): + run_yolox_pytorch(variant) From 7f67f247cb138cc0be5b815233357473cf3767aa Mon Sep 17 00:00:00 2001 From: kamalrajkannan78 <157608228+kamalrajkannan78@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:36:49 +0530 Subject: [PATCH 02/14] Add flags for YOLOX-N,T,S,M variants(e300 & e150)- Pytorch (#111) --- model_demos/cv_demos/yolo_x/pytorch_yolox.py | 37 ++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/model_demos/cv_demos/yolo_x/pytorch_yolox.py b/model_demos/cv_demos/yolo_x/pytorch_yolox.py index 9b4024fe..d175e63a 100644 --- a/model_demos/cv_demos/yolo_x/pytorch_yolox.py +++ b/model_demos/cv_demos/yolo_x/pytorch_yolox.py @@ -82,6 +82,43 @@ def run_yolox_pytorch(variant): compiler_cfg.place_on_new_epoch("concatenate_2264.dc.sparse_matmul.11.lc2") compiler_cfg.balancer_op_override("max_pool2d_1104.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (13, 1)) + elif available_devices[0] == BackendDevice.Grayskull: + + if variant == "yolox_nano": + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 1)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 1)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 1)) + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "81920" + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.14.lc2", "t_stream_shape", (1, 13)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.14.lc2", "t_stream_shape", (1, 13)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.14.lc2", "t_stream_shape", (1, 13)) + + elif variant == "yolox_tiny": + 
os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "81920" + compiler_cfg.balancer_op_override("max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (13, 1)) + compiler_cfg.balancer_op_override("_fused_op_34", "t_stream_shape", (1, 1)) + + elif variant == "yolox_s": + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", "t_stream_shape", (10, 1)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 5)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 5)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 5)) + compiler_cfg.balancer_op_override("max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (169, 1)) + + elif variant == "yolox_m": + os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1" + os.environ["PYBUDA_FORK_JOIN_EXPAND_OUTPUT_BUFFERS"] = "1" + os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" + compiler_cfg.balancer_op_override("concatenate_1530.dc.concatenate.7_to_concatenate_1530.dc.sparse_matmul.11.lc2_1_serialized_dram_queue.before_padded_node.nop_0", "grid_shape", (1, 1)) + compiler_cfg.place_on_new_epoch("concatenate_1530.dc.sparse_matmul.11.lc2") + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 2)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 2)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 2)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.10.lc2", "grid_shape", (1, 5)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "grid_shape", (1, 5)) + compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.10.lc2", "grid_shape", (1, 5)) + compiler_cfg.place_on_new_epoch("max_pool2d_671.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2") + # prepare model weight_name = f"{variant}.pth" url = f"https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/{weight_name}" From 184f0a1fcbdd5400088af4fcbd3e3e8b07fbb305 Mon Sep 17 00:00:00 2001 From: chandrasekaranpradeep Date: Thu, 1 Aug 2024 02:20:50 -0400 Subject: [PATCH 03/14] Fix segformer semantic segmentation ci failure --- .../segformer/pytorch_segformer_semantic_segmentation.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/model_demos/cv_demos/segformer/pytorch_segformer_semantic_segmentation.py b/model_demos/cv_demos/segformer/pytorch_segformer_semantic_segmentation.py index b6998f7e..2a921965 100644 --- a/model_demos/cv_demos/segformer/pytorch_segformer_semantic_segmentation.py +++ b/model_demos/cv_demos/segformer/pytorch_segformer_semantic_segmentation.py @@ -50,6 +50,15 @@ def run_segformer_semseg_pytorch(variant="nvidia/segformer-b0-finetuned-ade-512- ]: compiler_cfg.amp_level = 1 + if variant == "nvidia/segformer-b2-finetuned-ade-512-512": + compiler_cfg.place_on_new_epoch("concatenate_1098.dc.concatenate.0") + + elif variant == "nvidia/segformer-b3-finetuned-ade-512-512": + compiler_cfg.place_on_new_epoch("concatenate_1890.dc.concatenate.0") + + elif variant == "nvidia/segformer-b4-finetuned-ade-512-512": + compiler_cfg.place_on_new_epoch("concatenate_2748.dc.concatenate.0") + # Load the 
model from HuggingFace model = SegformerForSemanticSegmentation.from_pretrained(variant) model.eval() From 6439fd011a5e18e4e6d0bfc196bbfc05de9e30b8 Mon Sep 17 00:00:00 2001 From: Milan Kordic Date: Thu, 29 Aug 2024 00:21:31 +0000 Subject: [PATCH 04/14] Update HugePages setup instructions --- first_5_steps/1_install_tt_buda.md | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/first_5_steps/1_install_tt_buda.md b/first_5_steps/1_install_tt_buda.md index e28802d1..8dd3a971 100644 --- a/first_5_steps/1_install_tt_buda.md +++ b/first_5_steps/1_install_tt_buda.md @@ -44,29 +44,11 @@ If you would like to run PyBuda in a Python virtualenv, then follow the instruct ### Setup HugePages -1. Download latest [setup_hugepages.py](https://github.com/tenstorrent/tt-metal/blob/main/infra/machine_setup/scripts/setup_hugepages.py) script. - - ```sh - wget https://raw.githubusercontent.com/tenstorrent/tt-metal/main/infra/machine_setup/scripts/setup_hugepages.py - ``` - -2. Run first setup script. - - ```sh - sudo -E python3 setup_hugepages.py first_pass - ``` - -3. Reboot - - ```sh - sudo reboot now - ``` - -4. Run second setup script & check setup. - - ```sh - sudo -E python3 setup_hugepages.py enable && sudo -E python3 setup_hugepages.py check - ``` +```bash +git clone https://github.com/tenstorrent/tt-system-tools.git +cd tt-system-tools +sudo ./hugepages-setup.sh +``` ### PCI Driver Installation From 3c71b9f06a46d2f110daf3502e1e5b0a66217abb Mon Sep 17 00:00:00 2001 From: "Jush (yupiop12)" <36951064+JushBJJ@users.noreply.github.com> Date: Tue, 3 Sep 2024 21:17:03 +1000 Subject: [PATCH 05/14] [BOUNTY] Add Phi-2 (#117) * Qwen1.5 0.5B pybuda implementation * remove unneeded requirement * rename "acceleration" to "accelerate" * Update env vars and compiler configs * remove undefined device_map * Remove misleading and unnecessary environment variables * remove qwen from phi branch * Add Phi 2 * Update requirements.txt * Standardize Phi2 demo and added tests * Remove old phi2 demo * fix missing quote in pyproject.toml * fix * Fix test saying qwen1_5 instead of phi2 --- .../phi2/pytorch_phi2_text_generation.py | 68 +++++++++++++++++++ model_demos/pyproject.toml | 1 + model_demos/tests/test_pytorch_phi2.py | 10 +++ 3 files changed, 79 insertions(+) create mode 100644 model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py create mode 100644 model_demos/tests/test_pytorch_phi2.py diff --git a/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py b/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py new file mode 100644 index 00000000..a0f172f3 --- /dev/null +++ b/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py @@ -0,0 +1,68 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC +# SPDX-License-Identifier: Apache-2.0 + +# Phi2 Demo - Text Generation + +import os +import pybuda + +from transformers import PhiForCausalLM, AutoTokenizer, PhiConfig +from pybuda.transformers.pipeline import pipeline as pybuda_pipeline + +def run_phi2_causal_lm(batch_size=1): + os.environ["TT_BACKEND_TIMEOUT"] = '0' + + # Set PyBuda configurations + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + compiler_cfg.enable_auto_fusing = True + compiler_cfg.balancer_policy = "Ribbon" + + # Setup model configuration + config = PhiConfig.from_pretrained("microsoft/phi-2") + config.use_cache = False + config.return_dict = False + + # Load model and tokenizer with config + model = 
PhiForCausalLM.from_pretrained("microsoft/phi-2", config=config) + tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2") + tokenizer.pad_token, tokenizer.pad_token_id = (tokenizer.eos_token, tokenizer.eos_token_id) + + # Disable DynamicCache + # See: https://github.com/tenstorrent/tt-buda/issues/42 + model._supports_cache_class = False + + # Example usage + prompt = ["My name is Jim Keller and"] * batch_size + + # Initialize pipeline + text_generator = pybuda_pipeline( + "text-generation", + model=model, + tokenizer=tokenizer + ) + + # Inference on TT device + response = text_generator( + prompt, + temperature=0.7, + top_k=50, + top_p=0.9, + max_new_tokens=512, + num_beams=1, + do_sample=True, + no_repeat_ngram_size=5, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + early_stopping=True + ) + + # Display Responses + for batch_id in range(batch_size): + print(f"Batch: {batch_id}") + print(f"Response: {response[batch_id][0]['generated_text']}") + print() + + +if __name__ == "__main__": + run_phi2_causal_lm() \ No newline at end of file diff --git a/model_demos/pyproject.toml b/model_demos/pyproject.toml index bf7c8b4f..0b58a4c5 100644 --- a/model_demos/pyproject.toml +++ b/model_demos/pyproject.toml @@ -87,5 +87,6 @@ markers = [ "yolov6: tests that involve yolov6", "segformer: tests that involve SegFormer", "monodle: tests that involve Monodle", + "phi2: tests that involve Phi2", "yolox: tests that involve YOLOX", ] diff --git a/model_demos/tests/test_pytorch_phi2.py b/model_demos/tests/test_pytorch_phi2.py new file mode 100644 index 00000000..394fb41d --- /dev/null +++ b/model_demos/tests/test_pytorch_phi2.py @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from nlp_demos.phi2.pytorch_phi2_text_generation import run_phi2_causal_lm + +@pytest.mark.phi2 +def test_phi2_causal_lm_pytorch(clear_pybuda, test_device, batch_size): + run_phi2_causal_lm(batch_size=batch_size) \ No newline at end of file From 2c6b89ea6ced480719bf0c9379c2583dc17f29d6 Mon Sep 17 00:00:00 2001 From: "Jush (yupiop12)" <36951064+JushBJJ@users.noreply.github.com> Date: Tue, 3 Sep 2024 22:00:58 +1000 Subject: [PATCH 06/14] [BOUNTY] Add Qwen1.5 0.5B (#37) * Qwen1.5 0.5B pybuda implementation * remove unneeded requirement * Update env vars and compiler configs * remove undefined device_map * Remove misleading and unnecessary environment variables * Refine qwen solution * Rename qwen file * Rename qwen filename and added qwen1.5-chat * Add qwen1.5 test case * Fix typo in pyproject.toml * Disable dynamic caching * Add extra whitespace below model title commment * Fix typo "moadl" to "model" --- .../nlp_demos/qwen1_5/pytorch_qwen1_5.py | 68 +++++++++++++ .../nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py | 98 +++++++++++++++++++ model_demos/pyproject.toml | 1 + model_demos/requirements.txt | 2 +- model_demos/tests/test_pytorch_qwen1_5.py | 16 +++ 5 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5.py create mode 100644 model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py create mode 100644 model_demos/tests/test_pytorch_qwen1_5.py diff --git a/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5.py b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5.py new file mode 100644 index 00000000..96adf282 --- /dev/null +++ b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5.py @@ -0,0 +1,68 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC +# SPDX-License-Identifier: 
Apache-2.0 + +# Qwen1.5-0.5B Demo - Text Generation + +import os +import pybuda + +from transformers import Qwen2ForCausalLM, Qwen2Tokenizer, Qwen2Config +from pybuda.transformers.pipeline import pipeline as pybuda_pipeline + +def run_qwen1_5_causal_lm(batch_size=1): + os.environ["TT_BACKEND_TIMEOUT"] = '0' + + # Set PyBuda configurations + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + compiler_cfg.enable_auto_fusing = False + compiler_cfg.balancer_policy = "Ribbon" + + # Setup model configuration + config = Qwen2Config.from_pretrained("Qwen/Qwen1.5-0.5B") + config.use_cache = False + config.return_dict = False + + # Load model and tokenizer with config + model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B", config=config) + tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen1.5-0.5B") + tokenizer.pad_token, tokenizer.pad_token_id = (tokenizer.eos_token, tokenizer.eos_token_id) + + # Disable DynamicCache + # See: https://github.com/tenstorrent/tt-buda/issues/42 + model._supports_cache_class = False + + # Example usage + prompt = ["My name is Jim Keller and"] * batch_size + + # Initialize pipeline + text_generator = pybuda_pipeline( + "text-generation", + model=model, + tokenizer=tokenizer + ) + + # Inference on TT device + response = text_generator( + prompt, + temperature=0.7, + top_k=50, + top_p=0.9, + max_new_tokens=512, + num_beams=1, + do_sample=True, + no_repeat_ngram_size=5, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + early_stopping=True + ) + + # Display Responses + for batch_id in range(batch_size): + print(f"Batch: {batch_id}") + print(f"Response: {response[batch_id][0]['generated_text']}") + print() + + +if __name__ == "__main__": + run_qwen1_5_causal_lm() diff --git a/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py new file mode 100644 index 00000000..cbd4eeb3 --- /dev/null +++ b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py @@ -0,0 +1,98 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC +# SPDX-License-Identifier: Apache-2.0 + +# Qwen1.5-0.5B-Chat Demo - Chat + +import os +import pybuda +import re + +from transformers import Qwen2ForCausalLM, Qwen2Tokenizer, Qwen2Config +from pybuda.transformers.pipeline import pipeline as pybuda_pipeline + +def parse_chat_completion(text: str): + pattern = r'<\|im_start\|>\s*(\w+)\s*([\s\S]*?)\s*(?:<\|im_end\|>|$)' + matches = re.findall(pattern, text, re.DOTALL) + + messages = [] + for role, content in matches: + messages.append({"role": role, "content": content.strip()}) + + return messages + +def run_qwen1_5_chat(): + os.environ["TT_BACKEND_TIMEOUT"] = '0' + + # Set PyBuda configurations + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + compiler_cfg.enable_auto_fusing = False + compiler_cfg.balancer_policy = "Ribbon" + + # Setup model configuration + config = Qwen2Config.from_pretrained("Qwen/Qwen1.5-0.5B-Chat") + config.use_cache = False + config.return_dict = False + + # Load model and tokenizer with config + model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen1.5-0.5B-Chat", config=config) + tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat") + tokenizer.pad_token, tokenizer.pad_token_id = (tokenizer.eos_token, tokenizer.eos_token_id) + + # Disable DynamicCache + # See: https://github.com/tenstorrent/tt-buda/issues/42 + 
model._supports_cache_class = False + + # Sample chat messages + batch_messages = [ + [ + {"role": "system", "content": "You are Jim Keller, the CEO of Tenstorrent"}, + {"role": "user", "content": "Introduce yourself please!"} + ] + ] + batch_size = len(batch_messages) + + # Apply chat template to each batch + chat_texts = [ + tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + for messages in batch_messages[:batch_size] + ] + + # Initialize pipeline + text_generator = pybuda_pipeline( + "text-generation", + model=model, + tokenizer=tokenizer + ) + + # Inference on TT device + responses = text_generator( + chat_texts, + temperature=0.7, + top_k=50, + top_p=0.9, + max_new_tokens=512, + num_beams=1, + do_sample=True, + no_repeat_ngram_size=5, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + early_stopping=True + ) + + # Display Responses + for batch_id in range(batch_size): + print(f"Batch: {batch_id}") + raw_text = responses[batch_id][0]['generated_text'] + parsed_messages = parse_chat_completion(raw_text) + + for message in parsed_messages: + print(f"{message['role'].capitalize()}: {message['content']}") + print() + +if __name__ == "__main__": + run_qwen1_5_chat() \ No newline at end of file diff --git a/model_demos/pyproject.toml b/model_demos/pyproject.toml index 0b58a4c5..b73df1dd 100644 --- a/model_demos/pyproject.toml +++ b/model_demos/pyproject.toml @@ -89,4 +89,5 @@ markers = [ "monodle: tests that involve Monodle", "phi2: tests that involve Phi2", "yolox: tests that involve YOLOX", + "qwen1_5: tests that involve Qwen1.5", ] diff --git a/model_demos/requirements.txt b/model_demos/requirements.txt index bf33dcbb..0001d18b 100644 --- a/model_demos/requirements.txt +++ b/model_demos/requirements.txt @@ -10,4 +10,4 @@ librosa==0.10.0 # For Whisper segmentation-models-pytorch==0.3.3 # For U-Net diffusers==0.27.2 # For Stable Diffusion yolov6detect==0.4.1 # For YOLOv6 -datasets==2.16.1 +datasets==2.16.1 \ No newline at end of file diff --git a/model_demos/tests/test_pytorch_qwen1_5.py b/model_demos/tests/test_pytorch_qwen1_5.py new file mode 100644 index 00000000..d1e94812 --- /dev/null +++ b/model_demos/tests/test_pytorch_qwen1_5.py @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from nlp_demos.qwen1_5.pytorch_qwen1_5_chat import run_qwen1_5_chat +from nlp_demos.qwen1_5.pytorch_qwen1_5 import run_qwen1_5_causal_lm + +@pytest.mark.qwen1_5 +def test_qwen1_5_causal_lm_pytorch(clear_pybuda, test_device, batch_size): + run_qwen1_5_causal_lm(batch_size=batch_size) + + +@pytest.mark.qwen1_5 +def test_qwen1_5_chat_pytorch(clear_pybuda, test_device, batch_size): + run_qwen1_5_chat(batch_size=batch_size) From 6812d5e3ad9ed8fce63e4d6a4ec573510cb8c4c3 Mon Sep 17 00:00:00 2001 From: Milan Kordic Date: Mon, 9 Sep 2024 10:46:50 -0400 Subject: [PATCH 07/14] Update batching for 1x1 models --- .../efficientnet_lite/tflite_efficientnet_lite0_1x1.py | 4 ++-- .../efficientnet_lite/tflite_efficientnet_lite4_1x1.py | 4 ++-- model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py | 5 ++--- model_demos/cv_demos/landmark/palm_detection_lite_1x1.py | 5 ++--- model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py | 5 ++--- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite0_1x1.py b/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite0_1x1.py index f1841177..38e2ca59 100644 
--- a/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite0_1x1.py +++ b/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite0_1x1.py @@ -14,7 +14,7 @@ from pybuda._C.backend_api import BackendDevice -def run_efficientnet_lite0_1x1(): +def run_efficientnet_lite0_1x1(batch_size=1): # Device specific configurations available_devices = pybuda.detect_available_devices() @@ -50,7 +50,7 @@ def run_efficientnet_lite0_1x1(): tt_model = TFLiteModule("tflite_efficientnet_lite0", tflite_path) # Run inference on Tenstorrent device - input_shape = (1, 224, 224, 3) + input_shape = (batch_size, 224, 224, 3) input_tensor = torch.rand(input_shape) output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) diff --git a/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite4_1x1.py b/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite4_1x1.py index 130b6a51..a0456662 100644 --- a/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite4_1x1.py +++ b/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite4_1x1.py @@ -14,7 +14,7 @@ from pybuda._C.backend_api import BackendDevice -def run_efficientnet_lite4_1x1(): +def run_efficientnet_lite4_1x1(batch_size=1): # Device specific configurations available_devices = pybuda.detect_available_devices() @@ -50,7 +50,7 @@ def run_efficientnet_lite4_1x1(): tt_model = TFLiteModule("tflite_efficientnet_lite4", tflite_path) # STEP 3: Run inference on Tenstorrent device - input_shape = (1, 320, 320, 3) + input_shape = (batch_size, 320, 320, 3) input_tensor = torch.rand(input_shape) output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) diff --git a/model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py b/model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py index ba9895d9..2871d56a 100644 --- a/model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py +++ b/model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py @@ -44,10 +44,9 @@ def run_hand_landmark_lite_1x1(batch_size=1): tt_model = TFLiteModule("tflite_hand_landmark_lite", tflite_path) # Run inference on Tenstorrent device - input_shape = (1, 224, 224, 3) + input_shape = (batch_size, 224, 224, 3) input_tensor = torch.rand(input_shape) - batch_tensor = torch.cat([input_tensor] * batch_size, dim=0) - output_q = pybuda.run_inference(tt_model, inputs=([batch_tensor])) + output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) output = output_q.get() # Combine outputs for data parallel runs diff --git a/model_demos/cv_demos/landmark/palm_detection_lite_1x1.py b/model_demos/cv_demos/landmark/palm_detection_lite_1x1.py index c105e94a..be1aef4e 100644 --- a/model_demos/cv_demos/landmark/palm_detection_lite_1x1.py +++ b/model_demos/cv_demos/landmark/palm_detection_lite_1x1.py @@ -42,10 +42,9 @@ def run_palm_detection_lite_1x1(batch_size=1): tt_model = TFLiteModule("tflite_palm_detection_lite", tflite_path) # Run inference on Tenstorrent device - input_shape = (1, 192, 192, 3) + input_shape = (batch_size, 192, 192, 3) input_tensor = torch.rand(input_shape) - batch_tensor = torch.cat([input_tensor] * batch_size, dim=0) - output_q = pybuda.run_inference(tt_model, inputs=([batch_tensor])) + output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) output = output_q.get() # Combine outputs for data parallel runs diff --git a/model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py b/model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py index baaa2b43..a9f64c5b 100644 --- a/model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py +++ 
b/model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py @@ -45,10 +45,9 @@ def run_pose_landmark_lite_1x1(batch_size=1): tt_model = TFLiteModule("tflite_pose_landmark_light", tflite_path) # STEP 3: Run inference on Tenstorrent device - input_shape = (1, 256, 256, 3) + input_shape = (batch_size, 256, 256, 3) input_tensor = torch.rand(input_shape) - batch_tensor = torch.cat([input_tensor] * batch_size, dim=0) - output_q = pybuda.run_inference(tt_model, inputs=([batch_tensor])) + output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) output = output_q.get() # Combine outputs for data parallel runs From f7f5029edfc66b061bfe8b953192f09fd0277321 Mon Sep 17 00:00:00 2001 From: Milan Kordic Date: Mon, 9 Sep 2024 10:47:21 -0400 Subject: [PATCH 08/14] Code clean up for YOLOX, Phi2, Qwen1.5 --- model_demos/cv_demos/yolo_x/pytorch_yolox.py | 169 +++++++++++++----- .../phi2/pytorch_phi2_text_generation.py | 17 +- .../nlp_demos/qwen1_5/pytorch_qwen1_5.py | 15 +- .../nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py | 35 ++-- model_demos/tests/test_pytorch_phi2.py | 3 +- model_demos/tests/test_pytorch_qwen1_5.py | 3 +- model_demos/tests/test_pytorch_yolox.py | 1 + 7 files changed, 158 insertions(+), 85 deletions(-) diff --git a/model_demos/cv_demos/yolo_x/pytorch_yolox.py b/model_demos/cv_demos/yolo_x/pytorch_yolox.py index d175e63a..0ea0ac6d 100644 --- a/model_demos/cv_demos/yolo_x/pytorch_yolox.py +++ b/model_demos/cv_demos/yolo_x/pytorch_yolox.py @@ -2,21 +2,24 @@ import subprocess -subprocess.run(["pip", "install", "yolox==0.3.0", "--no-deps"]) # Install yolox==0.3.0 without installing its dependencies +subprocess.run( + ["pip", "install", "yolox==0.3.0", "--no-deps"] +) # Install yolox==0.3.0 without installing its dependencies + +import os -import pybuda -import torch import cv2 import numpy as np +import pybuda import requests -import os +import torch from pybuda._C.backend_api import BackendDevice torch.multiprocessing.set_sharing_strategy("file_system") -from yolox.exp import get_exp from yolox.data.data_augment import preproc as preprocess from yolox.data.datasets import COCO_CLASSES -from yolox.utils import multiclass_nms, demo_postprocess +from yolox.exp import get_exp +from yolox.utils import demo_postprocess, multiclass_nms def run_yolox_pytorch(variant): @@ -37,33 +40,61 @@ def run_yolox_pytorch(variant): os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" if variant in ["yolox_nano", "yolox_tiny"]: - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 2)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 2)) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 2) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 2) + ) os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "81920" if variant == "yolox_nano": - compiler_cfg.balancer_op_override("max_pool2d_630.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (1, 1)) + compiler_cfg.balancer_op_override( + "max_pool2d_630.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (1, 1) + ) elif variant == "yolox_tiny": - compiler_cfg.balancer_op_override("max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (1, 1)) + compiler_cfg.balancer_op_override( + "max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", 
"t_stream_shape", (1, 1) + ) elif variant == "yolox_s": - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4) + ) compiler_cfg.balancer_op_override("conv2d_33.dc.matmul.8", "t_stream_shape", (1, 1)) os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "4096" compiler_cfg.place_on_new_epoch("concatenate_1163.dc.sparse_matmul.11.lc2") - compiler_cfg.balancer_op_override("max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "grid_shape", (1, 2)) + compiler_cfg.balancer_op_override( + "max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "grid_shape", (1, 2) + ) elif variant == "yolox_m": compiler_cfg.place_on_new_epoch("conv2d_811.dc.matmul.8") - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 6)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", "t_stream_shape", (5, 1)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 6) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", "t_stream_shape", (5, 1) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4) + ) compiler_cfg.place_on_new_epoch("concatenate_1530.dc.sparse_matmul.11.lc2") os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "4096" - compiler_cfg.balancer_op_override("max_pool2d_671.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (169, 1)) + compiler_cfg.balancer_op_override( + "max_pool2d_671.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (169, 1) + ) elif variant == "yolox_l": os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "245760" @@ -75,48 +106,98 @@ def run_yolox_pytorch(variant): compiler_cfg.place_on_new_epoch("conv2d_1147.dc.matmul.11") elif variant == "yolox_x": - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) - 
compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", "t_stream_shape", (5, 1)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4)) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "t_stream_shape", (1, 4) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", "t_stream_shape", (5, 1) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 4) + ) compiler_cfg.place_on_new_epoch("concatenate_2264.dc.sparse_matmul.11.lc2") - compiler_cfg.balancer_op_override("max_pool2d_1104.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (13, 1)) + compiler_cfg.balancer_op_override( + "max_pool2d_1104.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (13, 1) + ) elif available_devices[0] == BackendDevice.Grayskull: if variant == "yolox_nano": - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 1)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 1)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 1)) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 1) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 1) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 1) + ) os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "81920" - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.14.lc2", "t_stream_shape", (1, 13)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.14.lc2", "t_stream_shape", (1, 13)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.14.lc2", "t_stream_shape", (1, 13)) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.14.lc2", "t_stream_shape", (1, 13) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.14.lc2", "t_stream_shape", (1, 13) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.14.lc2", "t_stream_shape", (1, 13) + ) elif variant == "yolox_tiny": os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "81920" - compiler_cfg.balancer_op_override("max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (13, 1)) + compiler_cfg.balancer_op_override( + "max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (13, 1) + ) compiler_cfg.balancer_op_override("_fused_op_34", "t_stream_shape", (1, 1)) elif variant == "yolox_s": - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", "t_stream_shape", (10, 1)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 5)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 5)) 
- compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 5)) - compiler_cfg.balancer_op_override("max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (169, 1)) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", "t_stream_shape", (10, 1) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 5) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 5) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "t_stream_shape", (1, 5) + ) + compiler_cfg.balancer_op_override( + "max_pool2d_454.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2", "t_stream_shape", (169, 1) + ) elif variant == "yolox_m": os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1" os.environ["PYBUDA_FORK_JOIN_EXPAND_OUTPUT_BUFFERS"] = "1" os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" - compiler_cfg.balancer_op_override("concatenate_1530.dc.concatenate.7_to_concatenate_1530.dc.sparse_matmul.11.lc2_1_serialized_dram_queue.before_padded_node.nop_0", "grid_shape", (1, 1)) + compiler_cfg.balancer_op_override( + "concatenate_1530.dc.concatenate.7_to_concatenate_1530.dc.sparse_matmul.11.lc2_1_serialized_dram_queue.before_padded_node.nop_0", + "grid_shape", + (1, 1), + ) compiler_cfg.place_on_new_epoch("concatenate_1530.dc.sparse_matmul.11.lc2") - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 2)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 2)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 2)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.10.lc2", "grid_shape", (1, 5)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "grid_shape", (1, 5)) - compiler_cfg.balancer_op_override("conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.10.lc2", "grid_shape", (1, 5)) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 2) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 2) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.4.lc2", "grid_shape", (4, 2) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.5.dc.reshape.0.dc.sparse_matmul.10.lc2", "grid_shape", (1, 5) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.3.dc.reshape.0.dc.sparse_matmul.10.lc2", "grid_shape", (1, 5) + ) + compiler_cfg.balancer_op_override( + "conv2d_7.dc.conv2d.1.dc.reshape.0.dc.sparse_matmul.10.lc2", "grid_shape", (1, 5) + ) compiler_cfg.place_on_new_epoch("max_pool2d_671.dc.sparse_matmul.5.dc.sparse_matmul.1.lc2") # prepare model diff --git a/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py b/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py index a0f172f3..c09472f2 100644 --- a/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py +++ b/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py @@ -4,13 +4,14 @@ # Phi2 Demo - Text Generation import os -import pybuda -from transformers import PhiForCausalLM, AutoTokenizer, 
PhiConfig +import pybuda from pybuda.transformers.pipeline import pipeline as pybuda_pipeline +from transformers import AutoTokenizer, PhiConfig, PhiForCausalLM + def run_phi2_causal_lm(batch_size=1): - os.environ["TT_BACKEND_TIMEOUT"] = '0' + os.environ["TT_BACKEND_TIMEOUT"] = "0" # Set PyBuda configurations compiler_cfg = pybuda.config._get_global_compiler_config() @@ -36,11 +37,7 @@ def run_phi2_causal_lm(batch_size=1): prompt = ["My name is Jim Keller and"] * batch_size # Initialize pipeline - text_generator = pybuda_pipeline( - "text-generation", - model=model, - tokenizer=tokenizer - ) + text_generator = pybuda_pipeline("text-generation", model=model, tokenizer=tokenizer) # Inference on TT device response = text_generator( @@ -54,7 +51,7 @@ def run_phi2_causal_lm(batch_size=1): no_repeat_ngram_size=5, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, - early_stopping=True + early_stopping=True, ) # Display Responses @@ -65,4 +62,4 @@ def run_phi2_causal_lm(batch_size=1): if __name__ == "__main__": - run_phi2_causal_lm() \ No newline at end of file + run_phi2_causal_lm() diff --git a/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5.py b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5.py index 96adf282..4c3722e1 100644 --- a/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5.py +++ b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5.py @@ -4,13 +4,14 @@ # Qwen1.5-0.5B Demo - Text Generation import os -import pybuda -from transformers import Qwen2ForCausalLM, Qwen2Tokenizer, Qwen2Config +import pybuda from pybuda.transformers.pipeline import pipeline as pybuda_pipeline +from transformers import Qwen2Config, Qwen2ForCausalLM, Qwen2Tokenizer + def run_qwen1_5_causal_lm(batch_size=1): - os.environ["TT_BACKEND_TIMEOUT"] = '0' + os.environ["TT_BACKEND_TIMEOUT"] = "0" # Set PyBuda configurations compiler_cfg = pybuda.config._get_global_compiler_config() @@ -36,11 +37,7 @@ def run_qwen1_5_causal_lm(batch_size=1): prompt = ["My name is Jim Keller and"] * batch_size # Initialize pipeline - text_generator = pybuda_pipeline( - "text-generation", - model=model, - tokenizer=tokenizer - ) + text_generator = pybuda_pipeline("text-generation", model=model, tokenizer=tokenizer) # Inference on TT device response = text_generator( @@ -54,7 +51,7 @@ def run_qwen1_5_causal_lm(batch_size=1): no_repeat_ngram_size=5, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, - early_stopping=True + early_stopping=True, ) # Display Responses diff --git a/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py index cbd4eeb3..5395be92 100644 --- a/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py +++ b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py @@ -4,24 +4,26 @@ # Qwen1.5-0.5B-Chat Demo - Chat import os -import pybuda import re -from transformers import Qwen2ForCausalLM, Qwen2Tokenizer, Qwen2Config +import pybuda from pybuda.transformers.pipeline import pipeline as pybuda_pipeline +from transformers import Qwen2Config, Qwen2ForCausalLM, Qwen2Tokenizer + def parse_chat_completion(text: str): - pattern = r'<\|im_start\|>\s*(\w+)\s*([\s\S]*?)\s*(?:<\|im_end\|>|$)' + pattern = r"<\|im_start\|>\s*(\w+)\s*([\s\S]*?)\s*(?:<\|im_end\|>|$)" matches = re.findall(pattern, text, re.DOTALL) - + messages = [] for role, content in matches: messages.append({"role": role, "content": content.strip()}) - + return messages + def run_qwen1_5_chat(): - os.environ["TT_BACKEND_TIMEOUT"] = '0' + os.environ["TT_BACKEND_TIMEOUT"] = "0" # Set 
PyBuda configurations compiler_cfg = pybuda.config._get_global_compiler_config() @@ -47,27 +49,19 @@ def run_qwen1_5_chat(): batch_messages = [ [ {"role": "system", "content": "You are Jim Keller, the CEO of Tenstorrent"}, - {"role": "user", "content": "Introduce yourself please!"} + {"role": "user", "content": "Introduce yourself please!"}, ] ] batch_size = len(batch_messages) # Apply chat template to each batch chat_texts = [ - tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) + tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) for messages in batch_messages[:batch_size] ] # Initialize pipeline - text_generator = pybuda_pipeline( - "text-generation", - model=model, - tokenizer=tokenizer - ) + text_generator = pybuda_pipeline("text-generation", model=model, tokenizer=tokenizer) # Inference on TT device responses = text_generator( @@ -81,18 +75,19 @@ def run_qwen1_5_chat(): no_repeat_ngram_size=5, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, - early_stopping=True + early_stopping=True, ) # Display Responses for batch_id in range(batch_size): print(f"Batch: {batch_id}") - raw_text = responses[batch_id][0]['generated_text'] + raw_text = responses[batch_id][0]["generated_text"] parsed_messages = parse_chat_completion(raw_text) for message in parsed_messages: print(f"{message['role'].capitalize()}: {message['content']}") print() + if __name__ == "__main__": - run_qwen1_5_chat() \ No newline at end of file + run_qwen1_5_chat() diff --git a/model_demos/tests/test_pytorch_phi2.py b/model_demos/tests/test_pytorch_phi2.py index 394fb41d..1602fa6b 100644 --- a/model_demos/tests/test_pytorch_phi2.py +++ b/model_demos/tests/test_pytorch_phi2.py @@ -5,6 +5,7 @@ from nlp_demos.phi2.pytorch_phi2_text_generation import run_phi2_causal_lm + @pytest.mark.phi2 def test_phi2_causal_lm_pytorch(clear_pybuda, test_device, batch_size): - run_phi2_causal_lm(batch_size=batch_size) \ No newline at end of file + run_phi2_causal_lm(batch_size=batch_size) diff --git a/model_demos/tests/test_pytorch_qwen1_5.py b/model_demos/tests/test_pytorch_qwen1_5.py index d1e94812..624555bb 100644 --- a/model_demos/tests/test_pytorch_qwen1_5.py +++ b/model_demos/tests/test_pytorch_qwen1_5.py @@ -3,8 +3,9 @@ import pytest -from nlp_demos.qwen1_5.pytorch_qwen1_5_chat import run_qwen1_5_chat from nlp_demos.qwen1_5.pytorch_qwen1_5 import run_qwen1_5_causal_lm +from nlp_demos.qwen1_5.pytorch_qwen1_5_chat import run_qwen1_5_chat + @pytest.mark.qwen1_5 def test_qwen1_5_causal_lm_pytorch(clear_pybuda, test_device, batch_size): diff --git a/model_demos/tests/test_pytorch_yolox.py b/model_demos/tests/test_pytorch_yolox.py index 41cb48e2..801d958b 100644 --- a/model_demos/tests/test_pytorch_yolox.py +++ b/model_demos/tests/test_pytorch_yolox.py @@ -1,4 +1,5 @@ import pytest + from cv_demos.yolo_x.pytorch_yolox import run_yolox_pytorch variants = ["yolox_nano", "yolox_tiny", "yolox_s", "yolox_m", "yolox_l", "yolox_darknet", "yolox_x"] From 92326ffafdeca0278250cddfc8c4cf229dbcdedf Mon Sep 17 00:00:00 2001 From: Milan Kordic Date: Mon, 9 Sep 2024 10:52:19 -0400 Subject: [PATCH 09/14] Update installation instructions with link to release tags --- first_5_steps/1_install_tt_buda.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/first_5_steps/1_install_tt_buda.md b/first_5_steps/1_install_tt_buda.md index 8dd3a971..3519b3bb 100644 --- a/first_5_steps/1_install_tt_buda.md +++ b/first_5_steps/1_install_tt_buda.md @@ -169,7 
+169,7 @@ For example, to run on an Ubuntu version 20.04 on a Grayskull device, use this c
 sudo docker pull ghcr.io/tenstorrent/tt-buda/ubuntu-20-04-amd64/gs:<tag>
 ```
 
-where `<tag>` is the version number i.e. `v0.12.3`.
+where `<tag>` is the release version number from: <https://github.com/tenstorrent/tt-buda/tags>
 
 #### Step 2. Run the container
 
From efaf36a546fcc172f95e3bd8839ed0dbc00af0ec Mon Sep 17 00:00:00 2001
From: Milan Kordic
Date: Tue, 10 Sep 2024 13:42:01 +0000
Subject: [PATCH 10/14] Update installation instructions for configuring
 systems

---
 first_5_steps/1_install_tt_buda.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/first_5_steps/1_install_tt_buda.md b/first_5_steps/1_install_tt_buda.md
index 3519b3bb..d738fe04 100644
--- a/first_5_steps/1_install_tt_buda.md
+++ b/first_5_steps/1_install_tt_buda.md
@@ -28,6 +28,7 @@ Once you have identified the release version you would like to install, you can
    3. [Device Firmware Update](#device-firmware-update)
    4. [Backend Compiler Dependencies](#backend-compiler-dependencies)
    5. [TT-SMI](#tt-smi)
+   6. [Topology (TT-LoudBox/TT-QuietBox Only)](#tt-topology-tt-loudboxtt-quietbox-systems-only)
 2. [PyBuda Installation](#pybuda-installation)
    1. [Python Environment Installation](#python-environment-installation)
    2. [Docker Container Installation](#docker-container-installation)
@@ -93,6 +94,10 @@ rm libyaml-cpp-dev_0.6.2-4ubuntu1_amd64.deb libyaml-cpp0.6_0.6.2-4ubuntu1_amd64.
 
 Please navigate to [tt-smi](https://github.com/tenstorrent/tt-smi) homepage and follow instructions within the README.
 
+### TT-Topology (TT-LoudBox/TT-QuietBox Systems Only)
+
+If you are running on a TT-LoudBox or TT-QuietBox system, please navigate to [tt-topology](https://github.com/tenstorrent/tt-topology) homepage and follow instructions within the README.
+
 ## PyBuda Installation
 
 There are two ways to install PyBuda within the host environment: using Python virtual environment or Docker container.
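[Editor's note on PATCH 10/14 above: on TT-LoudBox/TT-QuietBox systems, the tt-topology step the patch points to generally amounts to cloning the tool, installing it, and flashing a board-to-board layout. The sketch below is illustrative only; the `pip3 install .` step and the `mesh` layout flag are assumptions drawn from tt-topology README conventions, not part of this patch series, so defer to that README for the authoritative steps.]

```bash
# Hypothetical sketch of the tt-topology flow referenced above — verify each
# command against the tt-topology README before running it.
git clone https://github.com/tenstorrent/tt-topology.git
cd tt-topology
pip3 install .

# Flash a mesh ethernet layout across the cards (the layout name is an
# assumption; the README lists the layouts supported by each system).
tt-topology -l mesh
```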
From 0cea364ee8a42f0fe3169906dde373da82e7b5d8 Mon Sep 17 00:00:00 2001
From: Milan Kordic
Date: Thu, 12 Sep 2024 14:31:28 +0000
Subject: [PATCH 11/14] Update models support table for 8/30 pybuda release

---
 model_demos/README.md | 120 +++++++++++++++++++++---------------------
 1 file changed, 61 insertions(+), 59 deletions(-)

diff --git a/model_demos/README.md b/model_demos/README.md
index 10213de5..f28a790a 100644
--- a/model_demos/README.md
+++ b/model_demos/README.md
@@ -27,69 +27,71 @@ python cv_demos/resnet/pytorch_resnet.py

 ## Models Support Table

-| **Model** | **e75** | **e150** | **n150** | **n300 (single-chip)** | **Supported Release** |
-| --------------------------------------------------------- | :-----: | :------: | :------: | :--------------------: | :-------------------: |
-| [ALBERT](nlp_demos/albert/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [Autoencoder (convolutional)](cv_demos/conv_autoencoder/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [Autoencoder (linear)](cv_demos/linear_autoencoder/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [BeiT](cv_demos/beit/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [BERT](nlp_demos/bert/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [CLIP](cv_demos/clip/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [CodeGen](nlp_demos/codegen/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [DeiT](cv_demos/deit/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [DenseNet](cv_demos/densenet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [DistilBERT](nlp_demos/distilbert/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [DLA](cv_demos/dla/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [DPR](nlp_demos/dpr/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [EfficientNet-Lite](cv_demos/efficientnet_lite/) | ✘ | ✘ | ✔️ | ✔️ | v0.12.3 |
-| [Falcon-7B](nlp_demos/falcon/) | ✘ | ✘ | ✔️ | ✔️ | v0.18.2 |
-| [FLAN-T5](nlp_demos/flant5/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| FPN | ✘ | ✘ | ✘ | ✘ | TBD |
-| [Fuyu-8B](nlp_demos/fuyu8b/) | ✘ | ✘ | ✘ | ✘ | TBD |
-| [GhostNet](cv_demos/ghostnet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [GoogLeNet](cv_demos/googlenet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [GPT-2](nlp_demos/gpt2/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [GPT Neo](nlp_demos/gptneo/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [Hand Landmark](cv_demos/landmark/) | ✘ | ✘ | ✔️ | ✔️ | v0.12.3 |
-| [HardNet](cv_demos/hardnet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [HRNet](cv_demos/hrnet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [Inception-v4](cv_demos/inceptionv4/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [MLP-Mixer](cv_demos/mlpmixer/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [MobileNetSSD](cv_demos/mobilenet_ssd/) | ✘ | ✘ | ✔️ | ✔️ | v0.12.3 |
-| [MobileNetV1](cv_demos/mobilenet_v1/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [MobileNetV2](cv_demos/mobilenet_v2/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [MobileNetV3](cv_demos/mobilenet_v3/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [Monodle](cv_demos/monodle/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [OpenPose](cv_demos/openpose/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [OPT](nlp_demos/opt/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [Pose Landmark](cv_demos/landmark/) | ✘ | ✘ | ✔️ | ✔️ | v0.12.3 |
-| [Perceiver IO](cv_demos/perceiverio/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [ResNet](cv_demos/resnet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [ResNeXt](cv_demos/resnext/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [RetinaNet](cv_demos/retinanet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [RoBERTa](nlp_demos/roberta/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [SegFormer](cv_demos/segformer/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [SqueezeBERT](nlp_demos/squeezebert/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [SSD300 ResNet50](cv_demos/ssd300_resnet50/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [Stable Diffusion](cv_demos/stable_diffusion/) | ✘ | ✘ | ✔️ | ✔️ | v0.18.2 |
-| [T5](nlp_demos/t5/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [U-Net](cv_demos/unet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [VGG](cv_demos/vgg/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [ViLT](cv_demos/vilt/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [ViT](cv_demos/vit/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [VoVNet](cv_demos/vovnet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [WideResNet](cv_demos/wideresnet/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [Whisper](audio_demos/whisper/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [Xception](cv_demos/xception/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [XGLM](nlp_demos/xglm/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [YOLOv3](cv_demos/yolo_v3/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [YOLOv5](cv_demos/yolo_v5/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
-| [YOLOv6](cv_demos/yolo_v6/) | ✔️ | ✔️ | ✔️ | ✔️ | v0.18.2 |
+| **Model** | **e75** | **e150** | **n150** | **n300 (single-chip)** | **n300 (dual-chip)** | **Supported Release** |
+| --------------------------------------------------------- | :-----: | :------: | :------: | :--------------------: | :-------------------: | :-------------------: |
+| [ALBERT](nlp_demos/albert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [Autoencoder (convolutional)](cv_demos/conv_autoencoder/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [Autoencoder (linear)](cv_demos/linear_autoencoder/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [BeiT](cv_demos/beit/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [BERT](nlp_demos/bert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [CLIP](cv_demos/clip/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [CodeGen](nlp_demos/codegen/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [DeiT](cv_demos/deit/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [DenseNet](cv_demos/densenet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [DistilBERT](nlp_demos/distilbert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [DLA](cv_demos/dla/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [DPR](nlp_demos/dpr/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [EfficientNet-Lite](cv_demos/efficientnet_lite/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [Falcon-7B](nlp_demos/falcon/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [FLAN-T5](nlp_demos/flant5/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [Fuyu-8B](nlp_demos/fuyu8b/) | ✘ | ✘ | ✘ | ✘ | ✘ | TBD |
+| [GhostNet](cv_demos/ghostnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [GoogLeNet](cv_demos/googlenet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [GPT-2](nlp_demos/gpt2/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [GPT Neo](nlp_demos/gptneo/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [Hand Landmark](cv_demos/landmark/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [HardNet](cv_demos/hardnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [HRNet](cv_demos/hrnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [Inception-v4](cv_demos/inceptionv4/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [MLP-Mixer](cv_demos/mlpmixer/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [MobileNetSSD](cv_demos/mobilenet_ssd/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [MobileNetV1](cv_demos/mobilenet_v1/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [MobileNetV2](cv_demos/mobilenet_v2/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [MobileNetV3](cv_demos/mobilenet_v3/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [Monodle](cv_demos/monodle/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [OpenPose](cv_demos/openpose/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [OPT](nlp_demos/opt/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [Pose Landmark](cv_demos/landmark/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [Perceiver IO](cv_demos/perceiverio/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [Phi2](nlp_demos/phi2/) | ✔️ | ✔️ | ✘ | ✘ | ✘ | v0.19.1 |
+| [Qwen1.5](nlp_demos/qwen1_5/) | ✔️ | ✔️ | ✘ | ✘ | ✘ | v0.19.1 |
+| [ResNet](cv_demos/resnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [ResNeXt](cv_demos/resnext/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [RetinaNet](cv_demos/retinanet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [RoBERTa](nlp_demos/roberta/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [SegFormer](cv_demos/segformer/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [SqueezeBERT](nlp_demos/squeezebert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [SSD300 ResNet50](cv_demos/ssd300_resnet50/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [Stable Diffusion](cv_demos/stable_diffusion/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [T5](nlp_demos/t5/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [U-Net](cv_demos/unet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [VGG](cv_demos/vgg/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [ViLT](cv_demos/vilt/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [ViT](cv_demos/vit/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [VoVNet](cv_demos/vovnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [WideResNet](cv_demos/wideresnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [Whisper](audio_demos/whisper/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [Xception](cv_demos/xception/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [XGLM](nlp_demos/xglm/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [YOLOv3](cv_demos/yolo_v3/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [YOLOv5](cv_demos/yolo_v5/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [YOLOv6](cv_demos/yolo_v6/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [YOLOX](cv_demos/yolo_x/) | ✘ | ✘ | ✔️ | ✔️ | ✔️ | v0.19.1 |

 ### Legend

 - ✔️: Supported on the device
-- ✘: Not supported on the device
+- ✘: Not all variants supported on the device

 ## Note

From 55e2f798e90b9796bb753f5ba7c6f15639c1715d Mon Sep 17 00:00:00 2001
From: Milan Kordic
Date: Thu, 12 Sep 2024 14:51:39 +0000
Subject: [PATCH 12/14] Update batching for Qwen1.5

---
 model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py
index 5395be92..5c9cc249 100644
--- a/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py
+++ b/model_demos/nlp_demos/qwen1_5/pytorch_qwen1_5_chat.py
@@ -22,7 +22,7 @@ def parse_chat_completion(text: str):
     return messages


-def run_qwen1_5_chat():
+def run_qwen1_5_chat(batch_size=1):
     os.environ["TT_BACKEND_TIMEOUT"] = "0"

     # Set PyBuda configurations
@@ -51,8 +51,8 @@ def run_qwen1_5_chat(batch_size=1):
             {"role": "system", "content": "You are Jim Keller, the CEO of Tenstorrent"},
             {"role": "user", "content": "Introduce yourself please!"},
         ]
+        * batch_size
     ]
-    batch_size = len(batch_messages)

     # Apply chat template to each batch
     chat_texts = [

From 6309fd10ca52a918bf209e8a9a20675fa07e61ac Mon Sep 17 00:00:00 2001
From: Milan Kordic
Date: Thu, 12 Sep 2024 14:51:59 +0000
Subject: [PATCH 13/14] Update batching for YOLOX

---
 model_demos/cv_demos/yolo_x/pytorch_yolox.py | 14 ++++++++++++--
 model_demos/tests/test_pytorch_yolox.py | 9 ++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/model_demos/cv_demos/yolo_x/pytorch_yolox.py b/model_demos/cv_demos/yolo_x/pytorch_yolox.py
index 0ea0ac6d..843bbc8c 100644
--- a/model_demos/cv_demos/yolo_x/pytorch_yolox.py
+++ b/model_demos/cv_demos/yolo_x/pytorch_yolox.py
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
+# SPDX-License-Identifier: Apache-2.0
+
 # yolox demo script

 import subprocess
@@ -22,7 +25,7 @@
 from yolox.utils import demo_postprocess, multiclass_nms


-def run_yolox_pytorch(variant):
+def run_yolox_pytorch(variant, batch_size=1):

     # Set PyBuda configuration parameters
     compiler_cfg = pybuda.config._get_global_compiler_config()
@@ -234,11 +237,18 @@ def run_yolox_pytorch(variant):
     img, ratio = preprocess(img, input_shape)
     img_tensor = torch.from_numpy(img)
     img_tensor = img_tensor.unsqueeze(0)
+    batch_input = torch.cat([img_tensor] * batch_size, dim=0)

     # Run inference on Tenstorrent device
-    output_q = pybuda.run_inference(tt_model, inputs=[(img_tensor)])
+    output_q = pybuda.run_inference(tt_model, inputs=[(batch_input)])
     output = output_q.get()

+    # Combine outputs for data parallel runs
+    if os.environ.get("PYBUDA_N300_DATA_PARALLEL", "0") == "1":
+        concat_tensor = torch.cat((output[0].to_pytorch(), output[1].to_pytorch()), dim=0)
+        buda_tensor = pybuda.Tensor.create_from_torch(concat_tensor)
+        output = [buda_tensor]
+
     # Post-processing
     for i in range(len(output)):
         output[i] = output[i].value().detach().float().numpy()

diff --git a/model_demos/tests/test_pytorch_yolox.py b/model_demos/tests/test_pytorch_yolox.py
index 801d958b..c26db035 100644
--- a/model_demos/tests/test_pytorch_yolox.py
+++ b/model_demos/tests/test_pytorch_yolox.py
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
+# SPDX-License-Identifier: Apache-2.0
+
 import pytest

 from cv_demos.yolo_x.pytorch_yolox import run_yolox_pytorch
@@ -5,7 +8,7 @@
 variants = ["yolox_nano", "yolox_tiny", "yolox_s", "yolox_m", "yolox_l", "yolox_darknet", "yolox_x"]


-@pytest.mark.parametrize("variant", variants)
+@pytest.mark.parametrize("variant", variants, ids=variants)
 @pytest.mark.yolox
-def test_yolox_pytorch(variant, clear_pybuda, test_device):
-    run_yolox_pytorch(variant)
+def test_yolox_pytorch(clear_pybuda, test_device, variant, batch_size):
+    run_yolox_pytorch(variant, batch_size=batch_size)

From 1d039d19aaa4fc6770a2f808d27292e4405324e2 Mon Sep 17 00:00:00 2001
From: Milan Kordic
Date: Wed, 18 Sep 2024 13:00:15 -0400
Subject: [PATCH 14/14] Update Model Support table

---
 model_demos/README.md | 114 +++++++++++++++++++++---------------------
 1 file changed, 57 insertions(+), 57 deletions(-)

diff --git a/model_demos/README.md b/model_demos/README.md
index f28a790a..b34b38cf 100644
--- a/model_demos/README.md
+++ b/model_demos/README.md
@@ -29,64 +29,64 @@ python cv_demos/resnet/pytorch_resnet.py

 | **Model** | **e75** | **e150** | **n150** | **n300 (single-chip)** | **n300 (dual-chip)** | **Supported Release** |
 | --------------------------------------------------------- | :-----: | :------: | :------: | :--------------------: | :-------------------: | :-------------------: |
-| [ALBERT](nlp_demos/albert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [Autoencoder (convolutional)](cv_demos/conv_autoencoder/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [Autoencoder (linear)](cv_demos/linear_autoencoder/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [BeiT](cv_demos/beit/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [BERT](nlp_demos/bert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [CLIP](cv_demos/clip/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [CodeGen](nlp_demos/codegen/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [DeiT](cv_demos/deit/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [DenseNet](cv_demos/densenet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [DistilBERT](nlp_demos/distilbert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [DLA](cv_demos/dla/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [DPR](nlp_demos/dpr/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [EfficientNet-Lite](cv_demos/efficientnet_lite/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [Falcon-7B](nlp_demos/falcon/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [FLAN-T5](nlp_demos/flant5/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
+| [ALBERT](nlp_demos/albert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [Autoencoder (convolutional)](cv_demos/conv_autoencoder/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [Autoencoder (linear)](cv_demos/linear_autoencoder/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [BeiT](cv_demos/beit/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [BERT](nlp_demos/bert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [CLIP](cv_demos/clip/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [CodeGen](nlp_demos/codegen/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [DeiT](cv_demos/deit/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [DenseNet](cv_demos/densenet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [DistilBERT](nlp_demos/distilbert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [DLA](cv_demos/dla/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [DPR](nlp_demos/dpr/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [EfficientNet-Lite](cv_demos/efficientnet_lite/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [Falcon-7B](nlp_demos/falcon/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [FLAN-T5](nlp_demos/flant5/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
 | [Fuyu-8B](nlp_demos/fuyu8b/) | ✘ | ✘ | ✘ | ✘ | ✘ | TBD |
-| [GhostNet](cv_demos/ghostnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [GoogLeNet](cv_demos/googlenet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [GPT-2](nlp_demos/gpt2/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [GPT Neo](nlp_demos/gptneo/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [Hand Landmark](cv_demos/landmark/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [HardNet](cv_demos/hardnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [HRNet](cv_demos/hrnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [Inception-v4](cv_demos/inceptionv4/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [MLP-Mixer](cv_demos/mlpmixer/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [MobileNetSSD](cv_demos/mobilenet_ssd/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [MobileNetV1](cv_demos/mobilenet_v1/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [MobileNetV2](cv_demos/mobilenet_v2/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [MobileNetV3](cv_demos/mobilenet_v3/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [Monodle](cv_demos/monodle/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [OpenPose](cv_demos/openpose/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [OPT](nlp_demos/opt/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [Pose Landmark](cv_demos/landmark/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [Perceiver IO](cv_demos/perceiverio/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [Phi2](nlp_demos/phi2/) | ✔️ | ✔️ | ✘ | ✘ | ✘ | v0.19.1 |
-| [Qwen1.5](nlp_demos/qwen1_5/) | ✔️ | ✔️ | ✘ | ✘ | ✘ | v0.19.1 |
-| [ResNet](cv_demos/resnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [ResNeXt](cv_demos/resnext/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [RetinaNet](cv_demos/retinanet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [RoBERTa](nlp_demos/roberta/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [SegFormer](cv_demos/segformer/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [SqueezeBERT](nlp_demos/squeezebert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [SSD300 ResNet50](cv_demos/ssd300_resnet50/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [Stable Diffusion](cv_demos/stable_diffusion/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [T5](nlp_demos/t5/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [U-Net](cv_demos/unet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [VGG](cv_demos/vgg/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [ViLT](cv_demos/vilt/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [ViT](cv_demos/vit/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [VoVNet](cv_demos/vovnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [WideResNet](cv_demos/wideresnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [Whisper](audio_demos/whisper/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [Xception](cv_demos/xception/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [XGLM](nlp_demos/xglm/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [YOLOv3](cv_demos/yolo_v3/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [YOLOv5](cv_demos/yolo_v5/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.1 |
-| [YOLOv6](cv_demos/yolo_v6/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.1 |
-| [YOLOX](cv_demos/yolo_x/) | ✘ | ✘ | ✔️ | ✔️ | ✔️ | v0.19.1 |
+| [GhostNet](cv_demos/ghostnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [GoogLeNet](cv_demos/googlenet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [GPT-2](nlp_demos/gpt2/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [GPT Neo](nlp_demos/gptneo/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [Hand Landmark](cv_demos/landmark/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [HardNet](cv_demos/hardnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [HRNet](cv_demos/hrnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [Inception-v4](cv_demos/inceptionv4/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [MLP-Mixer](cv_demos/mlpmixer/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [MobileNetSSD](cv_demos/mobilenet_ssd/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [MobileNetV1](cv_demos/mobilenet_v1/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [MobileNetV2](cv_demos/mobilenet_v2/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [MobileNetV3](cv_demos/mobilenet_v3/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [Monodle](cv_demos/monodle/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [OpenPose](cv_demos/openpose/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [OPT](nlp_demos/opt/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [Pose Landmark](cv_demos/landmark/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [Perceiver IO](cv_demos/perceiverio/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [Phi2](nlp_demos/phi2/) | ✔️ | ✔️ | ✘ | ✘ | ✘ | v0.19.3 |
+| [Qwen1.5](nlp_demos/qwen1_5/) | ✔️ | ✔️ | ✘ | ✘ | ✘ | v0.19.3 |
+| [ResNet](cv_demos/resnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [ResNeXt](cv_demos/resnext/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [RetinaNet](cv_demos/retinanet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [RoBERTa](nlp_demos/roberta/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [SegFormer](cv_demos/segformer/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [SqueezeBERT](nlp_demos/squeezebert/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [SSD300 ResNet50](cv_demos/ssd300_resnet50/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [Stable Diffusion](cv_demos/stable_diffusion/) | ✘ | ✘ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [T5](nlp_demos/t5/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [U-Net](cv_demos/unet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [VGG](cv_demos/vgg/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [ViLT](cv_demos/vilt/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [ViT](cv_demos/vit/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [VoVNet](cv_demos/vovnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [WideResNet](cv_demos/wideresnet/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [Whisper](audio_demos/whisper/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [Xception](cv_demos/xception/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [XGLM](nlp_demos/xglm/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [YOLOv3](cv_demos/yolo_v3/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [YOLOv5](cv_demos/yolo_v5/) | ✔️ | ✔️ | ✔️ | ✔️ | ✘ | v0.19.3 |
+| [YOLOv6](cv_demos/yolo_v6/) | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | v0.19.3 |
+| [YOLOX](cv_demos/yolo_x/) | ✘ | ✘ | ✔️ | ✔️ | ✔️ | v0.19.3 |

 ### Legend
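The two batching patches above (PATCH 12 and PATCH 13) share one pattern: the demo takes a `batch_size` argument, tiles a single preprocessed sample into a batch, and, when the run is split across both chips of an n300, stitches the per-chip outputs back together. Below is a minimal sketch of that pattern, assuming the `tt_model` and `img_tensor` built earlier in the YOLOX demo; the helper name `run_batched_inference` is ours for illustration, not a function defined by the patches.

```python
# Minimal sketch of the batching pattern from PATCH 12/13.
# `run_batched_inference` is a hypothetical helper name; the pybuda calls
# themselves are the ones used verbatim in the YOLOX diff above.
import os

import pybuda
import torch


def run_batched_inference(tt_model, img_tensor, batch_size=1):
    # Tile the single [1, C, H, W] sample into a [batch_size, C, H, W] batch
    batch_input = torch.cat([img_tensor] * batch_size, dim=0)

    # Run inference on the Tenstorrent device
    output_q = pybuda.run_inference(tt_model, inputs=[(batch_input)])
    output = output_q.get()

    # In dual-chip (data parallel) n300 runs each chip returns part of the
    # batch; concatenate the two halves back into a single output tensor
    if os.environ.get("PYBUDA_N300_DATA_PARALLEL", "0") == "1":
        concat_tensor = torch.cat((output[0].to_pytorch(), output[1].to_pytorch()), dim=0)
        output = [pybuda.Tensor.create_from_torch(concat_tensor)]

    return output
```

Judging by the flag's name, a dual-chip run would be launched with `PYBUDA_N300_DATA_PARALLEL=1` in the environment; the patches only read the variable, so how it gets set is left to the caller.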
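PATCH 13 also reworks the test signature to `test_yolox_pytorch(clear_pybuda, test_device, variant, batch_size)`, which presumes a `batch_size` pytest fixture defined elsewhere in the suite; its definition is not part of this patch series. A hypothetical `conftest.py` sketch that would satisfy it is shown below; the `--batch-size` option name and the default of 1 are assumptions, not taken from the patches.

```python
# Hypothetical conftest.py sketch: the patch series does not show how the
# batch_size fixture is defined, so the option name and default are guesses.
import pytest


def pytest_addoption(parser):
    # Expose a command-line knob so the whole suite can be run at any batch size
    parser.addoption(
        "--batch-size",
        action="store",
        default=1,
        type=int,
        help="batch size forwarded to the model demo scripts",
    )


@pytest.fixture
def batch_size(request):
    # Hand the parsed option value to any test that requests `batch_size`
    return request.config.getoption("--batch-size")
```

With a fixture like this, `pytest -m yolox --batch-size 2` would exercise every YOLOX variant with a two-image batch while the default invocation keeps the original single-image behavior.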