From 7a1d0b4b686404f1dd663a91e9cd5834278f7ad4 Mon Sep 17 00:00:00 2001
From: Milan Kordic <mkordic@tenstorrent.com>
Date: Thu, 18 Jul 2024 11:23:20 +0000
Subject: [PATCH] Remove commented-out config code and tidy formatting in benchmark models

---
 benchmark/models/bert/bert.py                 | 17 -----
 benchmark/models/deit/deit.py                 | 23 -------
 benchmark/models/falcon/utils/model.py        |  3 +-
 benchmark/models/falcon/utils/pybudify.py     | 65 -------------------
 benchmark/models/flant5/flant5.py             | 39 +----------
 .../flant5/flant5_past_cache_enc_dec.py       | 21 +-----
 benchmark/models/hrnet/hrnet.py               | 29 +--------
 benchmark/models/inception_v4/inception_v4.py | 10 ---
 benchmark/models/mobilenet_v1/mobilenet_v1.py | 33 ++--------
 benchmark/models/mobilenet_v2/mobilenet_v2.py | 44 ++-----------
 benchmark/models/mobilenet_v3/mobilenet_v3.py | 12 ----
 benchmark/models/open_pose/open_pose.py       | 15 +----
 benchmark/models/resnet/resnet.py             | 23 -------
 benchmark/models/t5/t5.py                     | 39 +----------
 benchmark/models/t5/t5_past_cache_enc_dec.py  | 18 -----
 benchmark/models/unet/unet.py                 | 29 +--------
 benchmark/models/vit/vit.py                   | 22 -------
 benchmark/models/vovnet/vovnet_v2.py          | 26 +-------
 benchmark/models/whisper/whisper_enc_dec.py   | 16 +----
 benchmark/models/yolo_v5/utils/common.py      |  2 +-
 benchmark/models/yolo_v5/yolo_v5.py           | 32 ---------
 21 files changed, 24 insertions(+), 494 deletions(-)

diff --git a/benchmark/models/bert/bert.py b/benchmark/models/bert/bert.py
index f1b1f65..1dd3b6c 100644
--- a/benchmark/models/bert/bert.py
+++ b/benchmark/models/bert/bert.py
@@ -35,23 +35,6 @@ def bert(training: bool, task: str, config: str, microbatch: int, device: str, d
         if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
             os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
 
-        # compiler_cfg.enable_auto_transposing_placement = True
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-        #     os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-        #     os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
-        #     if data_type == "Bfp8_b":
-        #         if pybuda.detect_available_devices()[0] != BackendDevice.Grayskull:
-        #             os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1"
-        #         os.environ["PYBUDA_EXP_APPROX"] = "1"
-        #         pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
-        #         pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b)
-        #         pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
-
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
     # Set model parameters based on chosen task and model configuration
     if task == "na":
         if config == "tiny":
diff --git a/benchmark/models/deit/deit.py b/benchmark/models/deit/deit.py
index 00e82b1..2942f49 100644
--- a/benchmark/models/deit/deit.py
+++ b/benchmark/models/deit/deit.py
@@ -40,29 +40,6 @@ def deit(training: bool, task: str, config: str, microbatch: int, device: str, d
             pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
             os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"
 
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-
-        # if data_type == "Fp16_b":
-        #     os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES_APPLY_FILTERING"] = "1"
-
-        # if data_type == "Bfp8_b":
-        #     os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1"
-        #     pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
-        #     os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"
-
-
     # Set model parameters based on chosen task and model configuration
     img_res = 224
     target_microbatch = 32
diff --git a/benchmark/models/falcon/utils/model.py b/benchmark/models/falcon/utils/model.py
index 79ae610..fc47f12 100644
--- a/benchmark/models/falcon/utils/model.py
+++ b/benchmark/models/falcon/utils/model.py
@@ -14,6 +14,7 @@
 from torch.nn import functional as F
 from transformers import AutoTokenizer
 from transformers.generation.utils import TopKLogitsWarper, TopPLogitsWarper
+
 from benchmark.common.benchmark_run import OutputType
 from benchmark.models.falcon.utils.configuration_RW import RWConfig
 from benchmark.models.falcon.utils.pybudify import PyBudify
@@ -337,7 +338,7 @@ def top_k_top_p_filtering(
 
 
 def sample_kp_logits(logits, k, p):
-    
+
     next_token_logscores = top_k_top_p_filtering(logits, top_k=k, top_p=p)
     probs = F.softmax(next_token_logscores, dim=-1)
     next_token = torch.multinomial(probs, num_samples=1).squeeze(1)
diff --git a/benchmark/models/falcon/utils/pybudify.py b/benchmark/models/falcon/utils/pybudify.py
index 0610ce0..0e3e279 100644
--- a/benchmark/models/falcon/utils/pybudify.py
+++ b/benchmark/models/falcon/utils/pybudify.py
@@ -62,17 +62,6 @@ def __init__(
             os.environ["PYBUDA_CONVERT_PARAMS_TO_TVM"] = "0"  # faster compile times... why would this ever be 1?
             os.environ["TT_BACKEND_TIMEOUT"] = "0"  # default is too aggressive for large models?
 
-            # os.environ["PYBUDA_ENABLE_BROADCAST_SPLITTING"] = "1"
-            # os.environ["PYBUDA_DISABLE_FORK_JOIN_BUF"] = "1"
-            # os.environ["PYBUDA_DRAM_PICK_CAPACITY"] = "1"
-            # os.environ["WHA0_DISABLE_RELAY_BUFS"] = "1"
-            # os.environ["PYBUDA_FUSE_STOP_ON_RECIPROCAL"] = "1"
-            # os.environ["PYBUDA_PLACER_SNAKE"] = "1" Not what we want for dual chip placement
-            # os.environ["PYBUDA_DISABLE_INTERACTIVE_PLACER"] = "1" # Until interactive placer supports multi-chip placement overrides
-            # os.environ["PYBUDA_PLACER_SNAKE"] = "1"
-            # os.environ["PYBUDA_ETH_LINKS_NEBULA"] = "1"
-            # os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1"
-
             if self.odkv or self.masked_odkv:
                 os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1"  # much better performance, not sure why?
 
@@ -123,12 +112,6 @@ def __init__(
                     },
                 )
 
-            # pybuda.config.configure_mixed_precision(
-            #     name_regex="matmul_.*",
-            #     input_df={1: [pybuda.DataFormat.Bfp8_b, True]})
-
-            # pybuda.override_op_size('matmul_61', (1,2))
-
             if lora:
                 os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "147456"
 
@@ -300,21 +283,6 @@ def __init__(
             compiler_cfg.input_queues_on_host = host_queues
 
             if self.masked_odkv:
-                # print('masked_odkv')
-
-                # compiler_cfg.enable_t_streaming = True
-                # compiler_cfg.manual_t_streaming = True
-
-                # pybuda.config.override_t_stream_dir(f"concatenate_50.dc.sparse_matmul.4.lc2", "c")
-                # pybuda.config.override_t_stream_dir(f"concatenate_67.dc.sparse_matmul.4.lc2", "c")
-
-                # import pdb; pdb.set_trace()
-
-                # pybuda.config.set_epoch_break("transpose_58.dc.sparse_matmul.4.lc2")
-
-                # pybuda.config.set_epoch_break("matmul_64")
-
-                # pybuda.config.add_schedule_constraint(['transpose_58.dc.sparse_matmul.4.lc2', 'add_59'])
 
                 if num_layers == 1:
                     names = "input__56, input__57"
@@ -329,28 +297,10 @@ def __init__(
                 names_dict = {name: (i + 1) for i, name in enumerate(names)}
 
                 compiler_cfg = pybuda.config._get_global_compiler_config()
-
-                # pybuda.config.insert_fracture_group([(f"concatenate_50", 2, 2)])
-                # pybuda.config.insert_fracture_group([(f"concatenate_67", 2, 2)])
-
-                # pybuda.config.configure_mixed_precision(
-                #     name_regex="concatenate_50.dc.sparse_matmul.4.lc2",
-                #     input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]})
-
-                # pybuda.config.configure_mixed_precision(
-                #     name_regex="concatenate_50.dc.sparse_matmul.4.lc2",
-                #     input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]})
-
                 compiler_cfg.loopback_outputs = names_dict
 
             elif self.odkv:
 
-                # compiler_cfg.enable_t_streaming = True
-                # compiler_cfg.manual_t_streaming = True
-
-                # pybuda.config.override_t_stream_dir(f"concatenate_50.dc.sparse_matmul.4.lc2", "c")
-                # pybuda.config.override_t_stream_dir(f"concatenate_67.dc.sparse_matmul.4.lc2", "c")
-
                 if num_layers == 1:
                     names = "input__54, input__55"
                 elif num_layers == 32:
@@ -363,18 +313,6 @@ def __init__(
                 names_dict = {name: (i + 1) for i, name in enumerate(names)}
 
                 compiler_cfg = pybuda.config._get_global_compiler_config()
-
-                # pybuda.config.insert_fracture_group([(f"concatenate_50", 2, 2)])
-                # pybuda.config.insert_fracture_group([(f"concatenate_67", 2, 2)])
-
-                # pybuda.config.configure_mixed_precision(
-                #     name_regex="concatenate_50.dc.sparse_matmul.4.lc2",
-                #     input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]})
-
-                # pybuda.config.configure_mixed_precision(
-                #     name_regex="concatenate_50.dc.sparse_matmul.4.lc2",
-                #     input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]})
-
                 compiler_cfg.loopback_outputs = names_dict
             else:
                 compiler_cfg.enable_t_streaming = True
@@ -504,8 +442,5 @@ def add_sched(self, pybuda, entries, exits, ops, factor, constr):
                             print(f"[add_sched]: Override op spatial epoch: {fop}, chip {f}")
                             pybuda.config.override_op_placement(fop, chip_id=f, spatial_epoch_break=True)
                     constr.append(fop)
-        # for elem in exits:
-        # constr.append(elem)
-        # pybuda.config.override_op_placement(exits[0], temporal_epoch_break=True)
         print(f"[add_sched] sched: {constr}")
         return constr
diff --git a/benchmark/models/flant5/flant5.py b/benchmark/models/flant5/flant5.py
index 46465dc..c0edb65 100644
--- a/benchmark/models/flant5/flant5.py
+++ b/benchmark/models/flant5/flant5.py
@@ -24,7 +24,7 @@ def flant5(training: bool, task: str, config: str, microbatch: int, device: str,
         os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
         os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
         os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"
-        
+
         compiler_cfg = pybuda.config._get_global_compiler_config()
         compiler_cfg.enable_tvm_cpu_fallback = False
         compiler_cfg.default_df_override = pybuda._C.Float16_b
@@ -32,43 +32,6 @@ def flant5(training: bool, task: str, config: str, microbatch: int, device: str,
         if pybuda.detect_available_devices()[0] == BackendDevice.Grayskull:
             compiler_cfg.enable_auto_fusing = False
         compiler_cfg.enable_amp_light()
-        # compiler_cfg.compile_subgraphs = True
-        # compiler_cfg.enable_link_past_cache_ios = True
-
-        # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1"
-        # os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1"
-        # os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
-        # os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
-        # os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"
-        
-        # compiler_cfg = pybuda.config._get_global_compiler_config()
-        # compiler_cfg.enable_tvm_cpu_fallback = False
-        # compiler_cfg.default_df_override = pybuda._C.Float16_b
-        # compiler_cfg.default_dram_parameters = False
-        # compiler_cfg.enable_amp_light()
-        # compiler_cfg.compile_subgraphs = True
-        # compiler_cfg.enable_link_past_cache_ios = True
-
-        # # Add PyBUDA configurations
-        # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1"
-        # os.environ["TT_BACKEND_MULTI_THREADED_PUSH"] = "1"
-        # os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1"
-        # os.environ["PYBUDA_EXTRA_L1_MARGIN"] = "120000"
-        # os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1"
-        # os.environ["PYBUDA_NLP_MANUAL_TARGET"] = "35000"
-        # os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
-        # os.environ["TT_BACKEND_PROFILER"] = "1"
-        # os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
-        # os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"
-        # os.environ["PYBUDA_TEMP_ELT_UNARY_ESTIMATES_LEGACY"] = "1"
-
-        # compiler_cfg = pybuda.config._get_global_compiler_config()
-        # compiler_cfg.enable_t_streaming = True
-        # compiler_cfg.enable_tvm_cpu_fallback = False
-        # compiler_cfg.default_df_override = pybuda._C.Float16_b
-        # compiler_cfg.default_dram_parameters = False
-        # compiler_cfg.enable_auto_fusing = False
-        # compiler_cfg.enable_amp_light()
 
     # Set model parameters based on chosen task and model configuration
     if task in ["na", "text_classification", "text_summarization"]:
diff --git a/benchmark/models/flant5/flant5_past_cache_enc_dec.py b/benchmark/models/flant5/flant5_past_cache_enc_dec.py
index a68fccb..4f0b0db 100644
--- a/benchmark/models/flant5/flant5_past_cache_enc_dec.py
+++ b/benchmark/models/flant5/flant5_past_cache_enc_dec.py
@@ -59,8 +59,8 @@ def flant5_past_cache_enc_dec(training: bool, task: str, config: str, microbatch
 
     if device == "tt":
         import pybuda
-        from pybuda.pybudaglobal import TILE_DIM
         from pybuda._C.backend_api import BackendDevice
+        from pybuda.pybudaglobal import TILE_DIM
 
         # ---------------------------------------------------------------------------------------- #
         # flan-T5, START
@@ -76,28 +76,9 @@ def flant5_past_cache_enc_dec(training: bool, task: str, config: str, microbatch
             os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
 
         # These are about to be enabled by default.
-        #
         os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
         os.environ["PYBUDA_EXP_APPROX"] = "1"
 
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-        # os.environ["PYBUDA_EXP_APPROX"] = "1"
-        
-        # if data_type == "Bfp8_b":
-        #     pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
-        #     pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b)
-        #     pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
-
         # ---------------------------------------------------------------------------------------- #
         # flan-T5, END
         # ---------------------------------------------------------------------------------------- #
diff --git a/benchmark/models/hrnet/hrnet.py b/benchmark/models/hrnet/hrnet.py
index 50fff28..7726802 100644
--- a/benchmark/models/hrnet/hrnet.py
+++ b/benchmark/models/hrnet/hrnet.py
@@ -43,7 +43,7 @@ def hrnet(training: bool, task: str, config: str, microbatch: int, device: str,
             os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
             os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
 
-        os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46" # removing causes hang #2139
+        os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46"  # removing causes hang #2139
         os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
 
         # These are about to be enabled by default.
@@ -57,31 +57,6 @@ def hrnet(training: bool, task: str, config: str, microbatch: int, device: str,
         if compiler_cfg.balancer_policy == "Ribbon":
             compiler_cfg.enable_amp_light()
 
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46" # removing causes hang #2139
-        # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-        # if data_type == "Fp16_b":
-        #     # Hangs with autotranspose on #2542
-        #     compiler_cfg.enable_auto_transposing_placement = False
-
-        # # Manually enable amp light for Ribbon
-        # if compiler_cfg.balancer_policy == "Ribbon":
-        #     compiler_cfg.enable_amp_light()
-
-
     # Set model parameters based on chosen task and model configuration
     img_res = 224
     target_microbatch = 32
@@ -108,7 +83,7 @@ def hrnet(training: bool, task: str, config: str, microbatch: int, device: str,
         available_devices = pybuda.detect_available_devices()
         if available_devices:
             if available_devices[0] == BackendDevice.Grayskull:
-                pybuda.config._internal_insert_fj_buffering_nop('add_312', ['add_341'], nop_count=2)
+                pybuda.config._internal_insert_fj_buffering_nop("add_312", ["add_341"], nop_count=2)
                 pybuda.config.set_epoch_break("resize2d_3176.dc.sparse_matmul.3.lc2")
     else:
         raise RuntimeError("Unknown config")
diff --git a/benchmark/models/inception_v4/inception_v4.py b/benchmark/models/inception_v4/inception_v4.py
index f6acd3e..1b76535 100644
--- a/benchmark/models/inception_v4/inception_v4.py
+++ b/benchmark/models/inception_v4/inception_v4.py
@@ -30,16 +30,6 @@ def inception_v4(training: bool, task: str, config: str, microbatch: int, device
             os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
             os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
 
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
     if config == "224":
         model_name = "inception_v4"
         img_res = 224  # https://github.com/huggingface/pytorch-image-models/blob/main/train.py#L122
diff --git a/benchmark/models/mobilenet_v1/mobilenet_v1.py b/benchmark/models/mobilenet_v1/mobilenet_v1.py
index c295230..285808e 100644
--- a/benchmark/models/mobilenet_v1/mobilenet_v1.py
+++ b/benchmark/models/mobilenet_v1/mobilenet_v1.py
@@ -15,7 +15,7 @@
 def mobilenetv1(training: bool, task: str, config: str, microbatch: int, device: str, data_type: str):
 
     if device == "tt":
-        
+
         import pybuda
         from pybuda._C.backend_api import BackendDevice
 
@@ -44,34 +44,9 @@ def mobilenetv1(training: bool, task: str, config: str, microbatch: int, device:
             pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
             pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
             pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2)
-            pybuda.config.configure_mixed_precision(op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2)
-
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-
-        # if data_type == "Fp16_b":
-        #     os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "40"
-        #     os.environ["PYBUDA_TEMP_DISABLE_MODEL_KB_PROLOGUE_BW"] = "1"
-
-        # if data_type == "Bfp8_b":
-        #     os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"
-        #     pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
-        #     pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
-        #     pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2)
-        #     pybuda.config.configure_mixed_precision(op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2)
+            pybuda.config.configure_mixed_precision(
+                op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2
+            )
 
     # Set model parameters based on chosen task and model configuration
     if config == "192":
diff --git a/benchmark/models/mobilenet_v2/mobilenet_v2.py b/benchmark/models/mobilenet_v2/mobilenet_v2.py
index 9dad7a5..1fc12b2 100644
--- a/benchmark/models/mobilenet_v2/mobilenet_v2.py
+++ b/benchmark/models/mobilenet_v2/mobilenet_v2.py
@@ -17,7 +17,7 @@ def mobilenetv2(training: bool, task: str, config: str, microbatch: int, device:
     if device == "tt":
         import pybuda
         from pybuda._C.backend_api import BackendDevice
-    
+
         compiler_cfg = pybuda.config._get_global_compiler_config()
         compiler_cfg.enable_auto_transposing_placement = True
 
@@ -34,7 +34,6 @@ def mobilenetv2(training: bool, task: str, config: str, microbatch: int, device:
         os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
 
         # These are about to be enabled by default.
-        #
         os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
 
         if data_type == "Fp16_b":
@@ -44,45 +43,16 @@ def mobilenetv2(training: bool, task: str, config: str, microbatch: int, device:
             pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
             pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
             pybuda.config.configure_mixed_precision(
-                op_type="depthwise", 
-                input_df={1: (pybuda.DataFormat.Float16_b, False),}, 
-                output_df=pybuda.DataFormat.Float16_b, 
-                math_fidelity=pybuda.MathFidelity.HiFi2
+                op_type="depthwise",
+                input_df={
+                    1: (pybuda.DataFormat.Float16_b, False),
+                },
+                output_df=pybuda.DataFormat.Float16_b,
+                math_fidelity=pybuda.MathFidelity.HiFi2,
             )
             pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2)
             pybuda.config.configure_mixed_precision(op_type="matmul", math_fidelity=pybuda.MathFidelity.HiFi2)
 
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-
-        # if data_type == "Fp16_b":
-        #     os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1"
-
-        # if data_type == "Bfp8_b":
-        #     pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
-        #     pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
-        #     pybuda.config.configure_mixed_precision(
-        #         op_type="depthwise", 
-        #         input_df={1: (pybuda.DataFormat.Float16_b, False),}, 
-        #         output_df=pybuda.DataFormat.Float16_b, 
-        #         math_fidelity=pybuda.MathFidelity.HiFi2
-        #     )
-        #     pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2)
-        #     pybuda.config.configure_mixed_precision(op_type="matmul", math_fidelity=pybuda.MathFidelity.HiFi2)
-
     # Set model parameters based on chosen task and model configuration
     if config == "224":
         model_name = "google/mobilenet_v2_1.0_224"
diff --git a/benchmark/models/mobilenet_v3/mobilenet_v3.py b/benchmark/models/mobilenet_v3/mobilenet_v3.py
index c8081d2..af49de5 100644
--- a/benchmark/models/mobilenet_v3/mobilenet_v3.py
+++ b/benchmark/models/mobilenet_v3/mobilenet_v3.py
@@ -36,18 +36,6 @@ def mobilenetv3(training: bool, task: str, config: str, microbatch: int, device:
         os.environ["PYBUDA_BALANCER_PREPASS_DISABLED"] = "1"
         os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
 
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-            
-        # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1"
-        # os.environ["PYBUDA_BALANCER_PREPASS_DISABLED"] = "1"
-        # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
-
     # Set model parameters based on chosen task and model configuration
     if config == "sm":
         model_name = "hf_hub:timm/mobilenetv3_small_100.lamb_in1k"
diff --git a/benchmark/models/open_pose/open_pose.py b/benchmark/models/open_pose/open_pose.py
index 6101fcf..5ac6a9f 100644
--- a/benchmark/models/open_pose/open_pose.py
+++ b/benchmark/models/open_pose/open_pose.py
@@ -20,7 +20,7 @@ def open_pose(training: bool, task: str, config: str, microbatch: int, device: s
     if device == "tt":
         import pybuda
         from pybuda._C.backend_api import BackendDevice
-        
+
         compiler_cfg = pybuda.config._get_global_compiler_config()
         compiler_cfg.enable_auto_transposing_placement = True
 
@@ -34,19 +34,6 @@ def open_pose(training: bool, task: str, config: str, microbatch: int, device: s
         os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "13"
         os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
 
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1" 
-
-        # if data_type == "Fp16" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "13"
-        # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
-
         # Set model parameters based on chosen task and model configuration
         model_name = ""
         img_res = 224
diff --git a/benchmark/models/resnet/resnet.py b/benchmark/models/resnet/resnet.py
index 1bcef67..52197e1 100644
--- a/benchmark/models/resnet/resnet.py
+++ b/benchmark/models/resnet/resnet.py
@@ -44,29 +44,6 @@ def resnet(training: bool, task: str, config: str, microbatch: int, device: str,
         if data_type == "Bfp8_b":
             pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
 
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
-        # os.environ["PYBUDA_ALLOW_MULTICOLUMN_SPARSE_MATMUL"] = "1"
-
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-
-        # if data_type == "Fp16_b":
-        #     os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES_APPLY_FILTERING"] = "1"
-
-        # if data_type == "Bfp8_b":
-        #     pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
-
     # Set model parameters based on chosen task and model configuration
     if config == "resnet18":
         model_name = "microsoft/resnet-18"
diff --git a/benchmark/models/t5/t5.py b/benchmark/models/t5/t5.py
index d8d6f30..511cdcf 100644
--- a/benchmark/models/t5/t5.py
+++ b/benchmark/models/t5/t5.py
@@ -24,7 +24,7 @@ def t5(training: bool, task: str, config: str, microbatch: int, device: str, dat
         os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
         os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
         os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"
-        
+
         compiler_cfg = pybuda.config._get_global_compiler_config()
         compiler_cfg.enable_t_streaming = True
         compiler_cfg.enable_tvm_cpu_fallback = False
@@ -33,43 +33,6 @@ def t5(training: bool, task: str, config: str, microbatch: int, device: str, dat
         if pybuda.detect_available_devices()[0] == BackendDevice.Grayskull:
             compiler_cfg.enable_auto_fusing = False
         compiler_cfg.enable_amp_light()
-        # compiler_cfg.compile_subgraphs = True
-        # compiler_cfg.enable_link_past_cache_ios = True
-
-        # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1"
-        # os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1"
-        # os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
-        # os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
-        # os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"
-        
-        # compiler_cfg = pybuda.config._get_global_compiler_config()
-        # compiler_cfg.enable_tvm_cpu_fallback = False
-        # compiler_cfg.default_df_override = pybuda._C.Float16_b
-        # compiler_cfg.default_dram_parameters = False
-        # compiler_cfg.enable_amp_light()
-        # compiler_cfg.compile_subgraphs = True
-        # compiler_cfg.enable_link_past_cache_ios = True
-
-        # # Add PyBUDA configurations
-        # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1"
-        # os.environ["TT_BACKEND_MULTI_THREADED_PUSH"] = "1"
-        # os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1"
-        # os.environ["PYBUDA_EXTRA_L1_MARGIN"] = "120000"
-        # os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1"
-        # os.environ["PYBUDA_NLP_MANUAL_TARGET"] = "26000"
-        # os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
-        # os.environ["TT_BACKEND_PROFILER"] = "1"
-        # os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
-        # os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"
-        # os.environ["PYBUDA_TEMP_ELT_UNARY_ESTIMATES_LEGACY"] = "1"
-
-        # compiler_cfg = pybuda.config._get_global_compiler_config()
-        # compiler_cfg.enable_t_streaming = True
-        # compiler_cfg.enable_tvm_cpu_fallback = False
-        # compiler_cfg.default_df_override = pybuda._C.Float16_b
-        # compiler_cfg.default_dram_parameters = False
-        # compiler_cfg.enable_auto_fusing = False
-        # compiler_cfg.enable_amp_light()
 
     # Set model parameters based on chosen task and model configuration
     if task in ["na", "text_classification", "text_summarization"]:
diff --git a/benchmark/models/t5/t5_past_cache_enc_dec.py b/benchmark/models/t5/t5_past_cache_enc_dec.py
index 4dcf913..fd88ec3 100644
--- a/benchmark/models/t5/t5_past_cache_enc_dec.py
+++ b/benchmark/models/t5/t5_past_cache_enc_dec.py
@@ -85,24 +85,6 @@ def t5_past_cache_enc_dec(training: bool, task: str, config: str, microbatch: in
             pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b)
             pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
 
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-        # os.environ["PYBUDA_EXP_APPROX"] = "1"
-
-        # if data_type == "Bfp8_b":
-        #     pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
-        #     pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b)
-        #     pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
-
         # ---------------------------------------------------------------------------------------- #
         # T5, END
         # ---------------------------------------------------------------------------------------- #
diff --git a/benchmark/models/unet/unet.py b/benchmark/models/unet/unet.py
index 7c94822..080fe4c 100644
--- a/benchmark/models/unet/unet.py
+++ b/benchmark/models/unet/unet.py
@@ -15,7 +15,7 @@
 def unet(training: bool, task: str, config: str, microbatch: int, device: str, data_type: str):
 
     if device == "tt":
-        
+
         import pybuda
         from pybuda._C.backend_api import BackendDevice
 
@@ -39,33 +39,8 @@ def unet(training: bool, task: str, config: str, microbatch: int, device: str, d
         os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "60"
 
         # These are about to be enabled by default.
-        #
         os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
 
-        # compiler_cfg.enable_tvm_constant_prop = True
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # # Manually enable amp light for Ribbon
-        # if compiler_cfg.balancer_policy == "Ribbon":
-        #     compiler_cfg.enable_amp_light()
-
-        # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
-        # os.environ["PYBUDA_ALLOW_MULTICOLUMN_SPARSE_MATMUL"] = "1"
-        # os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "60"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-
-
     # Set model parameters based on chosen task and model configuration
     if config == "256":
         model = torch.hub.load(
@@ -124,7 +99,7 @@ def eval_fn(**kwargs):
             n_samples=n_samples,
         )
 
-        def collate_fn(batch):            
+        def collate_fn(batch):
             # Separate inputs and labels
             inputs = [item[0] for item in batch]
             labels = [item[1] for item in batch]
diff --git a/benchmark/models/vit/vit.py b/benchmark/models/vit/vit.py
index b988bdb..cff8616 100644
--- a/benchmark/models/vit/vit.py
+++ b/benchmark/models/vit/vit.py
@@ -40,28 +40,6 @@ def vit(training: bool, task: str, config: str, microbatch: int, device: str, da
             pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
             os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"
 
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-
-        # if data_type == "Fp16_b":
-        #     os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES_APPLY_FILTERING"] = "1"
-
-        # if data_type == "Bfp8_b":
-        #     os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1"
-        #     pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
-        #     os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"
-
     # Set model parameters based on chosen task and model configuration
     img_res = 224
     target_microbatch = 32
diff --git a/benchmark/models/vovnet/vovnet_v2.py b/benchmark/models/vovnet/vovnet_v2.py
index 1b055c2..cf5b488 100644
--- a/benchmark/models/vovnet/vovnet_v2.py
+++ b/benchmark/models/vovnet/vovnet_v2.py
@@ -24,7 +24,7 @@ def vovnet_v2(training: bool, task: str, config: str, microbatch: int, device: s
         available_devices = pybuda.detect_available_devices()
         if available_devices[0] != BackendDevice.Grayskull:
             compiler_cfg.enable_auto_transposing_placement = True
-            
+
         if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
             os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
             os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
@@ -37,34 +37,10 @@ def vovnet_v2(training: bool, task: str, config: str, microbatch: int, device: s
         os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "60"
 
         # These are about to be enabled by default.
-        #
         os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
 
         if config == "39" and data_type != "Bfp8_b":
             compiler_cfg.enable_amp_light()
-        
-        # available_devices = pybuda.detect_available_devices()
-        # if available_devices[0] != BackendDevice.Grayskull:
-        #     compiler_cfg.enable_auto_transposing_placement = True
-            
-        # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # os.environ["PYBUDA_ALLOW_MULTICOLUMN_SPARSE_MATMUL"] = "1"
-        # os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1"
-        # os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "60"
-
-        # # These are about to be enabled by default.
-        # #
-        # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
-
-        # if config == "39" and data_type != "Bfp8_b":
-        #     compiler_cfg.enable_amp_light()
 
     # Set model parameters based on chosen task and model configuration
     img_res = 224
diff --git a/benchmark/models/whisper/whisper_enc_dec.py b/benchmark/models/whisper/whisper_enc_dec.py
index 9541517..49e2c57 100644
--- a/benchmark/models/whisper/whisper_enc_dec.py
+++ b/benchmark/models/whisper/whisper_enc_dec.py
@@ -29,27 +29,13 @@ def whisper_enc_dec(training: bool, task: str, config: str, microbatch: int, dev
     if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
         os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
         os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-    
+
     available_devices = pybuda.detect_available_devices()
     if available_devices:
         if available_devices[0] == BackendDevice.Grayskull:
             pybuda.config.set_epoch_break("conv2d_9.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2")
             pybuda.config.override_op_size("conv2d_9.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", (1, 12))
 
-    # if compiler_cfg.balancer_policy == "default":
-    #     compiler_cfg.balancer_policy = "Ribbon"
-    #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-    # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-    #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-    #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-    
-    # available_devices = pybuda.detect_available_devices()
-    # if available_devices:
-    #     if available_devices[0] == BackendDevice.Grayskull:
-    #         pybuda.config.set_epoch_break("conv2d_9.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2")
-    #         pybuda.config.override_op_size("conv2d_9.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", (1, 12))
-
     # Determine model variant
     if config == "small":
         variant = "openai/whisper-small"
diff --git a/benchmark/models/yolo_v5/utils/common.py b/benchmark/models/yolo_v5/utils/common.py
index 5bcc8dd..d71907b 100644
--- a/benchmark/models/yolo_v5/utils/common.py
+++ b/benchmark/models/yolo_v5/utils/common.py
@@ -5,9 +5,9 @@
 
 import cv2
 import numpy as np
+import PIL
 import torch
 import yolov5
-import PIL
 from PIL import Image
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
diff --git a/benchmark/models/yolo_v5/yolo_v5.py b/benchmark/models/yolo_v5/yolo_v5.py
index 9e63ec7..64ccf9a 100644
--- a/benchmark/models/yolo_v5/yolo_v5.py
+++ b/benchmark/models/yolo_v5/yolo_v5.py
@@ -49,38 +49,6 @@ def yolo_v5(training: bool, task: str, config: str, microbatch: int, device: str
             compiler_cfg.enable_auto_fusing = False  # required to fix accuracy
             os.environ["PYBUDA_DECOMPOSE_SIGMOID"] = "1"
 
-
-        # compiler_cfg.enable_auto_transposing_placement = True
-
-        # if compiler_cfg.balancer_policy == "default":
-        #     compiler_cfg.balancer_policy = "Ribbon"
-        #     os.environ["PYBUDA_RIBBON2"] = "1"
-
-        # from pybuda._C.backend_api import BackendDevice
-        # available_devices = pybuda.detect_available_devices()
-
-        # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
-        #     os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
-        #     os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"
-
-        # # Temp perf workaround for tenstorrent/bbe#2595
-        # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1"
-
-        # if data_type == "Fp16_b":
-        #     if available_devices[0] != BackendDevice.Grayskull:
-        #         os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1"
-
-        # if data_type == "Bfp8_b":
-        #     os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1"
-        #     # Temp workaround for tenstorrent/bbe#2595, output BW is unpredictable.
-        #     os.environ["PYBUDA_DISABLE_STREAM_OUTPUT"] = "1"
-
-        # if available_devices[0] == BackendDevice.Grayskull:
-        #     compiler_cfg.enable_tm_cpu_fallback = True
-        #     compiler_cfg.enable_tm_cpu_fallback = True
-        #     compiler_cfg.enable_auto_fusing = False  # required to fix accuracy
-        #     os.environ["PYBUDA_DECOMPOSE_SIGMOID"] = "1"
-
     # Set model parameters based on chosen task and model configuration
     if config == "s":
         # Load model