From 7a1d0b4b686404f1dd663a91e9cd5834278f7ad4 Mon Sep 17 00:00:00 2001 From: Milan Kordic Date: Thu, 18 Jul 2024 11:23:20 +0000 Subject: [PATCH] Code clean-up and refactor --- benchmark/models/bert/bert.py | 17 ----- benchmark/models/deit/deit.py | 23 ------- benchmark/models/falcon/utils/model.py | 3 +- benchmark/models/falcon/utils/pybudify.py | 65 ------------------- benchmark/models/flant5/flant5.py | 39 +---------- .../flant5/flant5_past_cache_enc_dec.py | 21 +----- benchmark/models/hrnet/hrnet.py | 29 +-------- benchmark/models/inception_v4/inception_v4.py | 10 --- benchmark/models/mobilenet_v1/mobilenet_v1.py | 33 ++-------- benchmark/models/mobilenet_v2/mobilenet_v2.py | 44 ++----------- benchmark/models/mobilenet_v3/mobilenet_v3.py | 12 ---- benchmark/models/open_pose/open_pose.py | 15 +---- benchmark/models/resnet/resnet.py | 23 ------- benchmark/models/t5/t5.py | 39 +---------- benchmark/models/t5/t5_past_cache_enc_dec.py | 18 ----- benchmark/models/unet/unet.py | 29 +-------- benchmark/models/vit/vit.py | 22 ------- benchmark/models/vovnet/vovnet_v2.py | 26 +------- benchmark/models/whisper/whisper_enc_dec.py | 16 +---- benchmark/models/yolo_v5/utils/common.py | 2 +- benchmark/models/yolo_v5/yolo_v5.py | 32 --------- 21 files changed, 24 insertions(+), 494 deletions(-) diff --git a/benchmark/models/bert/bert.py b/benchmark/models/bert/bert.py index f1b1f65..1dd3b6c 100644 --- a/benchmark/models/bert/bert.py +++ b/benchmark/models/bert/bert.py @@ -35,23 +35,6 @@ def bert(training: bool, task: str, config: str, microbatch: int, device: str, d if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - # compiler_cfg.enable_auto_transposing_placement = True - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - # if data_type == "Bfp8_b": - # if pybuda.detect_available_devices()[0] != BackendDevice.Grayskull: - # os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1" - # os.environ["PYBUDA_EXP_APPROX"] = "1" - # pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b) - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - # Set model parameters based on chosen task and model configuration if task == "na": if config == "tiny": diff --git a/benchmark/models/deit/deit.py b/benchmark/models/deit/deit.py index 00e82b1..2942f49 100644 --- a/benchmark/models/deit/deit.py +++ b/benchmark/models/deit/deit.py @@ -40,29 +40,6 @@ def deit(training: bool, task: str, config: str, microbatch: int, device: str, d pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b) os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0" - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - - # if data_type == "Fp16_b": - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES_APPLY_FILTERING"] = "1" - - # if data_type == "Bfp8_b": - # os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1" - # pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b) - # os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0" - - # Set model parameters based on chosen task and model configuration img_res = 224 target_microbatch = 32 diff --git a/benchmark/models/falcon/utils/model.py b/benchmark/models/falcon/utils/model.py index 79ae610..fc47f12 100644 --- a/benchmark/models/falcon/utils/model.py +++ b/benchmark/models/falcon/utils/model.py @@ -14,6 +14,7 @@ from torch.nn import functional as F from transformers import AutoTokenizer from transformers.generation.utils import TopKLogitsWarper, TopPLogitsWarper + from benchmark.common.benchmark_run import OutputType from benchmark.models.falcon.utils.configuration_RW import RWConfig from benchmark.models.falcon.utils.pybudify import PyBudify @@ -337,7 +338,7 @@ def top_k_top_p_filtering( def sample_kp_logits(logits, k, p): - + next_token_logscores = top_k_top_p_filtering(logits, top_k=k, top_p=p) probs = F.softmax(next_token_logscores, dim=-1) next_token = torch.multinomial(probs, num_samples=1).squeeze(1) diff --git a/benchmark/models/falcon/utils/pybudify.py b/benchmark/models/falcon/utils/pybudify.py index 0610ce0..0e3e279 100644 --- a/benchmark/models/falcon/utils/pybudify.py +++ b/benchmark/models/falcon/utils/pybudify.py @@ -62,17 +62,6 @@ def __init__( os.environ["PYBUDA_CONVERT_PARAMS_TO_TVM"] = "0" # faster compile times... why would this ever be 1? os.environ["TT_BACKEND_TIMEOUT"] = "0" # default is too aggressive for large models? - # os.environ["PYBUDA_ENABLE_BROADCAST_SPLITTING"] = "1" - # os.environ["PYBUDA_DISABLE_FORK_JOIN_BUF"] = "1" - # os.environ["PYBUDA_DRAM_PICK_CAPACITY"] = "1" - # os.environ["WHA0_DISABLE_RELAY_BUFS"] = "1" - # os.environ["PYBUDA_FUSE_STOP_ON_RECIPROCAL"] = "1" - # os.environ["PYBUDA_PLACER_SNAKE"] = "1" Not what we want for dual chip placement - # os.environ["PYBUDA_DISABLE_INTERACTIVE_PLACER"] = "1" # Until interactive placer supports multi-chip placement overrides - # os.environ["PYBUDA_PLACER_SNAKE"] = "1" - # os.environ["PYBUDA_ETH_LINKS_NEBULA"] = "1" - # os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1" - if self.odkv or self.masked_odkv: os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1" # much better performance, not sure why? @@ -123,12 +112,6 @@ def __init__( }, ) - # pybuda.config.configure_mixed_precision( - # name_regex="matmul_.*", - # input_df={1: [pybuda.DataFormat.Bfp8_b, True]}) - - # pybuda.override_op_size('matmul_61', (1,2)) - if lora: os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "147456" @@ -300,21 +283,6 @@ def __init__( compiler_cfg.input_queues_on_host = host_queues if self.masked_odkv: - # print('masked_odkv') - - # compiler_cfg.enable_t_streaming = True - # compiler_cfg.manual_t_streaming = True - - # pybuda.config.override_t_stream_dir(f"concatenate_50.dc.sparse_matmul.4.lc2", "c") - # pybuda.config.override_t_stream_dir(f"concatenate_67.dc.sparse_matmul.4.lc2", "c") - - # import pdb; pdb.set_trace() - - # pybuda.config.set_epoch_break("transpose_58.dc.sparse_matmul.4.lc2") - - # pybuda.config.set_epoch_break("matmul_64") - - # pybuda.config.add_schedule_constraint(['transpose_58.dc.sparse_matmul.4.lc2', 'add_59']) if num_layers == 1: names = "input__56, input__57" @@ -329,28 +297,10 @@ def __init__( names_dict = {name: (i + 1) for i, name in enumerate(names)} compiler_cfg = pybuda.config._get_global_compiler_config() - - # pybuda.config.insert_fracture_group([(f"concatenate_50", 2, 2)]) - # pybuda.config.insert_fracture_group([(f"concatenate_67", 2, 2)]) - - # pybuda.config.configure_mixed_precision( - # name_regex="concatenate_50.dc.sparse_matmul.4.lc2", - # input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]}) - - # pybuda.config.configure_mixed_precision( - # name_regex="concatenate_50.dc.sparse_matmul.4.lc2", - # input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]}) - compiler_cfg.loopback_outputs = names_dict elif self.odkv: - # compiler_cfg.enable_t_streaming = True - # compiler_cfg.manual_t_streaming = True - - # pybuda.config.override_t_stream_dir(f"concatenate_50.dc.sparse_matmul.4.lc2", "c") - # pybuda.config.override_t_stream_dir(f"concatenate_67.dc.sparse_matmul.4.lc2", "c") - if num_layers == 1: names = "input__54, input__55" elif num_layers == 32: @@ -363,18 +313,6 @@ def __init__( names_dict = {name: (i + 1) for i, name in enumerate(names)} compiler_cfg = pybuda.config._get_global_compiler_config() - - # pybuda.config.insert_fracture_group([(f"concatenate_50", 2, 2)]) - # pybuda.config.insert_fracture_group([(f"concatenate_67", 2, 2)]) - - # pybuda.config.configure_mixed_precision( - # name_regex="concatenate_50.dc.sparse_matmul.4.lc2", - # input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]}) - - # pybuda.config.configure_mixed_precision( - # name_regex="concatenate_50.dc.sparse_matmul.4.lc2", - # input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]}) - compiler_cfg.loopback_outputs = names_dict else: compiler_cfg.enable_t_streaming = True @@ -504,8 +442,5 @@ def add_sched(self, pybuda, entries, exits, ops, factor, constr): print(f"[add_sched]: Override op spatial epoch: {fop}, chip {f}") pybuda.config.override_op_placement(fop, chip_id=f, spatial_epoch_break=True) constr.append(fop) - # for elem in exits: - # constr.append(elem) - # pybuda.config.override_op_placement(exits[0], temporal_epoch_break=True) print(f"[add_sched] sched: {constr}") return constr diff --git a/benchmark/models/flant5/flant5.py b/benchmark/models/flant5/flant5.py index 46465dc..c0edb65 100644 --- a/benchmark/models/flant5/flant5.py +++ b/benchmark/models/flant5/flant5.py @@ -24,7 +24,7 @@ def flant5(training: bool, task: str, config: str, microbatch: int, device: str, os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64" os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64" os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1" - + compiler_cfg = pybuda.config._get_global_compiler_config() compiler_cfg.enable_tvm_cpu_fallback = False compiler_cfg.default_df_override = pybuda._C.Float16_b @@ -32,43 +32,6 @@ def flant5(training: bool, task: str, config: str, microbatch: int, device: str, if pybuda.detect_available_devices()[0] == BackendDevice.Grayskull: compiler_cfg.enable_auto_fusing = False compiler_cfg.enable_amp_light() - # compiler_cfg.compile_subgraphs = True - # compiler_cfg.enable_link_past_cache_ios = True - - # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1" - # os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1" - # os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64" - # os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64" - # os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1" - - # compiler_cfg = pybuda.config._get_global_compiler_config() - # compiler_cfg.enable_tvm_cpu_fallback = False - # compiler_cfg.default_df_override = pybuda._C.Float16_b - # compiler_cfg.default_dram_parameters = False - # compiler_cfg.enable_amp_light() - # compiler_cfg.compile_subgraphs = True - # compiler_cfg.enable_link_past_cache_ios = True - - # # Add PyBUDA configurations - # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1" - # os.environ["TT_BACKEND_MULTI_THREADED_PUSH"] = "1" - # os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1" - # os.environ["PYBUDA_EXTRA_L1_MARGIN"] = "120000" - # os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1" - # os.environ["PYBUDA_NLP_MANUAL_TARGET"] = "35000" - # os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64" - # os.environ["TT_BACKEND_PROFILER"] = "1" - # os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64" - # os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1" - # os.environ["PYBUDA_TEMP_ELT_UNARY_ESTIMATES_LEGACY"] = "1" - - # compiler_cfg = pybuda.config._get_global_compiler_config() - # compiler_cfg.enable_t_streaming = True - # compiler_cfg.enable_tvm_cpu_fallback = False - # compiler_cfg.default_df_override = pybuda._C.Float16_b - # compiler_cfg.default_dram_parameters = False - # compiler_cfg.enable_auto_fusing = False - # compiler_cfg.enable_amp_light() # Set model parameters based on chosen task and model configuration if task in ["na", "text_classification", "text_summarization"]: diff --git a/benchmark/models/flant5/flant5_past_cache_enc_dec.py b/benchmark/models/flant5/flant5_past_cache_enc_dec.py index a68fccb..4f0b0db 100644 --- a/benchmark/models/flant5/flant5_past_cache_enc_dec.py +++ b/benchmark/models/flant5/flant5_past_cache_enc_dec.py @@ -59,8 +59,8 @@ def flant5_past_cache_enc_dec(training: bool, task: str, config: str, microbatch if device == "tt": import pybuda - from pybuda.pybudaglobal import TILE_DIM from pybuda._C.backend_api import BackendDevice + from pybuda.pybudaglobal import TILE_DIM # ---------------------------------------------------------------------------------------- # # flan-T5, START @@ -76,28 +76,9 @@ def flant5_past_cache_enc_dec(training: bool, task: str, config: str, microbatch os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" # These are about to be enabled by default. - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" os.environ["PYBUDA_EXP_APPROX"] = "1" - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - # os.environ["PYBUDA_EXP_APPROX"] = "1" - - # if data_type == "Bfp8_b": - # pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b) - # ---------------------------------------------------------------------------------------- # # flan-T5, END # ---------------------------------------------------------------------------------------- # diff --git a/benchmark/models/hrnet/hrnet.py b/benchmark/models/hrnet/hrnet.py index 50fff28..7726802 100644 --- a/benchmark/models/hrnet/hrnet.py +++ b/benchmark/models/hrnet/hrnet.py @@ -43,7 +43,7 @@ def hrnet(training: bool, task: str, config: str, microbatch: int, device: str, os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46" # removing causes hang #2139 + os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46" # removing causes hang #2139 os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" # These are about to be enabled by default. @@ -57,31 +57,6 @@ def hrnet(training: bool, task: str, config: str, microbatch: int, device: str, if compiler_cfg.balancer_policy == "Ribbon": compiler_cfg.enable_amp_light() - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46" # removing causes hang #2139 - # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - # if data_type == "Fp16_b": - # # Hangs with autotranspose on #2542 - # compiler_cfg.enable_auto_transposing_placement = False - - # # Manually enable amp light for Ribbon - # if compiler_cfg.balancer_policy == "Ribbon": - # compiler_cfg.enable_amp_light() - - # Set model parameters based on chosen task and model configuration img_res = 224 target_microbatch = 32 @@ -108,7 +83,7 @@ def hrnet(training: bool, task: str, config: str, microbatch: int, device: str, available_devices = pybuda.detect_available_devices() if available_devices: if available_devices[0] == BackendDevice.Grayskull: - pybuda.config._internal_insert_fj_buffering_nop('add_312', ['add_341'], nop_count=2) + pybuda.config._internal_insert_fj_buffering_nop("add_312", ["add_341"], nop_count=2) pybuda.config.set_epoch_break("resize2d_3176.dc.sparse_matmul.3.lc2") else: raise RuntimeError("Unknown config") diff --git a/benchmark/models/inception_v4/inception_v4.py b/benchmark/models/inception_v4/inception_v4.py index f6acd3e..1b76535 100644 --- a/benchmark/models/inception_v4/inception_v4.py +++ b/benchmark/models/inception_v4/inception_v4.py @@ -30,16 +30,6 @@ def inception_v4(training: bool, task: str, config: str, microbatch: int, device os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - if config == "224": model_name = "inception_v4" img_res = 224 # https://github.com/huggingface/pytorch-image-models/blob/main/train.py#L122 diff --git a/benchmark/models/mobilenet_v1/mobilenet_v1.py b/benchmark/models/mobilenet_v1/mobilenet_v1.py index c295230..285808e 100644 --- a/benchmark/models/mobilenet_v1/mobilenet_v1.py +++ b/benchmark/models/mobilenet_v1/mobilenet_v1.py @@ -15,7 +15,7 @@ def mobilenetv1(training: bool, task: str, config: str, microbatch: int, device: str, data_type: str): if device == "tt": - + import pybuda from pybuda._C.backend_api import BackendDevice @@ -44,34 +44,9 @@ def mobilenetv1(training: bool, task: str, config: str, microbatch: int, device: pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b) pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b) pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2) - pybuda.config.configure_mixed_precision(op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2) - - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - - # if data_type == "Fp16_b": - # os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "40" - # os.environ["PYBUDA_TEMP_DISABLE_MODEL_KB_PROLOGUE_BW"] = "1" - - # if data_type == "Bfp8_b": - # os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0" - # pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2) - # pybuda.config.configure_mixed_precision(op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2) + pybuda.config.configure_mixed_precision( + op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2 + ) # Set model parameters based on chosen task and model configuration if config == "192": diff --git a/benchmark/models/mobilenet_v2/mobilenet_v2.py b/benchmark/models/mobilenet_v2/mobilenet_v2.py index 9dad7a5..1fc12b2 100644 --- a/benchmark/models/mobilenet_v2/mobilenet_v2.py +++ b/benchmark/models/mobilenet_v2/mobilenet_v2.py @@ -17,7 +17,7 @@ def mobilenetv2(training: bool, task: str, config: str, microbatch: int, device: if device == "tt": import pybuda from pybuda._C.backend_api import BackendDevice - + compiler_cfg = pybuda.config._get_global_compiler_config() compiler_cfg.enable_auto_transposing_placement = True @@ -34,7 +34,6 @@ def mobilenetv2(training: bool, task: str, config: str, microbatch: int, device: os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" # These are about to be enabled by default. - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" if data_type == "Fp16_b": @@ -44,45 +43,16 @@ def mobilenetv2(training: bool, task: str, config: str, microbatch: int, device: pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b) pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b) pybuda.config.configure_mixed_precision( - op_type="depthwise", - input_df={1: (pybuda.DataFormat.Float16_b, False),}, - output_df=pybuda.DataFormat.Float16_b, - math_fidelity=pybuda.MathFidelity.HiFi2 + op_type="depthwise", + input_df={ + 1: (pybuda.DataFormat.Float16_b, False), + }, + output_df=pybuda.DataFormat.Float16_b, + math_fidelity=pybuda.MathFidelity.HiFi2, ) pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2) pybuda.config.configure_mixed_precision(op_type="matmul", math_fidelity=pybuda.MathFidelity.HiFi2) - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - - # if data_type == "Fp16_b": - # os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" - - # if data_type == "Bfp8_b": - # pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision( - # op_type="depthwise", - # input_df={1: (pybuda.DataFormat.Float16_b, False),}, - # output_df=pybuda.DataFormat.Float16_b, - # math_fidelity=pybuda.MathFidelity.HiFi2 - # ) - # pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2) - # pybuda.config.configure_mixed_precision(op_type="matmul", math_fidelity=pybuda.MathFidelity.HiFi2) - # Set model parameters based on chosen task and model configuration if config == "224": model_name = "google/mobilenet_v2_1.0_224" diff --git a/benchmark/models/mobilenet_v3/mobilenet_v3.py b/benchmark/models/mobilenet_v3/mobilenet_v3.py index c8081d2..af49de5 100644 --- a/benchmark/models/mobilenet_v3/mobilenet_v3.py +++ b/benchmark/models/mobilenet_v3/mobilenet_v3.py @@ -36,18 +36,6 @@ def mobilenetv3(training: bool, task: str, config: str, microbatch: int, device: os.environ["PYBUDA_BALANCER_PREPASS_DISABLED"] = "1" os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" - # os.environ["PYBUDA_BALANCER_PREPASS_DISABLED"] = "1" - # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - # Set model parameters based on chosen task and model configuration if config == "sm": model_name = "hf_hub:timm/mobilenetv3_small_100.lamb_in1k" diff --git a/benchmark/models/open_pose/open_pose.py b/benchmark/models/open_pose/open_pose.py index 6101fcf..5ac6a9f 100644 --- a/benchmark/models/open_pose/open_pose.py +++ b/benchmark/models/open_pose/open_pose.py @@ -20,7 +20,7 @@ def open_pose(training: bool, task: str, config: str, microbatch: int, device: s if device == "tt": import pybuda from pybuda._C.backend_api import BackendDevice - + compiler_cfg = pybuda.config._get_global_compiler_config() compiler_cfg.enable_auto_transposing_placement = True @@ -34,19 +34,6 @@ def open_pose(training: bool, task: str, config: str, microbatch: int, device: s os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "13" os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Fp16" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "13" - # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - # Set model parameters based on chosen task and model configuration model_name = "" img_res = 224 diff --git a/benchmark/models/resnet/resnet.py b/benchmark/models/resnet/resnet.py index 1bcef67..52197e1 100644 --- a/benchmark/models/resnet/resnet.py +++ b/benchmark/models/resnet/resnet.py @@ -44,29 +44,6 @@ def resnet(training: bool, task: str, config: str, microbatch: int, device: str, if data_type == "Bfp8_b": pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b) - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - # os.environ["PYBUDA_ALLOW_MULTICOLUMN_SPARSE_MATMUL"] = "1" - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - - # if data_type == "Fp16_b": - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES_APPLY_FILTERING"] = "1" - - # if data_type == "Bfp8_b": - # pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b) - # Set model parameters based on chosen task and model configuration if config == "resnet18": model_name = "microsoft/resnet-18" diff --git a/benchmark/models/t5/t5.py b/benchmark/models/t5/t5.py index d8d6f30..511cdcf 100644 --- a/benchmark/models/t5/t5.py +++ b/benchmark/models/t5/t5.py @@ -24,7 +24,7 @@ def t5(training: bool, task: str, config: str, microbatch: int, device: str, dat os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64" os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64" os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1" - + compiler_cfg = pybuda.config._get_global_compiler_config() compiler_cfg.enable_t_streaming = True compiler_cfg.enable_tvm_cpu_fallback = False @@ -33,43 +33,6 @@ def t5(training: bool, task: str, config: str, microbatch: int, device: str, dat if pybuda.detect_available_devices()[0] == BackendDevice.Grayskull: compiler_cfg.enable_auto_fusing = False compiler_cfg.enable_amp_light() - # compiler_cfg.compile_subgraphs = True - # compiler_cfg.enable_link_past_cache_ios = True - - # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1" - # os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1" - # os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64" - # os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64" - # os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1" - - # compiler_cfg = pybuda.config._get_global_compiler_config() - # compiler_cfg.enable_tvm_cpu_fallback = False - # compiler_cfg.default_df_override = pybuda._C.Float16_b - # compiler_cfg.default_dram_parameters = False - # compiler_cfg.enable_amp_light() - # compiler_cfg.compile_subgraphs = True - # compiler_cfg.enable_link_past_cache_ios = True - - # # Add PyBUDA configurations - # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1" - # os.environ["TT_BACKEND_MULTI_THREADED_PUSH"] = "1" - # os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1" - # os.environ["PYBUDA_EXTRA_L1_MARGIN"] = "120000" - # os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1" - # os.environ["PYBUDA_NLP_MANUAL_TARGET"] = "26000" - # os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64" - # os.environ["TT_BACKEND_PROFILER"] = "1" - # os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64" - # os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1" - # os.environ["PYBUDA_TEMP_ELT_UNARY_ESTIMATES_LEGACY"] = "1" - - # compiler_cfg = pybuda.config._get_global_compiler_config() - # compiler_cfg.enable_t_streaming = True - # compiler_cfg.enable_tvm_cpu_fallback = False - # compiler_cfg.default_df_override = pybuda._C.Float16_b - # compiler_cfg.default_dram_parameters = False - # compiler_cfg.enable_auto_fusing = False - # compiler_cfg.enable_amp_light() # Set model parameters based on chosen task and model configuration if task in ["na", "text_classification", "text_summarization"]: diff --git a/benchmark/models/t5/t5_past_cache_enc_dec.py b/benchmark/models/t5/t5_past_cache_enc_dec.py index 4dcf913..fd88ec3 100644 --- a/benchmark/models/t5/t5_past_cache_enc_dec.py +++ b/benchmark/models/t5/t5_past_cache_enc_dec.py @@ -85,24 +85,6 @@ def t5_past_cache_enc_dec(training: bool, task: str, config: str, microbatch: in pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b) pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b) - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - # os.environ["PYBUDA_EXP_APPROX"] = "1" - - # if data_type == "Bfp8_b": - # pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b) - # pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b) - # ---------------------------------------------------------------------------------------- # # T5, END # ---------------------------------------------------------------------------------------- # diff --git a/benchmark/models/unet/unet.py b/benchmark/models/unet/unet.py index 7c94822..080fe4c 100644 --- a/benchmark/models/unet/unet.py +++ b/benchmark/models/unet/unet.py @@ -15,7 +15,7 @@ def unet(training: bool, task: str, config: str, microbatch: int, device: str, data_type: str): if device == "tt": - + import pybuda from pybuda._C.backend_api import BackendDevice @@ -39,33 +39,8 @@ def unet(training: bool, task: str, config: str, microbatch: int, device: str, d os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "60" # These are about to be enabled by default. - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - # compiler_cfg.enable_tvm_constant_prop = True - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # # Manually enable amp light for Ribbon - # if compiler_cfg.balancer_policy == "Ribbon": - # compiler_cfg.enable_amp_light() - - # os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" - # os.environ["PYBUDA_ALLOW_MULTICOLUMN_SPARSE_MATMUL"] = "1" - # os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "60" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - - # Set model parameters based on chosen task and model configuration if config == "256": model = torch.hub.load( @@ -124,7 +99,7 @@ def eval_fn(**kwargs): n_samples=n_samples, ) - def collate_fn(batch): + def collate_fn(batch): # Separate inputs and labels inputs = [item[0] for item in batch] labels = [item[1] for item in batch] diff --git a/benchmark/models/vit/vit.py b/benchmark/models/vit/vit.py index b988bdb..cff8616 100644 --- a/benchmark/models/vit/vit.py +++ b/benchmark/models/vit/vit.py @@ -40,28 +40,6 @@ def vit(training: bool, task: str, config: str, microbatch: int, device: str, da pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b) os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0" - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - - # if data_type == "Fp16_b": - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES_APPLY_FILTERING"] = "1" - - # if data_type == "Bfp8_b": - # os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1" - # pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b) - # os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0" - # Set model parameters based on chosen task and model configuration img_res = 224 target_microbatch = 32 diff --git a/benchmark/models/vovnet/vovnet_v2.py b/benchmark/models/vovnet/vovnet_v2.py index 1b055c2..cf5b488 100644 --- a/benchmark/models/vovnet/vovnet_v2.py +++ b/benchmark/models/vovnet/vovnet_v2.py @@ -24,7 +24,7 @@ def vovnet_v2(training: bool, task: str, config: str, microbatch: int, device: s available_devices = pybuda.detect_available_devices() if available_devices[0] != BackendDevice.Grayskull: compiler_cfg.enable_auto_transposing_placement = True - + if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" @@ -37,34 +37,10 @@ def vovnet_v2(training: bool, task: str, config: str, microbatch: int, device: s os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "60" # These are about to be enabled by default. - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" if config == "39" and data_type != "Bfp8_b": compiler_cfg.enable_amp_light() - - # available_devices = pybuda.detect_available_devices() - # if available_devices[0] != BackendDevice.Grayskull: - # compiler_cfg.enable_auto_transposing_placement = True - - # if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # os.environ["PYBUDA_ALLOW_MULTICOLUMN_SPARSE_MATMUL"] = "1" - # os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1" - # os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "60" - - # # These are about to be enabled by default. - # # - # os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1" - - # if config == "39" and data_type != "Bfp8_b": - # compiler_cfg.enable_amp_light() # Set model parameters based on chosen task and model configuration img_res = 224 diff --git a/benchmark/models/whisper/whisper_enc_dec.py b/benchmark/models/whisper/whisper_enc_dec.py index 9541517..49e2c57 100644 --- a/benchmark/models/whisper/whisper_enc_dec.py +++ b/benchmark/models/whisper/whisper_enc_dec.py @@ -29,27 +29,13 @@ def whisper_enc_dec(training: bool, task: str, config: str, microbatch: int, dev if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - + available_devices = pybuda.detect_available_devices() if available_devices: if available_devices[0] == BackendDevice.Grayskull: pybuda.config.set_epoch_break("conv2d_9.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2") pybuda.config.override_op_size("conv2d_9.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", (1, 12)) - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # available_devices = pybuda.detect_available_devices() - # if available_devices: - # if available_devices[0] == BackendDevice.Grayskull: - # pybuda.config.set_epoch_break("conv2d_9.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2") - # pybuda.config.override_op_size("conv2d_9.dc.sparse_matmul.9.dc.sparse_matmul.1.lc2", (1, 12)) - # Determine model variant if config == "small": variant = "openai/whisper-small" diff --git a/benchmark/models/yolo_v5/utils/common.py b/benchmark/models/yolo_v5/utils/common.py index 5bcc8dd..d71907b 100644 --- a/benchmark/models/yolo_v5/utils/common.py +++ b/benchmark/models/yolo_v5/utils/common.py @@ -5,9 +5,9 @@ import cv2 import numpy as np +import PIL import torch import yolov5 -import PIL from PIL import Image from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval diff --git a/benchmark/models/yolo_v5/yolo_v5.py b/benchmark/models/yolo_v5/yolo_v5.py index 9e63ec7..64ccf9a 100644 --- a/benchmark/models/yolo_v5/yolo_v5.py +++ b/benchmark/models/yolo_v5/yolo_v5.py @@ -49,38 +49,6 @@ def yolo_v5(training: bool, task: str, config: str, microbatch: int, device: str compiler_cfg.enable_auto_fusing = False # required to fix accuracy os.environ["PYBUDA_DECOMPOSE_SIGMOID"] = "1" - - # compiler_cfg.enable_auto_transposing_placement = True - - # if compiler_cfg.balancer_policy == "default": - # compiler_cfg.balancer_policy = "Ribbon" - # os.environ["PYBUDA_RIBBON2"] = "1" - - # from pybuda._C.backend_api import BackendDevice - # available_devices = pybuda.detect_available_devices() - - # if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0: - # os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1" - # os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1" - - # # Temp perf workaround for tenstorrent/bbe#2595 - # os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1" - - # if data_type == "Fp16_b": - # if available_devices[0] != BackendDevice.Grayskull: - # os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1" - - # if data_type == "Bfp8_b": - # os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" - # # Temp workaround for tenstorrent/bbe#2595, output BW is unpredictable. - # os.environ["PYBUDA_DISABLE_STREAM_OUTPUT"] = "1" - - # if available_devices[0] == BackendDevice.Grayskull: - # compiler_cfg.enable_tm_cpu_fallback = True - # compiler_cfg.enable_tm_cpu_fallback = True - # compiler_cfg.enable_auto_fusing = False # required to fix accuracy - # os.environ["PYBUDA_DECOMPOSE_SIGMOID"] = "1" - # Set model parameters based on chosen task and model configuration if config == "s": # Load model