Commit

Code clean-up and refactor
milank94 committed Jul 18, 2024
1 parent 7c72848 commit 7a1d0b4
Showing 21 changed files with 24 additions and 494 deletions.
17 changes: 0 additions & 17 deletions benchmark/models/bert/bert.py
@@ -35,23 +35,6 @@ def bert(training: bool, task: str, config: str, microbatch: int, device: str, d
if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

# compiler_cfg.enable_auto_transposing_placement = True
# if compiler_cfg.balancer_policy == "default":
# compiler_cfg.balancer_policy = "Ribbon"
# os.environ["PYBUDA_RIBBON2"] = "1"
# os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
# os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"
# if data_type == "Bfp8_b":
# if pybuda.detect_available_devices()[0] != BackendDevice.Grayskull:
# os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1"
# os.environ["PYBUDA_EXP_APPROX"] = "1"
# pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
# pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b)
# pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)

# if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
# os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

# Set model parameters based on chosen task and model configuration
if task == "na":
if config == "tiny":
23 changes: 0 additions & 23 deletions benchmark/models/deit/deit.py
@@ -40,29 +40,6 @@ def deit(training: bool, task: str, config: str, microbatch: int, device: str, d
pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"

# compiler_cfg.enable_auto_transposing_placement = True

# if compiler_cfg.balancer_policy == "default":
# compiler_cfg.balancer_policy = "Ribbon"
# os.environ["PYBUDA_RIBBON2"] = "1"

# if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
# os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
# os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

# # These are about to be enabled by default.
# #
# os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"

# if data_type == "Fp16_b":
# os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES_APPLY_FILTERING"] = "1"

# if data_type == "Bfp8_b":
# os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1"
# pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
# os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"


# Set model parameters based on chosen task and model configuration
img_res = 224
target_microbatch = 32
3 changes: 2 additions & 1 deletion benchmark/models/falcon/utils/model.py
@@ -14,6 +14,7 @@
from torch.nn import functional as F
from transformers import AutoTokenizer
from transformers.generation.utils import TopKLogitsWarper, TopPLogitsWarper

from benchmark.common.benchmark_run import OutputType
from benchmark.models.falcon.utils.configuration_RW import RWConfig
from benchmark.models.falcon.utils.pybudify import PyBudify
@@ -337,7 +338,7 @@ def top_k_top_p_filtering(


def sample_kp_logits(logits, k, p):

next_token_logscores = top_k_top_p_filtering(logits, top_k=k, top_p=p)
probs = F.softmax(next_token_logscores, dim=-1)
next_token = torch.multinomial(probs, num_samples=1).squeeze(1)
65 changes: 0 additions & 65 deletions benchmark/models/falcon/utils/pybudify.py
@@ -62,17 +62,6 @@ def __init__(
os.environ["PYBUDA_CONVERT_PARAMS_TO_TVM"] = "0" # faster compile times... why would this ever be 1?
os.environ["TT_BACKEND_TIMEOUT"] = "0" # default is too aggressive for large models?

# os.environ["PYBUDA_ENABLE_BROADCAST_SPLITTING"] = "1"
# os.environ["PYBUDA_DISABLE_FORK_JOIN_BUF"] = "1"
# os.environ["PYBUDA_DRAM_PICK_CAPACITY"] = "1"
# os.environ["WHA0_DISABLE_RELAY_BUFS"] = "1"
# os.environ["PYBUDA_FUSE_STOP_ON_RECIPROCAL"] = "1"
# os.environ["PYBUDA_PLACER_SNAKE"] = "1" Not what we want for dual chip placement
# os.environ["PYBUDA_DISABLE_INTERACTIVE_PLACER"] = "1" # Until interactive placer supports multi-chip placement overrides
# os.environ["PYBUDA_PLACER_SNAKE"] = "1"
# os.environ["PYBUDA_ETH_LINKS_NEBULA"] = "1"
# os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1"

if self.odkv or self.masked_odkv:
os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1" # much better performance, not sure why?

@@ -123,12 +112,6 @@ def __init__(
},
)

# pybuda.config.configure_mixed_precision(
# name_regex="matmul_.*",
# input_df={1: [pybuda.DataFormat.Bfp8_b, True]})

# pybuda.override_op_size('matmul_61', (1,2))

if lora:
os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "147456"

@@ -300,21 +283,6 @@ def __init__(
compiler_cfg.input_queues_on_host = host_queues

if self.masked_odkv:
# print('masked_odkv')

# compiler_cfg.enable_t_streaming = True
# compiler_cfg.manual_t_streaming = True

# pybuda.config.override_t_stream_dir(f"concatenate_50.dc.sparse_matmul.4.lc2", "c")
# pybuda.config.override_t_stream_dir(f"concatenate_67.dc.sparse_matmul.4.lc2", "c")

# import pdb; pdb.set_trace()

# pybuda.config.set_epoch_break("transpose_58.dc.sparse_matmul.4.lc2")

# pybuda.config.set_epoch_break("matmul_64")

# pybuda.config.add_schedule_constraint(['transpose_58.dc.sparse_matmul.4.lc2', 'add_59'])

if num_layers == 1:
names = "input__56, input__57"
@@ -329,28 +297,10 @@ def __init__(
names_dict = {name: (i + 1) for i, name in enumerate(names)}

compiler_cfg = pybuda.config._get_global_compiler_config()

# pybuda.config.insert_fracture_group([(f"concatenate_50", 2, 2)])
# pybuda.config.insert_fracture_group([(f"concatenate_67", 2, 2)])

# pybuda.config.configure_mixed_precision(
# name_regex="concatenate_50.dc.sparse_matmul.4.lc2",
# input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]})

# pybuda.config.configure_mixed_precision(
# name_regex="concatenate_50.dc.sparse_matmul.4.lc2",
# input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]})

compiler_cfg.loopback_outputs = names_dict

elif self.odkv:

# compiler_cfg.enable_t_streaming = True
# compiler_cfg.manual_t_streaming = True

# pybuda.config.override_t_stream_dir(f"concatenate_50.dc.sparse_matmul.4.lc2", "c")
# pybuda.config.override_t_stream_dir(f"concatenate_67.dc.sparse_matmul.4.lc2", "c")

if num_layers == 1:
names = "input__54, input__55"
elif num_layers == 32:
@@ -363,18 +313,6 @@ def __init__(
names_dict = {name: (i + 1) for i, name in enumerate(names)}

compiler_cfg = pybuda.config._get_global_compiler_config()

# pybuda.config.insert_fracture_group([(f"concatenate_50", 2, 2)])
# pybuda.config.insert_fracture_group([(f"concatenate_67", 2, 2)])

# pybuda.config.configure_mixed_precision(
# name_regex="concatenate_50.dc.sparse_matmul.4.lc2",
# input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]})

# pybuda.config.configure_mixed_precision(
# name_regex="concatenate_50.dc.sparse_matmul.4.lc2",
# input_df={0: [pybuda.DataFormat.Bfp8_b, True], 1: [pybuda.DataFormat.Bfp8_b, True], 2: [pybuda.DataFormat.Bfp8_b, True]})

compiler_cfg.loopback_outputs = names_dict
else:
compiler_cfg.enable_t_streaming = True
@@ -504,8 +442,5 @@ def add_sched(self, pybuda, entries, exits, ops, factor, constr):
print(f"[add_sched]: Override op spatial epoch: {fop}, chip {f}")
pybuda.config.override_op_placement(fop, chip_id=f, spatial_epoch_break=True)
constr.append(fop)
# for elem in exits:
# constr.append(elem)
# pybuda.config.override_op_placement(exits[0], temporal_epoch_break=True)
print(f"[add_sched] sched: {constr}")
return constr
39 changes: 1 addition & 38 deletions benchmark/models/flant5/flant5.py
@@ -24,51 +24,14 @@ def flant5(training: bool, task: str, config: str, microbatch: int, device: str,
os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"

compiler_cfg = pybuda.config._get_global_compiler_config()
compiler_cfg.enable_tvm_cpu_fallback = False
compiler_cfg.default_df_override = pybuda._C.Float16_b
compiler_cfg.default_dram_parameters = False
if pybuda.detect_available_devices()[0] == BackendDevice.Grayskull:
compiler_cfg.enable_auto_fusing = False
compiler_cfg.enable_amp_light()
# compiler_cfg.compile_subgraphs = True
# compiler_cfg.enable_link_past_cache_ios = True

# os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1"
# os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1"
# os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
# os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
# os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"

# compiler_cfg = pybuda.config._get_global_compiler_config()
# compiler_cfg.enable_tvm_cpu_fallback = False
# compiler_cfg.default_df_override = pybuda._C.Float16_b
# compiler_cfg.default_dram_parameters = False
# compiler_cfg.enable_amp_light()
# compiler_cfg.compile_subgraphs = True
# compiler_cfg.enable_link_past_cache_ios = True

# # Add PyBUDA configurations
# os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1"
# os.environ["TT_BACKEND_MULTI_THREADED_PUSH"] = "1"
# os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1"
# os.environ["PYBUDA_EXTRA_L1_MARGIN"] = "120000"
# os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1"
# os.environ["PYBUDA_NLP_MANUAL_TARGET"] = "35000"
# os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
# os.environ["TT_BACKEND_PROFILER"] = "1"
# os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
# os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"
# os.environ["PYBUDA_TEMP_ELT_UNARY_ESTIMATES_LEGACY"] = "1"

# compiler_cfg = pybuda.config._get_global_compiler_config()
# compiler_cfg.enable_t_streaming = True
# compiler_cfg.enable_tvm_cpu_fallback = False
# compiler_cfg.default_df_override = pybuda._C.Float16_b
# compiler_cfg.default_dram_parameters = False
# compiler_cfg.enable_auto_fusing = False
# compiler_cfg.enable_amp_light()

# Set model parameters based on chosen task and model configuration
if task in ["na", "text_classification", "text_summarization"]:
21 changes: 1 addition & 20 deletions benchmark/models/flant5/flant5_past_cache_enc_dec.py
@@ -59,8 +59,8 @@ def flant5_past_cache_enc_dec(training: bool, task: str, config: str, microbatch

if device == "tt":
import pybuda
from pybuda.pybudaglobal import TILE_DIM
from pybuda._C.backend_api import BackendDevice
from pybuda.pybudaglobal import TILE_DIM

# ---------------------------------------------------------------------------------------- #
# flan-T5, START
@@ -76,28 +76,9 @@ def flant5_past_cache_enc_dec(training: bool, task: str, config: str, microbatch
os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

# These are about to be enabled by default.
#
os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
os.environ["PYBUDA_EXP_APPROX"] = "1"

# if compiler_cfg.balancer_policy == "default":
# compiler_cfg.balancer_policy = "Ribbon"
# os.environ["PYBUDA_RIBBON2"] = "1"

# if data_type == "Fp16_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
# os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
# os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

# # These are about to be enabled by default.
# #
# os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
# os.environ["PYBUDA_EXP_APPROX"] = "1"

# if data_type == "Bfp8_b":
# pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
# pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b)
# pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)

# ---------------------------------------------------------------------------------------- #
# flan-T5, END
# ---------------------------------------------------------------------------------------- #
29 changes: 2 additions & 27 deletions benchmark/models/hrnet/hrnet.py
@@ -43,7 +43,7 @@ def hrnet(training: bool, task: str, config: str, microbatch: int, device: str,
os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46" # removing causes hang #2139
os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46" # removing causes hang #2139
os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"

# These are about to be enabled by default.
@@ -57,31 +57,6 @@ def hrnet(training: bool, task: str, config: str, microbatch: int, device: str,
if compiler_cfg.balancer_policy == "Ribbon":
compiler_cfg.enable_amp_light()

# compiler_cfg.enable_auto_transposing_placement = True

# if compiler_cfg.balancer_policy == "default":
# compiler_cfg.balancer_policy = "Ribbon"
# os.environ["PYBUDA_RIBBON2"] = "1"

# if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
# os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
# os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

# os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46" # removing causes hang #2139
# os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"

# # These are about to be enabled by default.
# #
# os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
# if data_type == "Fp16_b":
# # Hangs with autotranspose on #2542
# compiler_cfg.enable_auto_transposing_placement = False

# # Manually enable amp light for Ribbon
# if compiler_cfg.balancer_policy == "Ribbon":
# compiler_cfg.enable_amp_light()


# Set model parameters based on chosen task and model configuration
img_res = 224
target_microbatch = 32
@@ -108,7 +83,7 @@
available_devices = pybuda.detect_available_devices()
if available_devices:
if available_devices[0] == BackendDevice.Grayskull:
pybuda.config._internal_insert_fj_buffering_nop('add_312', ['add_341'], nop_count=2)
pybuda.config._internal_insert_fj_buffering_nop("add_312", ["add_341"], nop_count=2)
pybuda.config.set_epoch_break("resize2d_3176.dc.sparse_matmul.3.lc2")
else:
raise RuntimeError("Unknown config")
10 changes: 0 additions & 10 deletions benchmark/models/inception_v4/inception_v4.py
@@ -30,16 +30,6 @@ def inception_v4(training: bool, task: str, config: str, microbatch: int, device
os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

# compiler_cfg.enable_auto_transposing_placement = True

# if compiler_cfg.balancer_policy == "default":
# compiler_cfg.balancer_policy = "Ribbon"
# os.environ["PYBUDA_RIBBON2"] = "1"

# if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
# os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
# os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

if config == "224":
model_name = "inception_v4"
img_res = 224 # https://github.com/huggingface/pytorch-image-models/blob/main/train.py#L122
33 changes: 4 additions & 29 deletions benchmark/models/mobilenet_v1/mobilenet_v1.py
@@ -15,7 +15,7 @@
def mobilenetv1(training: bool, task: str, config: str, microbatch: int, device: str, data_type: str):

if device == "tt":

import pybuda
from pybuda._C.backend_api import BackendDevice

@@ -44,34 +44,9 @@ def mobilenetv1(training: bool, task: str, config: str, microbatch: int, device:
pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2)
pybuda.config.configure_mixed_precision(op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2)

# compiler_cfg.enable_auto_transposing_placement = True

# if compiler_cfg.balancer_policy == "default":
# compiler_cfg.balancer_policy = "Ribbon"
# os.environ["PYBUDA_RIBBON2"] = "1"

# if data_type == "Bfp8_b" and pybuda.detect_available_devices()[0] == BackendDevice.Wormhole_B0:
# os.environ["PYBUDA_ENABLE_DRAM_IO_BUFFER_SCALING"] = "1"
# os.environ["PYBUDA_ENABLE_INPUT_BUFFER_SCALING_FOR_NOC_READERS"] = "1"

# os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"

# # These are about to be enabled by default.
# #
# os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"

# if data_type == "Fp16_b":
# os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "40"
# os.environ["PYBUDA_TEMP_DISABLE_MODEL_KB_PROLOGUE_BW"] = "1"

# if data_type == "Bfp8_b":
# os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"
# pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
# pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
# pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2)
# pybuda.config.configure_mixed_precision(op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2)
pybuda.config.configure_mixed_precision(
op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2
)

# Set model parameters based on chosen task and model configuration
if config == "192":
