From a5617029b8baad15b6ca9234d1a62aae08b1224a Mon Sep 17 00:00:00 2001 From: Jack Lo <36210336+jackl-xilinx@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:43:05 -0700 Subject: [PATCH] Update trace scripts and vector_scalar_mul to use new scripts (#1853) --- .../basic/vector_scalar_mul/aie2.py | 14 +- .../basic/vector_scalar_mul/test.py | 31 +- programming_examples/utils/parse_trace.py | 15 +- python/utils/trace.py | 452 +++++++++++++++++- 4 files changed, 491 insertions(+), 21 deletions(-) diff --git a/programming_examples/basic/vector_scalar_mul/aie2.py b/programming_examples/basic/vector_scalar_mul/aie2.py index 6040d60387..1d367e5aab 100644 --- a/programming_examples/basic/vector_scalar_mul/aie2.py +++ b/programming_examples/basic/vector_scalar_mul/aie2.py @@ -51,9 +51,10 @@ def device_body(): ) of_out = object_fifo("out", ComputeTile2, ShimTile, buffer_depth, tile_ty) - # Set up a circuit-switched flow from core to shim for tracing information + # Set up a packet-switched flow from core to shim for tracing information + tiles_to_trace = [ComputeTile2] if trace_size > 0: - flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1) + trace_utils.configure_packet_tracing_flow(tiles_to_trace, ShimTile) # Set up compute tiles @@ -77,13 +78,10 @@ def core_body(): def sequence(A, F, C): if trace_size > 0: - trace_utils.configure_simple_tracing_aie2( - ComputeTile2, - ShimTile, - ddr_id=2, - size=trace_size, - offset=N_in_bytes, + trace_utils.configure_packet_tracing_aie2( + tiles_to_trace, ShimTile, trace_size, N_in_bytes ) + npu_dma_memcpy_nd( metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True ) diff --git a/programming_examples/basic/vector_scalar_mul/test.py b/programming_examples/basic/vector_scalar_mul/test.py index 8ddb7f6659..9ffa7dab1d 100644 --- a/programming_examples/basic/vector_scalar_mul/test.py +++ b/programming_examples/basic/vector_scalar_mul/test.py @@ -7,7 +7,8 @@ # (c) Copyright 2024 Advanced Micro Devices, Inc. 
or its affiliates import numpy as np import sys -from aie.utils.xrt import setup_aie, execute as execute_on_aie +import time +from aie.utils.xrt import setup_aie, write_out_trace, execute import aie.utils.test as test_utils @@ -18,6 +19,10 @@ def main(opts): vector_dtype = np.int16 scalar_dtype = np.int32 scale_factor = 3 + size_out = data_size * 2 + print("output buffer size: " + str(size_out)) + + enable_trace = opts.trace_size > 0 app = setup_aie( opts.xclbin, @@ -28,12 +33,34 @@ def main(opts): scalar_dtype, data_size, vector_dtype, + enable_trace=enable_trace, + trace_size=opts.trace_size, ) input_vector = np.arange(1, data_size + 1, dtype=vector_dtype) input_factor = np.array([3], dtype=scalar_dtype) - aie_output = execute_on_aie(app, input_vector, input_factor) + # aie_output = execute_on_aie(app, input_vector, input_factor) + + start = time.time_ns() + full_output = execute(app, input_vector, input_factor) + stop = time.time_ns() + npu_time = stop - start + print("npu_time: ", npu_time) + + # aie_output = full_output[:size_out].view(np.int8) + # aie_output = full_output[:size_out].view(np.uint8) + aie_output = full_output[:size_out].view(np.int16) + if enable_trace: + trace_buffer = full_output[size_out:].view(np.uint32) + ref = np.arange(1, data_size + 1, dtype=vector_dtype) * scale_factor + if enable_trace: + # trace_buffer = full_output[3920:] + print("trace_buffer shape: ", trace_buffer.shape) + print("trace_buffer dtype: ", trace_buffer.dtype) + # write_out_trace(trace_buffer, str(opts.trace_file)) + write_out_trace(trace_buffer, "trace.txt") + # Copy output results and verify they are correct errors = 0 if opts.verify: diff --git a/programming_examples/utils/parse_trace.py b/programming_examples/utils/parse_trace.py index 534500d29f..7f6eae57d6 100755 --- a/programming_examples/utils/parse_trace.py +++ b/programming_examples/utils/parse_trace.py @@ -3,6 +3,7 @@ import argparse import sys import re + from aie.utils.trace_events_enum import CoreEvent, 
MemEvent, PLEvent, MemTileEvent # Number of different trace types, currently 4 @@ -13,7 +14,8 @@ NumTraceTypes = 4 NUM_EVENTS = 8 # number of events we can view per trace -DEBUG = False +# DEBUG = False +# DEBUG = True def parse_args(): @@ -23,6 +25,7 @@ def parse_args(): parser.add_argument( "--colshift", help="column shift adjustment to source mlir", required=False ) + parser.add_argument("--debug", help="debug mode", required=False) # TODO tracelabels removed since we can have multiple sets of labels for each pkt_type & loc combination # parser.add_argument('--tracelabels', # nargs='+', @@ -610,7 +613,7 @@ def parse_mlir_trace_events(lines): row = int(result.group(3 * i2 + 3)) elif var == "column": col = int(result.group(3 * i2 + 3)) + colshift - col = 1 if col == 0 else col + # col = 1 if col == 0 else col elif var == "value": if result.group(3 * i2 + 2) == "0x": value = int(result.group(3 * i2 + 3), 16) @@ -757,6 +760,10 @@ def lookup_event_name_by_type(trace_type, code): events_enum = CoreEvent elif trace_type == 1: # Mem traces events_enum = MemEvent + elif trace_type == 2: # Shim traces + events_enum = PLEvent + elif trace_type == 3: # MemTile traces + events_enum = MemTileEvent if events_enum is not None and code in set(x.value for x in events_enum): event = events_enum(code).name else: @@ -871,6 +878,10 @@ def setup_trace_metadata(trace_events, pid_events): opts = parse_args() +DEBUG = opts.debug +if DEBUG: + print("Debug mode enable\n") + # set colshift based on optional argument colshift = int(opts.colshift) if opts.colshift else 0 diff --git a/python/utils/trace.py b/python/utils/trace.py index 0f6bc8c4e9..668455881e 100644 --- a/python/utils/trace.py +++ b/python/utils/trace.py @@ -5,17 +5,21 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2024 Advanced Micro Devices, Inc. 
+ +import typing +from aie.dialects.aie import * from aie.dialects.aiex import * from aie.dialects.aie import get_target_model from aie.utils.trace_events_enum import CoreEvent, MemEvent, PLEvent, MemTileEvent +from enum import IntEnum class GenericEvent: - def __init__(self, code: CoreEvent | MemEvent | PLEvent | MemTileEvent): + def __init__(self, code: typing.Union[CoreEvent, MemEvent, PLEvent, MemTileEvent]): # For backwards compatibility, allow integer as event if isinstance(code, int): code = CoreEvent(code) - self.code: CoreEvent | MemEvent | PLEvent | MemTileEvent = code + self.code: typing.Union[CoreEvent, MemEvent, PLEvent, MemTileEvent] = code def get_register_writes(self): """ @@ -47,9 +51,35 @@ def get_register_writes(self): CoreEvent.PORT_TLAST_2, CoreEvent.PORT_TLAST_3, CoreEvent.PORT_TLAST_4, CoreEvent.PORT_TLAST_5, CoreEvent.PORT_TLAST_6, CoreEvent.PORT_TLAST_7, } + +MemTilePortEventCodes = { MemTileEvent.PORT_IDLE_0, MemTileEvent.PORT_IDLE_1, + MemTileEvent.PORT_IDLE_2, MemTileEvent.PORT_IDLE_3, + MemTileEvent.PORT_IDLE_4, MemTileEvent.PORT_IDLE_5, + MemTileEvent.PORT_IDLE_6, MemTileEvent.PORT_IDLE_7, + MemTileEvent.PORT_RUNNING_0, MemTileEvent.PORT_RUNNING_1, + MemTileEvent.PORT_RUNNING_2, MemTileEvent.PORT_RUNNING_3, + MemTileEvent.PORT_RUNNING_4, MemTileEvent.PORT_RUNNING_5, + MemTileEvent.PORT_RUNNING_6, MemTileEvent.PORT_RUNNING_7, + MemTileEvent.PORT_STALLED_0, MemTileEvent.PORT_STALLED_1, + MemTileEvent.PORT_STALLED_2, MemTileEvent.PORT_STALLED_3, + MemTileEvent.PORT_STALLED_4, MemTileEvent.PORT_STALLED_5, + MemTileEvent.PORT_STALLED_6, MemTileEvent.PORT_STALLED_7, + MemTileEvent.PORT_TLAST_0, MemTileEvent.PORT_TLAST_1, + MemTileEvent.PORT_TLAST_2, MemTileEvent.PORT_TLAST_3, + MemTileEvent.PORT_TLAST_4, MemTileEvent.PORT_TLAST_5, + MemTileEvent.PORT_TLAST_6, MemTileEvent.PORT_TLAST_7, } + + # fmt: on +class PacketType(IntEnum): + CORE = 0 + MEM = 1 + SHIMTILE = 2 + MEMTILE = 3 + + class PortEvent(GenericEvent): def __init__(self, code, 
port_number, master=True): # For backwards compatibility, allow integer as event @@ -99,6 +129,55 @@ def slave(port): return ret +class MemTilePortEvent(GenericEvent): + def __init__(self, code, port_number, master=True): + # For backwards compatibility, allow integer as event + if isinstance(code, int): + code = MemTileEvent(code) + assert code in MemTilePortEventCodes + # fmt: off + self.event_number = ( + 0 if code in { MemTileEvent.PORT_IDLE_0, MemTileEvent.PORT_RUNNING_0, + MemTileEvent.PORT_STALLED_0, MemTileEvent.PORT_TLAST_0 } + else 1 if code in { MemTileEvent.PORT_IDLE_1, MemTileEvent.PORT_RUNNING_1, + MemTileEvent.PORT_STALLED_1, MemTileEvent.PORT_TLAST_1, } + else 2 if code in { MemTileEvent.PORT_IDLE_2, MemTileEvent.PORT_RUNNING_2, + MemTileEvent.PORT_STALLED_2, MemTileEvent.PORT_TLAST_2 } + else 3 if code in { MemTileEvent.PORT_IDLE_3, MemTileEvent.PORT_RUNNING_3, + MemTileEvent.PORT_STALLED_3, MemTileEvent.PORT_TLAST_3 } + else 4 if code in { MemTileEvent.PORT_IDLE_4, MemTileEvent.PORT_RUNNING_4, + MemTileEvent.PORT_STALLED_4, MemTileEvent.PORT_TLAST_4 } + else 5 if code in { MemTileEvent.PORT_IDLE_5, MemTileEvent.PORT_RUNNING_5, + MemTileEvent.PORT_STALLED_5, MemTileEvent.PORT_TLAST_5 } + else 6 if code in { MemTileEvent.PORT_IDLE_6, MemTileEvent.PORT_RUNNING_6, + MemTileEvent.PORT_STALLED_6, MemTileEvent.PORT_TLAST_6 } + else 7 + ) + # fmt: on + self.port_number = port_number + self.master = master + super().__init__(code) + + def get_register_writes(self): + def master(port): + return port | (1 << 5) + + def slave(port): + return port + + # 0xB0F00: Stream switch event port selection 0 + # 0xB0F04: Stream switch event port selection 1 + address = 0xB0F00 if self.event_number < 4 else 0xB0F04 + value = master(self.port_number) if self.master else slave(self.port_number) + + value = (value & 0xFF) << 8 * (self.event_number % 4) + + ret = {0xB0F00: 0, 0xB0F04: 0} + ret[address] = value + + return ret + + def extract_trace(out_buf, out_buf_shape, 
out_buf_dtype, trace_size): trace_size_words = trace_size // 4 out_buf_flat = out_buf.reshape((-1,)).view(np.uint32) @@ -174,6 +253,9 @@ def configure_coretile_tracing_aie2( CoreEvent.INSTR_LOCK_RELEASE_REQ, CoreEvent.LOCK_STALL, ], + enable_packet=0, + packet_id=0, + packet_type=PacketType.CORE, ): # For backwards compatibility, allow integers for start/stop events if isinstance(start, int): @@ -221,13 +303,11 @@ def configure_coretile_tracing_aie2( value=pack4bytes(stop.value, start.value, 0, 0), ) # 0x340D4: Trace Control 1 - # This is used to control packet routing. For the moment - # only deal with the simple case of circuit routing. npu_write32( column=int(tile.col), row=int(tile.row), address=0x340D4, - value=0, + value=((packet_type & 0x7) << 12) | (packet_id & 0x1F) if enable_packet else 0, ) # 0x340E0: Trace Event Group 1 (Which events to trace) # 0xAABBCCDD AA, BB, CC, DD <- four event slots @@ -259,6 +339,135 @@ def configure_coretile_tracing_aie2( npu_write32(column=int(tile.col), row=int(tile.row), address=addr, value=value) +# Configures the memtile for tracing given start/stop events, trace events, and optional +# packet config as applicable. 
+def configure_memtile_tracing_aie2( + tile, + start=MemTileEvent.TRUE, + stop=MemTileEvent.NONE, + events=[ + MemTilePortEvent(MemTileEvent.PORT_RUNNING_0, 0, True), # master(0) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_1, 1, True), # master(1) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_2, 0, False), # slave(0) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_3, 1, False), # slave(1) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_4, 2, False), # slave(2) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_5, 3, False), # slave(3) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_6, 4, False), # slave(4) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_7, 5, False), # slave(5) + ], + enable_packet=0, + packet_id=0, + packet_type=PacketType.MEMTILE, +): + # For backwards compatibility, allow integers for start/stop events + if isinstance(start, int): + start = MemTileEvent(start) + if isinstance(stop, int): + stop = MemTileEvent(stop) + + # Pad the input so we have exactly 8 events. + if len(events) > 8: + raise RuntimeError( + f"At most 8 events can be traced at once, have {len(events)}." + ) + events = (events + [MemTileEvent.NONE] * 8)[:8] + + # Reorder events so they match the event order for display + ordered_events = [events[p] for p in [3, 2, 1, 0, 7, 6, 5, 4]] + + # Assure all selected events are valid + ordered_events = [ + e if isinstance(e, GenericEvent) else GenericEvent(e) for e in ordered_events + ] + + # Require ports to be specifically given for port events. + for event in ordered_events: + if event.code in PortEventCodes and not isinstance(event, PortEvent): + raise RuntimeError( + f"Tracing: {event.code.name} is a PortEvent and requires a port to be specified alongside it. \n" + "To select master port N, specify the event as follows: " + f"PortEvent(CoreEvent.{event.code.name}, N, master=True), " + "and analogously with master=False for slave ports. 
" + "For example: " + f"configure_simple_tracing_aie2( ..., events=[PortEvent(CoreEvent.{event.code.name}, 1, master=True)])" + ) + + # 0x340D0: Trace Control 0 + # 0xAABB---C + # AA <- Event to stop trace capture + # BB <- Event to start trace capture + # C <- Trace mode, 00=event=time, 01=event-PC, 10=execution + # Configure so that "Event 1" (always true) causes tracing to start + npu_write32( + column=int(tile.col), + row=int(tile.row), + address=0x940D0, + value=pack4bytes(stop.value, start.value, 0, 0), + ) + # 0x340D4: Trace Control 1 + npu_write32( + column=int(tile.col), + row=int(tile.row), + address=0x940D4, + value=((packet_type & 0x7) << 12) | (packet_id & 0x1F) if enable_packet else 0, + ) + # 0x340E0: Trace Event Group 1 (Which events to trace) + # 0xAABBCCDD AA, BB, CC, DD <- four event slots + npu_write32( + column=int(tile.col), + row=int(tile.row), + address=0x940E0, + value=pack4bytes(*(e.code.value for e in ordered_events[0:4])), + ) + # 0x340E4: Trace Event Group 2 (Which events to trace) + # 0xAABBCCDD AA, BB, CC, DD <- four event slots + npu_write32( + column=int(tile.col), + row=int(tile.row), + address=0x940E4, + value=pack4bytes(*(e.code.value for e in ordered_events[4:8])), + ) + + # Event specific register writes + all_reg_writes = {} + for e in ordered_events: + reg_writes = e.get_register_writes() + for addr, value in reg_writes.items(): + if addr in all_reg_writes: + all_reg_writes[addr] |= value + else: + all_reg_writes[addr] = value + for addr, value in all_reg_writes.items(): + npu_write32(column=int(tile.col), row=int(tile.row), address=addr, value=value) + + +# Configure timer in core tile to reset based on `event` +def configure_timer_ctrl_core_aie2(tile, event): + addr = 0x34000 + event = (event & 0x7F) << 8 + npu_write32( + column=int(tile.col), + row=int(tile.row), + address=addr, + value=event, + ) + + +# Configure timer in memtile to reset based on `event` +def configure_timer_ctrl_memtile_aie2(tile, event): + addr = 
0x94000 + event = (event & 0x7F) << 8 + npu_write32( + column=int(tile.col), + row=int(tile.row), + address=addr, + value=event, + ) + + +# Configure broadcast event based on an internal triggered event. +# `num` is the broadcast number we want to broadcast on +# and `event` is the triggering broadcast event. def configure_broadcast_core_aie2(tile, num, event): addr = 0x34010 + num * 4 npu_write32( @@ -269,6 +478,22 @@ def configure_broadcast_core_aie2(tile, num, event): ) +# Create an event generation at the shim tile +# This is used to create a custom event to synchronize over +def configure_event_gen_core_aie2(tile, event): + addr = 0x34008 + event = event & 0x7F + npu_write32( + column=int(tile.col), + row=int(tile.row), + address=addr, + value=event, + ) + + +# Configure shim tile for tracing. +# This configures the shim tile / bd to process a specific packet id and packet type. +# It also configures the address patch. def configure_shimtile_tracing_aie2( shim, channel=1, @@ -276,6 +501,10 @@ def configure_shimtile_tracing_aie2( ddr_id=2, size=8192, offset=0, + enable_token=0, + enable_packet=0, + packet_id=0, + packet_type=PacketType.CORE, ): dev = shim.parent.attributes["device"] @@ -284,16 +513,17 @@ def configure_shimtile_tracing_aie2( # Shim has to be a shim tile assert tm.is_shim_noc_tile(shim.col, shim.row) + # configure_shimtile_bd_aie2(shim, channel, bd_id, ddr_id, size, offset, 1, 0, 0) # Configure a buffer descriptor to write tracing information that has been routed into this shim tile # out to host DDR memory npu_writebd( bd_id=bd_id, buffer_length=size, buffer_offset=offset, - enable_packet=0, + enable_packet=enable_packet, out_of_order_id=0, - packet_id=0, - packet_type=0, + packet_id=packet_id, + packet_type=packet_type, column=int(shim.col), d0_size=0, d0_stride=0, @@ -315,15 +545,63 @@ def configure_shimtile_tracing_aie2( ) addr = (int(shim.col) << tm.get_column_shift()) | (0x1D004 + bd_id * 0x20) npu_address_patch(addr=addr, arg_idx=ddr_id, 
arg_plus=offset) + # configure S2MM channel npu_write32( column=int(shim.col), row=int(shim.row), address=0x1D204 if channel == 0 else 0x1D20C, - value=bd_id, + value=((enable_token & 0x1) << 31) | bd_id, ) +# def configure_shimtile_bd_aie2( +# shim, +# channel=1, +# bd_id=13, +# ddr_id=2, +# size=8192, +# offset=0, +# enable_packet=0, +# packet_id=0, +# packet_type=0 +# ): +# npu_writebd( +# bd_id=bd_id, +# buffer_length=size, +# buffer_offset=offset, +# enable_packet=enable_packet, +# out_of_order_id=0, +# packet_id=packet_id, +# packet_type=packet_type, +# column=int(shim.col), +# d0_size=0, +# d0_stride=0, +# d1_size=0, +# d1_stride=0, +# d2_stride=0, +# iteration_current=0, +# iteration_size=0, +# iteration_stride=0, +# lock_acq_enable=0, +# lock_acq_id=0, +# lock_acq_val=0, +# lock_rel_id=0, +# lock_rel_val=0, +# next_bd=0, +# row=0, +# use_next_bd=0, +# valid_bd=1, +# ) +# addr = (int(shim.col) << tm.get_column_shift()) | (0x1D004 + bd_id * 0x20) +# npu_address_patch(addr=addr, arg_idx=ddr_id, arg_plus=offset) + + +# This does a simple circuit switched trace config for a given tile +# and shim. Since we're not doing packet switching, we're not synchronizing +# any timers. This works fine for a trace of a single tile though it does use +# a stream for routing the trace (which is the same as multi-tile tracing +# except that can be shared with trace packets) def configure_simple_tracing_aie2( tile, shim, @@ -347,3 +625,159 @@ def configure_simple_tracing_aie2( ): configure_coretile_tracing_aie2(tile, start, stop, events) configure_shimtile_tracing_aie2(shim, channel, bd_id, ddr_id, size, offset) + + +# Wrapper to configure the core tile and shim tile for packet tracing. This does +# the following: +# 1. Configure core tile based on start/ stop, events, and flow id. The flow id +# needs to be unique per flow. +# 2. Configure timer based on broadcast event (default is 15). 
This ensures all +# tiles keying off this event have a synchronized timer so their traces are +# synchronized. This event is also used as the start event for tracing. +# 3. Configure shim tile to receive this flow and move the data to offset/ size. +# +def configure_core_packet_tracing_aie2( + tile, + shim, + flow_id=0, + bd_id=15, + size=8192, + offset=0, + enable_token=0, + brdcst_event=0x7A, # event 122 - broadcast 15 + channel=1, + ddr_id=2, + stop=CoreEvent.NONE, + events=[ + CoreEvent.INSTR_EVENT_0, + CoreEvent.INSTR_EVENT_1, + CoreEvent.INSTR_VECTOR, + PortEvent(CoreEvent.PORT_RUNNING_0, 1, True), # master(1) + PortEvent(CoreEvent.PORT_RUNNING_1, 1, False), # slave(1) + CoreEvent.INSTR_LOCK_ACQUIRE_REQ, + CoreEvent.INSTR_LOCK_RELEASE_REQ, + CoreEvent.LOCK_STALL, + ], +): + configure_coretile_tracing_aie2( + tile, brdcst_event, stop, events, 1, flow_id, PacketType.CORE + ) + configure_timer_ctrl_core_aie2(tile, brdcst_event) + configure_shimtile_tracing_aie2( + shim, + channel, + bd_id, + ddr_id, + size, + offset, + enable_token, + 1, + flow_id, + PacketType.CORE, + ) + + +# Configures mem tile for packet tracing. This is very similar to configure_core_packet_tracing_aie2 +# and maybe they can be combined if we pass the tile type to select the correct address offsets. +# As it stands, we call configure_memtile_tracing_aie2 and configure_timer_ctrl_memtile_aie2 instead +# of the core tile variants. The default events we care about are also different for the memtile. 
+def configure_memtile_packet_tracing_aie2( + tile, + shim, + flow_id=0, + bd_id=15, + size=8192, + offset=0, + enable_token=0, + brdcst_event=0x9D, # event 157 - broadcast 15 + channel=1, + ddr_id=2, + stop=MemTileEvent.NONE, + events=[ + MemTilePortEvent(MemTileEvent.PORT_RUNNING_0, 0, True), # master(0) + # MemTilePortEvent(MemTileEvent.PORT_RUNNING_1, 1, True), # master(1) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_1, 14, False), # slave(14/ north1) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_2, 0, False), # slave(0) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_3, 1, False), # slave(1) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_4, 2, False), # slave(2) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_5, 3, False), # slave(3) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_6, 4, False), # slave(4) + MemTilePortEvent(MemTileEvent.PORT_RUNNING_7, 5, False), # slave(5) + # MemTilePortEvent(MemTileEvent.PORT_RUNNING_4, 13, False), # slave(13/ north0) + # MemTilePortEvent(MemTileEvent.PORT_RUNNING_5, 14, False), # slave(14/ north1) + # MemTilePortEvent(MemTileEvent.PORT_RUNNING_6, 17, False), # slave(17/ trace) + # MemTilePortEvent(MemTileEvent.PORT_RUNNING_7, 8, True), # master(9/ south1) + ], +): + configure_memtile_tracing_aie2( + tile, brdcst_event, stop, events, 1, flow_id, PacketType.MEMTILE + ) + configure_timer_ctrl_memtile_aie2(tile, brdcst_event) + configure_shimtile_tracing_aie2( + shim, + channel, + bd_id, + ddr_id, + size, + offset, + enable_token, + 1, + flow_id, + PacketType.MEMTILE, + ) + + +# Wrapper around packetflows to iterate over tiles to trace and route them +# to the shim for outputting the trace to L3 memory. This uses default values for the packet id +# that increases for each tile we trace. 
This should match the tile trace config that's set by +# configure_core_packet_tracing_aie2 +def configure_packet_tracing_flow(tiles_to_trace, shim): + for i in range(len(tiles_to_trace)): + packetflow( + i + 1, + tiles_to_trace[i], + WireBundle.Trace, + 0, + shim, + WireBundle.DMA, + 1, + keep_pkt_header=True, + ) + + +# Configure the shim tile to support packet tracing via: +# 1. Set an event generation to create a custom user event 1 (127, 0x7f) +# 2. Custom event also triggers a broadcast event (by default broadcast 15) +# 3. Custom event also resets timer (will be true for all tiles) so all timers are synchronized +# The actual shim dma config is done via configure_shimtile_tracing_aie2 but this tends to be done +# for each tile we're tracing. +def configure_shim_packet_tracing_aie2( + shim, + brdcst_num=15, + user_event=0x7F, # 127: user event #1 +): + configure_timer_ctrl_core_aie2(shim, user_event) + configure_broadcast_core_aie2(shim, brdcst_num, user_event) + configure_event_gen_core_aie2(shim, user_event) + + +# Wrapper to iterate over tiles to trace and configure their default packet tracing config +# along with the shim config for packet tracing +def configure_packet_tracing_aie2( + tiles_to_trace, + shim, + trace_size, + trace_offset, + enable_token=0, +): + for i in range(len(tiles_to_trace)): + configure_core_packet_tracing_aie2( + tiles_to_trace[i], + shim, + i + 1, + 15 - i, + trace_size, + trace_offset, + enable_token, + ) + configure_shim_packet_tracing_aie2(shim)