Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tracing support for Conv2D example #1920

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions programming_examples/ml/conv2d/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ include ${srcdir}/../../makefile-common

mlirFileName = aie

trace_size = 16384

VPATH := ${srcdir}/../../../aie_kernels/aie2

all: build/conv2dk1_i8.o build/final.xclbin
Expand All @@ -23,15 +25,29 @@ build/${mlirFileName}.mlir: ${srcdir}/aie2.py
mkdir -p ${@D}
python3 $< > $@

build/${mlirFileName}_trace.mlir: ${srcdir}/aie2.py
mkdir -p ${@D}
python3 $< ${trace_size} > $@

build/final.xclbin: build/${mlirFileName}.mlir build/conv2dk1_i8.o
mkdir -p ${@D}
cd ${@D} && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \
--no-xchesscc --no-xbridge \
--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)

build/final_trace.xclbin: build/${mlirFileName}_trace.mlir build/conv2dk1_i8.o
mkdir -p ${@D}
cd ${@D} && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \
--no-xchesscc --no-xbridge \
--xclbin-name=${@F} --npu-insts-name=insts_trace.txt $(<:%=../%)

run_py: build/final.xclbin
${powershell} python3 ${srcdir}/test.py -x build/final.xclbin -i build/insts.txt -k MLIR_AIE

trace_py: build/final_trace.xclbin
${powershell} python3 ${srcdir}/test.py -x build/final_trace.xclbin -i build/insts_trace.txt -k MLIR_AIE -t ${trace_size}
${srcdir}/../../utils/parse_trace.py --filename log/trace_conv2d.txt --mlir build/aie_trace.mlir --colshift 1 > log/trace_conv2d.json

clean:
rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \
chess* *.o insts.txt \
Expand Down
21 changes: 18 additions & 3 deletions programming_examples/ml/conv2d/aie2.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

N_in_bytes = 32*32*8*8 is the output buffer size in bytes, which serves as the offset at which the trace is written. In this example, you can rewrite this as N_in_bytes = tensorSize, since that is what we want. In the vector_scalar_mul example it is multiplied by 2 because the output datatype for that example is int16, whereas this one is int8. So the output size needs to be scaled by the element size, since N_in_bytes is expressed in bytes.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, that makes sense! I have updated it.

Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from aie.dialects.aiex import *
from aie.extras.context import mlir_mod_ctx
from aie.helpers.dialects.ext.scf import _for as range_
import aie.utils.trace as trace_utils

width = 32
height = 32
Expand All @@ -32,8 +33,10 @@

tensorSize = width * height * in_channels

N_in_bytes = tensorSize # Number of bytes of output data (1 byte/elem)

def conv2dk1():

def conv2dk1(trace_size: int):
with mlir_mod_ctx() as ctx:

@device(AIEDevice.npu1_1col)
Expand Down Expand Up @@ -85,6 +88,11 @@ def device_body():
of_outOFL2L3 = object_fifo("outOFL2L3", MemTile, [ShimTile], 2, bufOut_ty)
object_fifo_link(of_out_02_L2, of_outOFL2L3)

# Set up a packet-switched flow from core to shim for tracing information
tiles_to_trace = [ComputeTile2]
if trace_size > 0:
trace_utils.configure_packet_tracing_flow(tiles_to_trace, ShimTile)

# Set up compute tiles

rtp2 = buffer(
Expand Down Expand Up @@ -120,6 +128,12 @@ def core_body():
# To/from AIE-array data movement
@runtime_sequence(tensor_ty, weights_ty, tensor_ty)
def sequence(I, W, O):

if trace_size > 0:
trace_utils.configure_packet_tracing_aie2(
tiles_to_trace, ShimTile, trace_size, N_in_bytes
)

rtp2[0] = 10

npu_dma_memcpy_nd(
Expand Down Expand Up @@ -148,5 +162,6 @@ def sequence(I, W, O):
# print(ctx.module.operation.verify())
print(ctx.module)


conv2dk1()
if __name__ == "__main__":
trace_size = 0 if (len(sys.argv) != 2) else int(sys.argv[1])
conv2dk1(trace_size=trace_size)
2 changes: 2 additions & 0 deletions programming_examples/ml/conv2d/run_makefile.lit
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@
// RUN: make -f %S/Makefile clean
// RUN: make -f %S/Makefile
// RUN: %run_on_npu make -f %S/Makefile run_py | FileCheck %s
// RUN: make -f %S/Makefile clean
// RUN: %run_on_npu make -f %S/Makefile trace_py | FileCheck %s
// CHECK: PASS!
25 changes: 18 additions & 7 deletions programming_examples/ml/conv2d/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def main(opts):
npu_time_total = 0
npu_time_min = 9999999
npu_time_max = 0
trace_size = 16384
enable_trace = False
trace_size = opts.trace_size
enable_trace = False if not trace_size else True
trace_file = "log/trace_" + design + ".txt"
# ------------------------------------------------------
# Configure this to match your design's buffer size
Expand Down Expand Up @@ -118,24 +118,35 @@ def forward(self, x):
# ------------------------------------------------------
for i in range(num_iter):
start = time.time_ns()
aie_output = execute(app, ifm_mem_fmt, total_wts) * int8_scale
entire_buffer = execute(app, ifm_mem_fmt, total_wts)
stop = time.time_ns()

if enable_trace:
aie_output, trace = extract_trace(
aie_output, shape_out, dtype_out, trace_size
# Separate data and trace
data_buffer, trace_buffer = extract_trace(
entire_buffer, shape_out, dtype_out, trace_size
)
write_out_trace(trace, trace_file)
# Scale the data
data_buffer = data_buffer * int8_scale
# Write out the trace
write_out_trace(trace_buffer, trace_file)
else:
data_buffer = entire_buffer * int8_scale
trace_buffer = None

npu_time = stop - start
npu_time_total = npu_time_total + npu_time

# ------------------------------------------------------
# Reorder output data-layout
# ------------------------------------------------------
temp_out = aie_output.reshape(32, 8, 32, 8)
temp_out = data_buffer.reshape(32, 8, 32, 8)
temp_out = ds.reorder_mat(temp_out, "CDYX", "YCXD")
ofm_mem_fmt = temp_out.reshape(64, 32, 32)
if enable_trace:
ofm_log_filename = "/after_ofm_mem_fmt_final_trace.txt"
else:
ofm_log_filename = "/after_ofm_mem_fmt_final.txt"
ofm_mem_fmt.tofile(
log_folder + "/after_ofm_mem_fmt_final.txt", sep=",", format="%d"
)
Expand Down
Loading