From 602c5b1e70e86c67ba96f538e2fd77d443ef5c71 Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Tue, 27 Aug 2024 16:37:46 -0600 Subject: [PATCH] Support new python officially in passthrough kernel --- .../basic/passthrough_kernel/Makefile | 4 +- .../basic/passthrough_kernel/aie2.py | 120 +++++++----------- .../basic/passthrough_kernel/iron_objs.py | 74 ----------- 3 files changed, 46 insertions(+), 152 deletions(-) delete mode 100644 programming_examples/basic/passthrough_kernel/iron_objs.py diff --git a/programming_examples/basic/passthrough_kernel/Makefile b/programming_examples/basic/passthrough_kernel/Makefile index 3406c9d89b..11f2824a42 100644 --- a/programming_examples/basic/passthrough_kernel/Makefile +++ b/programming_examples/basic/passthrough_kernel/Makefile @@ -22,11 +22,11 @@ PASSTHROUGH_SIZE = ${data_size} all: build/final_${data_size}.xclbin -build/aie2_lineBased_8b_${data_size}.mlir: ${srcdir}/iron_objs.py +build/aie2_lineBased_8b_${data_size}.mlir: ${srcdir}/aie2.py mkdir -p ${@D} python3 $< ${data_size} 0 > $@ -build/aie_trace__lineBased_8b_${data_size}.mlir: ${srcdir}/iron_objs.py +build/aie_trace__lineBased_8b_${data_size}.mlir: ${srcdir}/aie2.py mkdir -p ${@D} python3 $< ${data_size} ${trace_size} > $@ diff --git a/programming_examples/basic/passthrough_kernel/aie2.py b/programming_examples/basic/passthrough_kernel/aie2.py index 39ef9106bd..f6c0ce9d99 100644 --- a/programming_examples/basic/passthrough_kernel/aie2.py +++ b/programming_examples/basic/passthrough_kernel/aie2.py @@ -5,94 +5,62 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates - import sys +import numpy as np -from aie.dialects.aie import * -from aie.dialects.aiex import * -from aie.dialects.scf import * -from aie.extras.context import mlir_mod_ctx - -import aie.utils.trace as trace_utils - +from aie.dialects.scf import for_ as range_ +from aie.dialects.scf import yield_ -def passthroughKernel(vector_size, trace_size): - N = vector_size - lineWidthInBytes = N // 4 # chop input in 4 sub-tensors +from aie.api.dataflow.inout.simplefifoinout import SimpleFifoInOutProgram +from aie.api.dataflow.objectfifo import MyObjectFifo +from aie.api.kernels.binkernel import BinKernel +from aie.api.phys.device import NPU1Col1 +from aie.api.program import MyProgram +from aie.api.worker import MyWorker - @device(AIEDevice.npu1_1col) - def device_body(): - # define types - memRef_ty = T.memref(lineWidthInBytes, T.ui8()) - - # AIE Core Function declarations - passThroughLine = external_func( - "passThroughLine", inputs=[memRef_ty, memRef_ty, T.i32()] - ) +try: + vector_size = int(sys.argv[1]) + if vector_size % 64 != 0 or vector_size < 512: + print("Vector size must be a multiple of 64 and greater than or equal to 512") + raise ValueError +except ValueError: + print("Argument has inappropriate value") - # Tile declarations - ShimTile = tile(0, 0) - ComputeTile2 = tile(0, 2) +assert vector_size % 4 == 0 +line_size = vector_size // 4 - # Set up a circuit-switched flow from core to shim for tracing information - if trace_size > 0: - flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1) +inout_type = ((vector_size,), np.uint8) +fifo_memref_type = ((line_size,), np.uint8) - # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile2, 2, memRef_ty) - of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) +of0 = MyObjectFifo(2, memref_type=fifo_memref_type, name="out") +of1 = MyObjectFifo(2, memref_type=fifo_memref_type, name="in") - # Set up compute tiles +passthrough_fn = BinKernel( + "passThroughLine", + "passThrough.cc.o", + [fifo_memref_type, fifo_memref_type, np.int32], +) - # Compute tile 2 - @core(ComputeTile2, "passThrough.cc.o") - def core_body(): - for _ in for_(sys.maxsize): - elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) - elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) - call(passThroughLine, [elemIn, elemOut, lineWidthInBytes]) - of_in.release(ObjectFifoPort.Consume, 1) - of_out.release(ObjectFifoPort.Produce, 1) - yield_([]) - # print(ctx.module.operation.verify()) +def core_fn(ofs_end1, ofs_end2, external_functions): + of_out = ofs_end1[0] + of_in = ofs_end2[0] + passThroughLine = external_functions[0] - tensor_ty = T.memref(N, T.ui8()) + for _ in range_(vector_size // line_size): + elemOut = of_out.acquire_produce(1) + elemIn = of_in.acquire_consume(1) + passThroughLine(elemIn, elemOut, line_size) + of_in.release_consume(1) + of_out.release_produce(1) + yield_([]) - @runtime_sequence(tensor_ty, tensor_ty, tensor_ty) - def sequence(inTensor, outTensor, notUsed): - if trace_size > 0: - trace_utils.configure_simple_tracing_aie2( - ComputeTile2, - ShimTile, - ddr_id=1, - size=trace_size, - offset=N, - ) - npu_dma_memcpy_nd( - metadata="in", - bd_id=0, - mem=inTensor, - sizes=[1, 1, 1, N], - ) - npu_dma_memcpy_nd( - metadata="out", - bd_id=1, - mem=outTensor, - sizes=[1, 1, 1, N], - ) - npu_sync(column=0, row=0, direction=0, channel=0) +worker_program = MyWorker(core_fn, [of0], [of1], [passthrough_fn], coords=(0, 2)) +inout_program = SimpleFifoInOutProgram(of0, vector_size, of1, vector_size) -try: - vector_size = int(sys.argv[1]) - if vector_size % 64 != 0 or vector_size < 512: - print("Vector size must be a multiple of 64 and greater than or equal to 512") - raise ValueError - trace_size = 0 if (len(sys.argv) != 3) else int(sys.argv[2]) -except ValueError: - print("Argument has inappropriate value") -with mlir_mod_ctx() as ctx: - passthroughKernel(vector_size, trace_size) - print(ctx.module) +my_program = MyProgram( + NPU1Col1(), worker_programs=[worker_program], inout_program=inout_program +) +my_program.resolve_program() diff --git a/programming_examples/basic/passthrough_kernel/iron_objs.py b/programming_examples/basic/passthrough_kernel/iron_objs.py deleted file mode 100644 index f2ab409feb..0000000000 --- a/programming_examples/basic/passthrough_kernel/iron_objs.py +++ /dev/null @@ -1,74 +0,0 @@ -# passthrough_kernel/aie2.py -*- Python -*- -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates -""" -Problems for clarify/conciseness: -* ObjectFifo needs (ordered) endpoints at instantiation -* Need introspection to declare functions/fifos on-the-fly so they still land in the symbol table -* Can remove type data if we're okay with inferring it through use (also required introspection) => but less verification if we go this route - - Could we fix this somehow? e.g. loop emulation or something like that? -""" - -from aie.dialects.scf import for_ as range_ -from aie.dialects.scf import yield_ - -from aie.api.dataflow.inout.simplefifoinout import SimpleFifoInOutProgram -from aie.api.dataflow.objectfifo import MyObjectFifo -from aie.api.kernels.binkernel import BinKernel -from aie.api.phys.device import NPU1Col1 -from aie.api.program import MyProgram -from aie.api.worker import MyWorker - -import sys -import numpy as np - -try: - vector_size = int(sys.argv[1]) - if vector_size % 64 != 0 or vector_size < 512: - print("Vector size must be a multiple of 64 and greater than or equal to 512") - raise ValueError -except ValueError: - print("Argument has inappropriate value") - -assert vector_size % 4 == 0 -line_size = vector_size // 4 - -inout_type = ((vector_size,), np.uint8) -fifo_memref_type = ((line_size,), np.uint8) - -of0 = MyObjectFifo(2, memref_type=fifo_memref_type, name="out") -of1 = MyObjectFifo(2, memref_type=fifo_memref_type, name="in") - -passthrough_fn = BinKernel( - "passThroughLine", - "passThrough.cc.o", - [fifo_memref_type, fifo_memref_type, np.int32], -) - - -def core_fn(ofs_end1, ofs_end2, external_functions): - of_out = ofs_end1[0] - of_in = ofs_end2[0] - passThroughLine = external_functions[0] - - for _ in range_(vector_size // line_size): - elemOut = of_out.acquire_produce(1) - elemIn = of_in.acquire_consume(1) - passThroughLine(elemIn, elemOut, line_size) - of_in.release_consume(1) - of_out.release_produce(1) - yield_([]) - - -worker_program = MyWorker(core_fn, [of0], [of1], [passthrough_fn], coords=(0, 2)) -inout_program = SimpleFifoInOutProgram(of0, vector_size, of1, vector_size) - - -my_program = MyProgram( - NPU1Col1(), worker_programs=[worker_program], inout_program=inout_program -) -my_program.resolve_program()