Officially support the new Python API in the passthrough kernel example

Xilinx · Sep 13, 2024 · 602c5b1
1 parent 57c53a2
commit 602c5b1
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 152 deletions.
diff --git a/programming_examples/basic/passthrough_kernel/Makefile b/programming_examples/basic/passthrough_kernel/Makefile
@@ -22,11 +22,11 @@ PASSTHROUGH_SIZE = ${data_size}
 
 all: build/final_${data_size}.xclbin
 
-build/aie2_lineBased_8b_${data_size}.mlir: ${srcdir}/iron_objs.py
+build/aie2_lineBased_8b_${data_size}.mlir: ${srcdir}/aie2.py
 	mkdir -p ${@D}
 	python3 $< ${data_size} 0 > $@
 
-build/aie_trace__lineBased_8b_${data_size}.mlir: ${srcdir}/iron_objs.py
+build/aie_trace__lineBased_8b_${data_size}.mlir: ${srcdir}/aie2.py
 	mkdir -p ${@D}
 	python3 $< ${data_size} ${trace_size} > $@
 

diff --git a/programming_examples/basic/passthrough_kernel/aie2.py b/programming_examples/basic/passthrough_kernel/aie2.py
@@ -5,94 +5,62 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
 # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
-
 import sys
+import numpy as np
 
-from aie.dialects.aie import *
-from aie.dialects.aiex import *
-from aie.dialects.scf import *
-from aie.extras.context import mlir_mod_ctx
-
-import aie.utils.trace as trace_utils
-
+from aie.dialects.scf import for_ as range_
+from aie.dialects.scf import yield_
 
-def passthroughKernel(vector_size, trace_size):
-    N = vector_size
-    lineWidthInBytes = N // 4  # chop input in 4 sub-tensors
+from aie.api.dataflow.inout.simplefifoinout import SimpleFifoInOutProgram
+from aie.api.dataflow.objectfifo import MyObjectFifo
+from aie.api.kernels.binkernel import BinKernel
+from aie.api.phys.device import NPU1Col1
+from aie.api.program import MyProgram
+from aie.api.worker import MyWorker
 
-    @device(AIEDevice.npu1_1col)
-    def device_body():
-        # define types
-        memRef_ty = T.memref(lineWidthInBytes, T.ui8())
-
-        # AIE Core Function declarations
-        passThroughLine = external_func(
-            "passThroughLine", inputs=[memRef_ty, memRef_ty, T.i32()]
-        )
+try:
+    vector_size = int(sys.argv[1])
+    if vector_size % 64 != 0 or vector_size < 512:
+        print("Vector size must be a multiple of 64 and greater than or equal to 512")
+        raise ValueError
+except ValueError:
+    print("Argument has inappropriate value")
 
-        # Tile declarations
-        ShimTile = tile(0, 0)
-        ComputeTile2 = tile(0, 2)
+assert vector_size % 4 == 0
+line_size = vector_size // 4
 
-        # Set up a circuit-switched flow from core to shim for tracing information
-        if trace_size > 0:
-            flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1)
+inout_type = ((vector_size,), np.uint8)
+fifo_memref_type = ((line_size,), np.uint8)
 
-        # AIE-array data movement with object fifos
-        of_in = object_fifo("in", ShimTile, ComputeTile2, 2, memRef_ty)
-        of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty)
+of0 = MyObjectFifo(2, memref_type=fifo_memref_type, name="out")
+of1 = MyObjectFifo(2, memref_type=fifo_memref_type, name="in")
 
-        # Set up compute tiles
+passthrough_fn = BinKernel(
+    "passThroughLine",
+    "passThrough.cc.o",
+    [fifo_memref_type, fifo_memref_type, np.int32],
+)
 
-        # Compute tile 2
-        @core(ComputeTile2, "passThrough.cc.o")
-        def core_body():
-            for _ in for_(sys.maxsize):
-                elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
-                elemIn = of_in.acquire(ObjectFifoPort.Consume, 1)
-                call(passThroughLine, [elemIn, elemOut, lineWidthInBytes])
-                of_in.release(ObjectFifoPort.Consume, 1)
-                of_out.release(ObjectFifoPort.Produce, 1)
-                yield_([])
 
-        #    print(ctx.module.operation.verify())
+def core_fn(ofs_end1, ofs_end2, external_functions):
+    of_out = ofs_end1[0]
+    of_in = ofs_end2[0]
+    passThroughLine = external_functions[0]
 
-        tensor_ty = T.memref(N, T.ui8())
+    for _ in range_(vector_size // line_size):
+        elemOut = of_out.acquire_produce(1)
+        elemIn = of_in.acquire_consume(1)
+        passThroughLine(elemIn, elemOut, line_size)
+        of_in.release_consume(1)
+        of_out.release_produce(1)
+        yield_([])
 
-        @runtime_sequence(tensor_ty, tensor_ty, tensor_ty)
-        def sequence(inTensor, outTensor, notUsed):
-            if trace_size > 0:
-                trace_utils.configure_simple_tracing_aie2(
-                    ComputeTile2,
-                    ShimTile,
-                    ddr_id=1,
-                    size=trace_size,
-                    offset=N,
-                )
 
-            npu_dma_memcpy_nd(
-                metadata="in",
-                bd_id=0,
-                mem=inTensor,
-                sizes=[1, 1, 1, N],
-            )
-            npu_dma_memcpy_nd(
-                metadata="out",
-                bd_id=1,
-                mem=outTensor,
-                sizes=[1, 1, 1, N],
-            )
-            npu_sync(column=0, row=0, direction=0, channel=0)
+worker_program = MyWorker(core_fn, [of0], [of1], [passthrough_fn], coords=(0, 2))
+inout_program = SimpleFifoInOutProgram(of0, vector_size, of1, vector_size)
 
 
-try:
-    vector_size = int(sys.argv[1])
-    if vector_size % 64 != 0 or vector_size < 512:
-        print("Vector size must be a multiple of 64 and greater than or equal to 512")
-        raise ValueError
-    trace_size = 0 if (len(sys.argv) != 3) else int(sys.argv[2])
-except ValueError:
-    print("Argument has inappropriate value")
-with mlir_mod_ctx() as ctx:
-    passthroughKernel(vector_size, trace_size)
-    print(ctx.module)
+my_program = MyProgram(
+    NPU1Col1(), worker_programs=[worker_program], inout_program=inout_program
+)
+my_program.resolve_program()
diff --git a/programming_examples/basic/passthrough_kernel/iron_objs.py b/programming_examples/basic/passthrough_kernel/iron_objs.py