Finish rewriting programming examples to use dma task

Xilinx · Nov 13, 2024 · 774e0e6 · 774e0e6
1 parent 88a033f
commit 774e0e6
Show file tree

Hide file tree

Showing 24 changed files with 1,721 additions and 7 deletions.
diff --git a/programming_examples/basic/dma_transpose/Makefile b/programming_examples/basic/dma_transpose/Makefile
@@ -20,7 +20,14 @@ targetname = dmaTranspose
 M ?= 64
 K ?= 32
 
-build/aie.mlir: ${srcdir}/aie2.py
+aie_py_src=aie2.py
+use_alt?=0
+
+ifeq (${use_alt}, 1)
+aie_py_src=aie2_alt.py
+endif
+
+build/aie.mlir: ${srcdir}/${aie_py_src}
 	mkdir -p ${@D}
 	python3 $< ${M} ${K} > $@
 

diff --git a/programming_examples/basic/dma_transpose/aie2_alt.py b/programming_examples/basic/dma_transpose/aie2_alt.py
@@ -0,0 +1,77 @@
+# dma_transpose/aie2_alt.py -*- Python -*-
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
+import numpy as np
+import sys
+
+from aie.dialects.aie import *
+from aie.dialects.aiex import *
+from aie.extras.context import mlir_mod_ctx
+from aie.helpers.dialects.ext.scf import _for as range_
+
+N = 4096
+M = 64
+K = 64
+
+if len(sys.argv) == 3:
+    M = int(sys.argv[1])
+    K = int(sys.argv[2])
+    N = M * K
+
+tensor_ty = np.ndarray[(M, K), np.dtype[np.int32]]
+
+
+def my_passthrough():
+    with mlir_mod_ctx() as ctx:
+
+        @device(AIEDevice.npu1_1col)
+        def device_body():
+            # Tile declarations
+            ShimTile = tile(0, 0)
+            ComputeTile2 = tile(0, 2)
+
+            # AIE-array data movement with object fifos
+            of_in = object_fifo("in", ShimTile, ComputeTile2, 2, tensor_ty)
+            of_out = object_fifo("out", ComputeTile2, ShimTile, 2, tensor_ty)
+            object_fifo_link(of_in, of_out)
+
+            # Set up compute tiles
+
+            # Compute tile 2
+            @core(ComputeTile2)
+            def core_body():
+                for _ in range_(sys.maxsize):
+                    pass
+
+            # To/from AIE-array data movement
+            @runtime_sequence(tensor_ty, tensor_ty, tensor_ty)
+            def sequence(A, B, C):
+                # The strides below are configured to read across all rows in the same column:
+                # the size-M dimension uses a stride of K, skipping an entire row per step to read a full column
+                in_task = dma_configure_task_for(of_in, issue_token=True)
+                with bds(in_task) as bd:
+                    with bd[0]:
+                        shim_dma_bd(
+                            A,
+                            sizes=[1, 1, K, M],
+                            strides=[1, 1, 1, K],
+                        )
+                        EndOp()
+
+                out_task = dma_configure_task_for(of_out, issue_token=True)
+                with bds(out_task) as bd:
+                    with bd[0]:
+                        shim_dma_bd(C, sizes=[1, 1, 1, N])
+                        EndOp()
+
+                dma_start_task(in_task, out_task)
+                dma_await_task(in_task, out_task)
+
+    print(ctx.module)
+
+
+my_passthrough()
diff --git a/programming_examples/basic/dma_transpose/run_makefile_alt.lit b/programming_examples/basic/dma_transpose/run_makefile_alt.lit
@@ -0,0 +1,12 @@
+// (c) Copyright 2024 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: ryzen_ai, peano 
+//
+// RUN: mkdir -p test_alt
+// RUN: cd test_alt
+// RUN: make -f %S/Makefile clean
+// RUN: env use_alt=1 make -f %S/Makefile 
+// RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s
+// CHECK: PASS!
+
diff --git a/programming_examples/basic/matrix_multiplication/cascade/Makefile b/programming_examples/basic/matrix_multiplication/cascade/Makefile
@@ -22,6 +22,11 @@ n_aie_cols?=4
 kernels=mm_${m}x${k}x${n}
 aieargs+=-m $m -k $k -n $n --n-aie-cols ${n_aie_cols}
 target_suffix=${M}x${K}x${N}_${m}x${k}x${n}_${n_aie_cols}c
+use_alt?=0
+
+ifeq (${use_alt}, 1)
+aie_py_src=aie2_alt.py
+endif
 
 include ${srcdir}/../makefile-common