From f0a60bacc912f194596211f305d15973f5f3fa93 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 11:58:31 -0700 Subject: [PATCH 01/23] Adding flag to run command --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py | 2 +- test/npu-xrt/dynamic_object_fifo/reduction/aie2.py | 2 +- test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- .../npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 8c41a9868e..dee6558c3e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! 
diff --git a/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py b/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py index 03a25b90db..0a8c1112d8 100644 --- a/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! diff --git a/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py b/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py index 3f04ed0f1f..eb5440e4cd 100644 --- a/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! 
diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index 8b91d2e434..84a1f12db7 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # XFAIL: * diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index d7eae0bc31..d22c2848bf 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # XFAIL: * diff --git 
a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index 4fba84bb83..0ecb7adcd2 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # XFAIL: * From 21b5a0fdf443ce609b1a25f1fc3e9c8758360ae9 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 12:26:24 -0700 Subject: [PATCH 02/23] Correcting the CHECK message --- test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- .../npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index 84a1f12db7..37222b8a78 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -12,7 +12,7 @@ # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt 
-lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s -# XFAIL: * +# CHECK: PASS! from aie.dialects.aie import * from aie.dialects.aiex import * from aie.helpers.dialects.ext.scf import _for as range_ diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index d22c2848bf..c93e1b21c9 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -12,7 +12,7 @@ # RUN: %python --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s -# XFAIL: * +# CHECK: PASS! import numpy as np from aie.dialects.aie import * diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index 0ecb7adcd2..d0b0f53d36 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -12,7 +12,7 @@ # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s -# XFAIL: * +# CHECK: PASS! 
import numpy as np from aie.dialects.aie import * From f5d41ebdd84afd174db5a6b40f5927a8eda114a6 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 12:57:05 -0700 Subject: [PATCH 03/23] Verifying as programming example --- .../dyn_objFifo/nested_loops/Makefile | 66 +++++++++ .../dyn_objFifo/nested_loops/aie2.py | 73 +++++++++ .../dyn_objFifo/nested_loops/kernel.cc | 22 +++ .../dyn_objFifo/nested_loops/test.cpp | 139 ++++++++++++++++++ 4 files changed, 300 insertions(+) create mode 100644 programming_examples/dyn_objFifo/nested_loops/Makefile create mode 100644 programming_examples/dyn_objFifo/nested_loops/aie2.py create mode 100644 programming_examples/dyn_objFifo/nested_loops/kernel.cc create mode 100644 programming_examples/dyn_objFifo/nested_loops/test.cpp diff --git a/programming_examples/dyn_objFifo/nested_loops/Makefile b/programming_examples/dyn_objFifo/nested_loops/Makefile new file mode 100644 index 0000000000..0216ac75da --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/Makefile @@ -0,0 +1,66 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# +##===----------------------------------------------------------------------===## + +# --- + +# The following environment variables that point to the Xilinx runtime (XRT) +# should be set up by an environment setup script already. 
+XILINX_XRT?=/opt/xilinx/xrt +XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) + +# --- + +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +XILINX_XRT_INCLUDE?=${XILINX_XRT}/include +XILINX_XRT_LIB?=${XILINX_XRT}/lib + +CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include +XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} +XRT_LIBS=-lxrt_coreutil +CXX=g++-13 -ggdb + +#mlir_target?=build/aie.mlir +xclbin_target?=build/final.xclbin +insts_target?=build/insts.txt +host_target?=build/test + +.PHONY: all +all: ${xclbin_target} ${host_target} + +build/aie.mlir: ${srcdir}/aie2.py + mkdir -p ${@D} + python3 $< > $@ + +build/kernel.o: ${srcdir}/kernel.cc + mkdir -p ${@D} + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} + +${xclbin_target}: build/aie.mlir build/kernel.o + mkdir -p ${@D} + cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ + --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} + +${host_target}: ${srcdir}/test.cpp ${xclbin_target} + mkdir -p ${@D} + ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} + +.PHONY: run +run: ${host_target} + ./${host_target} + +xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh +.PHONY: sign +sign: ${xclbin_target} + ${xclbin_sign} -dev Phoenix -xclbin $< + +.PHONY: clean +clean: + -rm -r build \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/nested_loops/aie2.py b/programming_examples/dyn_objFifo/nested_loops/aie2.py new file mode 100644 index 0000000000..dee6558c3e --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/aie2.py @@ -0,0 +1,73 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 AMD Inc. 
+ +# REQUIRES: ryzen_ai, valid_xchess_license +# +# RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o +# RUN: %python %S/aie2.py > ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: %run_on_npu ./test.exe | FileCheck %s +# CHECK: PASS! +import numpy as np + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.helpers.dialects.ext.scf import _for as range_ +from aie.extras.context import mlir_mod_ctx + +N = 50 +O = 250 +n_rows = 5 +dev = AIEDevice.npu1_1col +col = 0 + + +def nested_loops(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + tensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] + + # Tile declarations + ShimTile = tile(col, 0) + ComputeTile = tile(col, 2) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile, 2, tensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, tensor_ty) + + # AIE Core Function declarations + passthrough_10_i32 = external_func( + "passthrough_10_i32", inputs=[tensor_ty, tensor_ty] + ) + + # Set up compute tiles + @core(ComputeTile, "kernel.o") + def core_body(): + for _ in range_(5): + elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) + for _ in range_(5): + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + passthrough_10_i32(elemIn, elemOut) + of_out.release(ObjectFifoPort.Produce, 1) + of_in.release(ObjectFifoPort.Consume, 1) + + # To/from AIE-array data movement + @runtime_sequence(tensor_ty, tensor_ty) + def sequence(A, C): + npu_dma_memcpy_nd( + metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N], issue_token=True + ) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, O]) + dma_wait(of_in, of_out) + + 
print(ctx.module) + + +nested_loops() diff --git a/programming_examples/dyn_objFifo/nested_loops/kernel.cc b/programming_examples/dyn_objFifo/nested_loops/kernel.cc new file mode 100644 index 0000000000..d5a796add2 --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/kernel.cc @@ -0,0 +1,22 @@ +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include + +template +void passthrough(const T_in *__restrict in, T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = in[i]; + } +} + +extern "C" { + +void passthrough_10_i32(const int *__restrict in, int *__restrict out) { + passthrough(in, out); +} +} \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/nested_loops/test.cpp b/programming_examples/dyn_objFifo/nested_loops/test.cpp new file mode 100644 index 0000000000..ecd9a90d51 --- /dev/null +++ b/programming_examples/dyn_objFifo/nested_loops/test.cpp @@ -0,0 +1,139 @@ +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. 
+ +#include +#include +#include +#include + +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +#ifndef XCLBIN +#define XCLBIN "build/final.xclbin" +#endif + +#ifndef INSTS_TXT +#define INSTS_TXT "build/insts.txt" +#endif + +#ifndef KERNEL_NAME +#define KERNEL_NAME "MLIR_AIE" +#endif + +#define INPUT_SIZE (50 * sizeof(int)) // in bytes +#define OUTPUT_SIZE (250 * sizeof(int)) // in bytes +#define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define WIDTH 10 +#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE +#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE + +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + +int main(int argc, const char *argv[]) { + + std::vector instr_v = load_instr_sequence(INSTS_TXT); + assert(instr_v.size() > 0); + + // Get a device handle + unsigned int device_index = 0; + xrt::device device = xrt::device(device_index); + + // Load the xclbin + xrt::xclbin xclbin = xrt::xclbin(XCLBIN); + + // Get the kernel from the xclbin + std::vector xkernels = xclbin.get_kernels(); + xrt::xclbin::kernel xkernel = *std::find_if( + xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { + return k.get_name().rfind(KERNEL_NAME, 0) == 0; + }); + std::string kernel_name = xkernel.get_name(); + assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); + + device.register_xclbin(xclbin); + + // get a hardware context + xrt::hw_context context(device, xclbin.get_uuid()); + + // get a kernel handle + auto kernel = xrt::kernel(context, kernel_name); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_input = + xrt::bo(device, INPUT_SIZE, 
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_output = + xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + + int *buf_input = bo_input.map(); + std::cout << std::endl << std::endl << "Input: " << std::endl; + for (int i = 0; i < INPUT_ROWS; i++) { + std::cout << "row " << i << " : "; + for (int j = 0; j < WIDTH; j++) { + buf_input[i * WIDTH + j] = i; + std::cout << buf_input[i * WIDTH + j] << " "; + } + std::cout << std::endl << std::endl; + } + int *buf_output = bo_output.map(); + memset(buf_output, 0, OUTPUT_SIZE); + + // Instruction buffer for DMA configuration + void *buf_instr = bo_instr.map(); + memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. Returned status: " << r << "\n"; + return 1; + } + + bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + bool pass = true; + std::cout << std::endl << "Output: " << std::endl; + int expected_output = 0; + int five_repetitions = 0; + for (int i = 0; i < OUTPUT_ROWS; i++) { + std::cout << "row " << i << std::endl; + if (five_repetitions == 5) { + expected_output++; + five_repetitions = 0; + } + for (int j = 0; j < WIDTH; j++) { + std::cout << "expected: " << expected_output << ", "; + std::cout << "got: " << buf_output[i * WIDTH + j] << std::endl; + pass &= buf_output[i * WIDTH + j] == expected_output; + } + std::cout << std::endl << std::endl; + five_repetitions++; + } + std::cout << std::endl << std::endl; + std::cout << (pass ? "PASS!" 
: "FAIL.") << std::endl; + + return 0; +} \ No newline at end of file From 77513c8a9346feaa52c6e17528556624013ed778 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 13:48:24 -0700 Subject: [PATCH 04/23] Checking the flags that caused the issue --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index dee6558c3e..19945d46c2 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! From 29cf679c03f97883c335ea7a0e9cb4e660c62dac Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 14:24:53 -0700 Subject: [PATCH 05/23] Revert "Checking the flags that caused the issue" This reverts commit 77513c8a9346feaa52c6e17528556624013ed778. 
--- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 19945d46c2..dee6558c3e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! 
From 1db8fbf8bacc500fc004b4dd76bdd51672b94fcf Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 14:29:26 -0700 Subject: [PATCH 06/23] Maybe clang version --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index dee6558c3e..8f1146f34a 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,8 +10,8 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s +# RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s # CHECK: PASS! import numpy as np From a3bd5192038d0f0b81a58a831b573ee62a7529fc Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Tue, 5 Nov 2024 14:39:52 -0700 Subject: [PATCH 07/23] Revert "Maybe clang version" This reverts commit 1db8fbf8bacc500fc004b4dd76bdd51672b94fcf. 
--- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 8f1146f34a..dee6558c3e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,8 +10,8 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! 
import numpy as np From 8945f79d57cc38d5e3cda41ef35b9f865adccf2a Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 09:33:00 -0700 Subject: [PATCH 08/23] may be compiler --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index dee6558c3e..c390a745fd 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,7 +10,7 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: g++ %S/test.cpp -o test.exe -std=c++13 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! 
import numpy as np From 46b1a921e7ffca26a70d2f916a3dda0dc10e67f9 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 09:47:56 -0700 Subject: [PATCH 09/23] one other check --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index c390a745fd..a1338bcf39 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,7 +10,7 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: g++ %S/test.cpp -o test.exe -std=c++13 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: g++-13 -ggdb %S/test.cpp -o test.exe -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! 
import numpy as np From 7fee57d1579bbbdd085da3a4f0f6dae8b5b8f983 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 10:42:07 -0700 Subject: [PATCH 10/23] Same run command for all --- test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index a1338bcf39..dee6558c3e 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -10,7 +10,7 @@ # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir -# RUN: g++-13 -ggdb %S/test.cpp -o test.exe -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! 
import numpy as np diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index c93e1b21c9..366552907b 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! From 60fc2918bcd0ffd3fabbf592a5c02c313dec97f7 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 11:41:15 -0700 Subject: [PATCH 11/23] change chess --- test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- .../npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index 37222b8a78..c6dd53838d 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -5,7 +5,7 @@ # # (c) Copyright 2024 AMD Inc. 
-# REQUIRES: ryzen_ai, valid_xchess_license +# REQUIRES: ryzen_ai, chess # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index c93e1b21c9..366552907b 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -9,7 +9,7 @@ # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir -# RUN: %python --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir +# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags # RUN: %run_on_npu ./test.exe | FileCheck %s # CHECK: PASS! diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index d0b0f53d36..f9539fecfe 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -5,7 +5,7 @@ # # (c) Copyright 2024 AMD Inc. 
-# REQUIRES: ryzen_ai, valid_xchess_license +# REQUIRES: ryzen_ai, chess # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir From da418086ff1f98c74cfea8b9178ade2e17882b87 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 11:42:41 -0700 Subject: [PATCH 12/23] Missed file --- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index 366552907b..1c8922df1b 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -5,7 +5,7 @@ # # (c) Copyright 2024 AMD Inc. -# REQUIRES: ryzen_ai, valid_xchess_license +# REQUIRES: ryzen_ai, chess # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir From d13e6076dadb314a129f4650dc08481ada0570a9 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 12:10:19 -0700 Subject: [PATCH 13/23] Checking --- test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py | 2 +- .../dynamic_object_fifo/sliding_window_conditional/aie2.py | 2 +- .../npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index c6dd53838d..7baa366452 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -11,7 +11,7 @@ # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu 
--npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s +# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s # CHECK: PASS! from aie.dialects.aie import * from aie.dialects.aiex import * diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index 1c8922df1b..3f1159da13 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -11,7 +11,7 @@ # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s +# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s # CHECK: PASS! 
import numpy as np diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index f9539fecfe..3c53c21cd8 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -11,7 +11,7 @@ # RUN: %python %S/aie2.py > ./aie2.mlir # RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --no-compile-host --aie-generate-xclbin --xclbin-name=final.xclbin --dynamic-objFifos --aie-generate-npu --npu-insts-name=insts.txt ./aie2.mlir # RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags -# RUN: %run_on_npu ./test.exe | FileCheck %s +# RUN: %run_on_npu ./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s # CHECK: PASS! import numpy as np From dd87d0b71544cb453dd0a5c3fda0cd5fe2389dc9 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 14:38:08 -0700 Subject: [PATCH 14/23] Checking with working test file --- .../two_core_sliding_window/test.cpp | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp index 648924ac4f..ccf951d8dd 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "final.xclbin" +#define XCLBIN "build/final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "insts.txt" +#define INSTS_TXT "build/insts.txt" #endif #ifndef KERNEL_NAME @@ -33,9 +33,24 @@ #include "test_utils.h" +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream 
iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + int main(int argc, const char *argv[]) { - std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); + std::vector instr_v = load_instr_sequence(INSTS_TXT); assert(instr_v.size() > 0); // Get a device handle From 30b7f84e25b139f386b97733adc06b51cab927c0 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 15:04:20 -0700 Subject: [PATCH 15/23] Missing:: --- .../sliding_window/test.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp index 648924ac4f..2ffbd6ba56 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "final.xclbin" +#define XCLBIN "build/final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "insts.txt" +#define INSTS_TXT "build/insts.txt" #endif #ifndef KERNEL_NAME @@ -33,6 +33,21 @@ #include "test_utils.h" +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + int main(int argc, const char *argv[]) { std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); From 8f4df8a5fe44dee60f570ed52ffb655954f67a43 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 16:07:31 -0700 Subject: [PATCH 16/23] Instr load problem --- .../dynamic_object_fifo/sliding_window_conditional/test.cpp | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp index 648924ac4f..6668c96421 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp @@ -25,9 +25,10 @@ #define KERNEL_NAME "MLIR_AIE" #endif -#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define WIDTH 10 #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE From 401a55b61d4121239b0b4750cb0fd677165dfb0a Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 16:10:36 -0700 Subject: [PATCH 17/23] format --- .../dynamic_object_fifo/sliding_window_conditional/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp index 6668c96421..c25d9358f6 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/test.cpp @@ -25,7 +25,7 @@ #define KERNEL_NAME "MLIR_AIE" #endif -#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes #define WIDTH 10 From 2b6e8ec7c58cddf009781e80b2f8f85a85d16e6c Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 16:24:02 -0700 Subject: [PATCH 18/23] Rest of them --- .../sliding_window/test.cpp | 20 +++-------------- .../two_core_sliding_window/test.cpp | 22 ++++--------------- 2 files changed, 7 insertions(+), 35 deletions(-) diff --git 
a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp index 2ffbd6ba56..c25d9358f6 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "build/final.xclbin" +#define XCLBIN "final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "build/insts.txt" +#define INSTS_TXT "insts.txt" #endif #ifndef KERNEL_NAME @@ -28,26 +28,12 @@ #define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define WIDTH 10 #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE #include "test_utils.h" -std::vector load_instr_sequence(std::string instr_path) { - std::ifstream instr_file(instr_path); - std::string line; - std::vector instr_v; - while (std::getline(instr_file, line)) { - std::istringstream iss(line); - uint32_t a; - if (!(iss >> std::hex >> a)) { - throw std::runtime_error("Unable to parse instruction file\n"); - } - instr_v.push_back(a); - } - return instr_v; -} - int main(int argc, const char *argv[]) { std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp index ccf951d8dd..c25d9358f6 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "build/final.xclbin" +#define XCLBIN "final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "build/insts.txt" +#define INSTS_TXT "insts.txt" #endif #ifndef KERNEL_NAME @@ -28,29 +28,15 @@ #define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 
* sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define WIDTH 10 #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE #include "test_utils.h" -std::vector load_instr_sequence(std::string instr_path) { - std::ifstream instr_file(instr_path); - std::string line; - std::vector instr_v; - while (std::getline(instr_file, line)) { - std::istringstream iss(line); - uint32_t a; - if (!(iss >> std::hex >> a)) { - throw std::runtime_error("Unable to parse instruction file\n"); - } - instr_v.push_back(a); - } - return instr_v; -} - int main(int argc, const char *argv[]) { - std::vector instr_v = load_instr_sequence(INSTS_TXT); + std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); assert(instr_v.size() > 0); // Get a device handle From a8fabc07aaad4fea1f70af8db4c66be64706511b Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 6 Nov 2024 17:00:46 -0700 Subject: [PATCH 19/23] Old test --- .../sliding_window/test.cpp | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp index c25d9358f6..5c78b0e986 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/test.cpp @@ -14,11 +14,11 @@ #include "xrt/xrt_kernel.h" #ifndef XCLBIN -#define XCLBIN "final.xclbin" +#define XCLBIN "build/final.xclbin" #endif #ifndef INSTS_TXT -#define INSTS_TXT "insts.txt" +#define INSTS_TXT "build/insts.txt" #endif #ifndef KERNEL_NAME @@ -28,15 +28,27 @@ #define INPUT_SIZE (100 * sizeof(int)) // in bytes #define OUTPUT_SIZE (100 * sizeof(int)) // in bytes #define WIDTH_SIZE (10 * sizeof(int)) // in bytes -#define WIDTH 10 #define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE #define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE -#include "test_utils.h" +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream 
instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} int main(int argc, const char *argv[]) { - std::vector instr_v = test_utils::load_instr_sequence(INSTS_TXT); + std::vector instr_v = load_instr_sequence(INSTS_TXT); assert(instr_v.size() > 0); // Get a device handle From 3b4f1afaf137e0f00d2bec02a60b4d3d5c84d18c Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Tue, 12 Nov 2024 13:25:00 -0700 Subject: [PATCH 20/23] Update makefile-common --- test/npu-xrt/makefile-common | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/test/npu-xrt/makefile-common b/test/npu-xrt/makefile-common index 51e9a19245..bdde6760b6 100644 --- a/test/npu-xrt/makefile-common +++ b/test/npu-xrt/makefile-common @@ -1,17 +1,19 @@ -# Contains common definitions used across the Makefiles of npu-xrt tests. - # VITIS related variables -VITIS_ROOT ?= $(shell realpath $(dir $(shell which vitis))/../) -VITIS_AIETOOLS_DIR ?= ${VITIS_ROOT}/aietools -VITIS_AIE_INCLUDE_DIR ?= ${VITIS_ROOT}/aietools/data/versal_prod/lib -VITIS_AIE2_INCLUDE_DIR ?= ${VITIS_ROOT}/aietools/data/aie_ml/lib +AIETOOLS_DIR ?= $(shell realpath $(dir $(shell which xchesscc))/../) +AIE_INCLUDE_DIR ?= ${AIETOOLS_DIR}/data/versal_prod/lib +AIE2_INCLUDE_DIR ?= ${AIETOOLS_DIR}/data/aie_ml/lib + +AIEOPT_DIR ?= $(shell realpath $(dir $(shell which aie-opt))/..) 
+ +WARNING_FLAGS = -Wno-parentheses -Wno-attributes -Wno-macro-redefined -CHESSCC1_FLAGS = -f -p me -P ${VITIS_AIE_INCLUDE_DIR} -I ${VITIS_AIETOOLS_DIR}/include -CHESSCC2_FLAGS = -f -p me -P ${VITIS_AIE2_INCLUDE_DIR} -I ${VITIS_AIETOOLS_DIR}/include -D__AIENGINE__=2 -D__AIEARCH__=20 -CHESS_FLAGS = -P ${VITIS_AIE_INCLUDE_DIR} +CHESSCC1_FLAGS = -f -p me -P ${AIE_INCLUDE_DIR} -I ${AIETOOLS_DIR}/include +CHESSCC2_FLAGS = -f -p me -P ${AIE2_INCLUDE_DIR} -I ${AIETOOLS_DIR}/include -D__AIENGINE__=2 -D__AIEARCH__=20 +CHESS_FLAGS = -P ${AIE_INCLUDE_DIR} -CHESSCCWRAP1_FLAGS = aie -I ${VITIS_AIETOOLS_DIR}/include -CHESSCCWRAP2_FLAGS = aie2 -I ${VITIS_AIETOOLS_DIR}/include +CHESSCCWRAP1_FLAGS = aie -I ${AIETOOLS_DIR}/include +CHESSCCWRAP2_FLAGS = aie2 -I ${AIETOOLS_DIR}/include +PEANOWRAP2_FLAGS = -O2 -v -std=c++20 --target=aie2-none-unknown-elf ${WARNING_FLAGS} -DNDEBUG -I ${AIEOPT_DIR}/include TEST_POWERSHELL := $(shell command -v powershell.exe >/dev/null 2>&1 && echo yes || echo no) ifeq ($(TEST_POWERSHELL),yes) From dced95e6e71ce1b3b1984ae97194372ecf50338a Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Wed, 13 Nov 2024 13:19:27 -0700 Subject: [PATCH 21/23] Revert "Update makefile-common" This reverts commit 3b4f1afaf137e0f00d2bec02a60b4d3d5c84d18c. --- test/npu-xrt/makefile-common | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/test/npu-xrt/makefile-common b/test/npu-xrt/makefile-common index bdde6760b6..51e9a19245 100644 --- a/test/npu-xrt/makefile-common +++ b/test/npu-xrt/makefile-common @@ -1,19 +1,17 @@ -# VITIS related variables -AIETOOLS_DIR ?= $(shell realpath $(dir $(shell which xchesscc))/../) -AIE_INCLUDE_DIR ?= ${AIETOOLS_DIR}/data/versal_prod/lib -AIE2_INCLUDE_DIR ?= ${AIETOOLS_DIR}/data/aie_ml/lib - -AIEOPT_DIR ?= $(shell realpath $(dir $(shell which aie-opt))/..) +# Contains common definitions used across the Makefiles of npu-xrt tests. 
-WARNING_FLAGS = -Wno-parentheses -Wno-attributes -Wno-macro-redefined +# VITIS related variables +VITIS_ROOT ?= $(shell realpath $(dir $(shell which vitis))/../) +VITIS_AIETOOLS_DIR ?= ${VITIS_ROOT}/aietools +VITIS_AIE_INCLUDE_DIR ?= ${VITIS_ROOT}/aietools/data/versal_prod/lib +VITIS_AIE2_INCLUDE_DIR ?= ${VITIS_ROOT}/aietools/data/aie_ml/lib -CHESSCC1_FLAGS = -f -p me -P ${AIE_INCLUDE_DIR} -I ${AIETOOLS_DIR}/include -CHESSCC2_FLAGS = -f -p me -P ${AIE2_INCLUDE_DIR} -I ${AIETOOLS_DIR}/include -D__AIENGINE__=2 -D__AIEARCH__=20 -CHESS_FLAGS = -P ${AIE_INCLUDE_DIR} +CHESSCC1_FLAGS = -f -p me -P ${VITIS_AIE_INCLUDE_DIR} -I ${VITIS_AIETOOLS_DIR}/include +CHESSCC2_FLAGS = -f -p me -P ${VITIS_AIE2_INCLUDE_DIR} -I ${VITIS_AIETOOLS_DIR}/include -D__AIENGINE__=2 -D__AIEARCH__=20 +CHESS_FLAGS = -P ${VITIS_AIE_INCLUDE_DIR} -CHESSCCWRAP1_FLAGS = aie -I ${AIETOOLS_DIR}/include -CHESSCCWRAP2_FLAGS = aie2 -I ${AIETOOLS_DIR}/include -PEANOWRAP2_FLAGS = -O2 -v -std=c++20 --target=aie2-none-unknown-elf ${WARNING_FLAGS} -DNDEBUG -I ${AIEOPT_DIR}/include +CHESSCCWRAP1_FLAGS = aie -I ${VITIS_AIETOOLS_DIR}/include +CHESSCCWRAP2_FLAGS = aie2 -I ${VITIS_AIETOOLS_DIR}/include TEST_POWERSHELL := $(shell command -v powershell.exe >/dev/null 2>&1 && echo yes || echo no) ifeq ($(TEST_POWERSHELL),yes) From 98f8b0d2cd78666e7f4aef2a6712a6b2347ec708 Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 13 Nov 2024 19:32:06 -0700 Subject: [PATCH 22/23] Testing sliding window test cases as examples --- .../dyn_objFifo/sliding_window/Makefile | 66 +++++++++ .../dyn_objFifo/sliding_window/aie2.py | 76 ++++++++++ .../dyn_objFifo/sliding_window/kernel.cc | 24 +++ .../dyn_objFifo/sliding_window/test.cpp | 138 ++++++++++++++++++ .../sliding_window_conditional/Makefile | 66 +++++++++ .../sliding_window_conditional/aie2.py | 74 ++++++++++ .../sliding_window_conditional/kernel.cc | 24 +++ .../sliding_window_conditional/test.cpp | 138 ++++++++++++++++++ .../two_core_sliding_window/Makefile | 66 
+++++++++ .../two_core_sliding_window/aie2.py | 90 ++++++++++++ .../two_core_sliding_window/kernel.cc | 38 +++++ .../two_core_sliding_window/test.cpp | 138 ++++++++++++++++++ 12 files changed, 938 insertions(+) create mode 100644 programming_examples/dyn_objFifo/sliding_window/Makefile create mode 100644 programming_examples/dyn_objFifo/sliding_window/aie2.py create mode 100644 programming_examples/dyn_objFifo/sliding_window/kernel.cc create mode 100644 programming_examples/dyn_objFifo/sliding_window/test.cpp create mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/Makefile create mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py create mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc create mode 100644 programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp create mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/Makefile create mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py create mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc create mode 100644 programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp diff --git a/programming_examples/dyn_objFifo/sliding_window/Makefile b/programming_examples/dyn_objFifo/sliding_window/Makefile new file mode 100644 index 0000000000..0216ac75da --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window/Makefile @@ -0,0 +1,66 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
+# +##===----------------------------------------------------------------------===## + +# --- + +# The following environment variables that point to the Xilinx runtime (XRT) +# should be set up by an environment setup script already. +XILINX_XRT?=/opt/xilinx/xrt +XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) + +# --- + +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +XILINX_XRT_INCLUDE?=${XILINX_XRT}/include +XILINX_XRT_LIB?=${XILINX_XRT}/lib + +CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include +XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} +XRT_LIBS=-lxrt_coreutil +CXX=g++-13 -ggdb + +#mlir_target?=build/aie.mlir +xclbin_target?=build/final.xclbin +insts_target?=build/insts.txt +host_target?=build/test + +.PHONY: all +all: ${xclbin_target} ${host_target} + +build/aie.mlir: ${srcdir}/aie2.py + mkdir -p ${@D} + python3 $< > $@ + +build/kernel.o: ${srcdir}/kernel.cc + mkdir -p ${@D} + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} + +${xclbin_target}: build/aie.mlir build/kernel.o + mkdir -p ${@D} + cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ + --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} + +${host_target}: ${srcdir}/test.cpp ${xclbin_target} + mkdir -p ${@D} + ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} + +.PHONY: run +run: ${host_target} + ./${host_target} + +xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh +.PHONY: sign +sign: ${xclbin_target} + ${xclbin_sign} -dev Phoenix -xclbin $< + +.PHONY: clean +clean: + -rm -r build \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/sliding_window/aie2.py b/programming_examples/dyn_objFifo/sliding_window/aie2.py new file mode 100644 index 0000000000..08d92c73e1 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window/aie2.py @@ -0,0 +1,76 @@ +# dynamic_object_fifo/sliding_window/aie2.py -*- Python -*- +# +# 
This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates + +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.extras.dialects.ext.scf import _for as range_ +from aie.extras.context import mlir_mod_ctx + +N = 100 +n_rows = 10 +dev = AIEDevice.npu1_1col +col = 0 + + +def sliding_window(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + memRef_ty = T.memref(N // n_rows, T.i32()) + + # Tile declarations + ShimTile = tile(col, 0) + ComputeTile = tile(col, 2) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + + # AIE Core Function declarations + add_10_i32 = external_func( + "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + ) + + # Set up compute tiles + + @core(ComputeTile, "kernel.o") + def core_body(): + elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) + elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) + call(add_10_i32, [elemInPre, elemInPre, elemOutPre]) + of_out.release(ObjectFifoPort.Produce, 1) + + for _ in range_(8): + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + of_in.release(ObjectFifoPort.Consume, 1) + of_out.release(ObjectFifoPort.Produce, 1) + + elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) + elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost]) + of_in.release(ObjectFifoPort.Consume, 2) + of_out.release(ObjectFifoPort.Produce, 1) + + # To/from AIE-array data movement + tensor_ty = T.memref(N, T.i32()) + + @runtime_sequence(tensor_ty, tensor_ty) + def 
sequence(A, C): + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) + + print(ctx.module) + + +sliding_window() diff --git a/programming_examples/dyn_objFifo/sliding_window/kernel.cc b/programming_examples/dyn_objFifo/sliding_window/kernel.cc new file mode 100644 index 0000000000..ddb474e102 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window/kernel.cc @@ -0,0 +1,24 @@ +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include + +template +void add(const T_in *__restrict inA, const T_in *__restrict inB, + T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = inA[i] + inB[i]; + } +} + +extern "C" { + +void add_10_i32(const int *__restrict inA, const int *__restrict inB, + int *__restrict out) { + add(inA, inB, out); +} +} diff --git a/programming_examples/dyn_objFifo/sliding_window/test.cpp b/programming_examples/dyn_objFifo/sliding_window/test.cpp new file mode 100644 index 0000000000..3cd72ab880 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window/test.cpp @@ -0,0 +1,138 @@ +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. 
+ +#include +#include +#include +#include + +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +#ifndef XCLBIN +#define XCLBIN "build/final.xclbin" +#endif + +#ifndef INSTS_TXT +#define INSTS_TXT "build/insts.txt" +#endif + +#ifndef KERNEL_NAME +#define KERNEL_NAME "MLIR_AIE" +#endif + +#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define OUTPUT_SIZE (100 * sizeof(int)) // in bytes +#define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE +#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE + +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + +int main(int argc, const char *argv[]) { + + std::vector instr_v = load_instr_sequence(INSTS_TXT); + assert(instr_v.size() > 0); + + // Get a device handle + unsigned int device_index = 0; + xrt::device device = xrt::device(device_index); + + // Load the xclbin + xrt::xclbin xclbin = xrt::xclbin(XCLBIN); + + // Get the kernel from the xclbin + std::vector xkernels = xclbin.get_kernels(); + xrt::xclbin::kernel xkernel = *std::find_if( + xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { + return k.get_name().rfind(KERNEL_NAME, 0) == 0; + }); + std::string kernel_name = xkernel.get_name(); + assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); + + device.register_xclbin(xclbin); + + // get a hardware context + xrt::hw_context context(device, xclbin.get_uuid()); + + // get a kernel handle + auto kernel = xrt::kernel(context, kernel_name); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_input = + xrt::bo(device, INPUT_SIZE, 
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_output = + xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + + int *buf_input = bo_input.map(); + std::cout << std::endl << std::endl << "Input: " << std::endl; + for (int i = 0; i < INPUT_ROWS; i++) { + std::cout << "row " << i << " : "; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_input[0]); j++) { + buf_input[i * INPUT_ROWS + j] = i; + std::cout << buf_input[i * INPUT_ROWS + j] << " "; + } + std::cout << std::endl << std::endl; + } + int *buf_output = bo_output.map(); + memset(buf_output, 0, OUTPUT_SIZE); + + // Instruction buffer for DMA configuration + void *buf_instr = bo_instr.map(); + memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. Returned status: " << r << "\n"; + return 1; + } + + bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + bool pass = true; + std::cout << std::endl << "Output: " << std::endl; + for (int i = 0; i < OUTPUT_ROWS; i++) { + std::cout << "row " << i << std::endl; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_output[0]); j++) { + int expected_output = 0; + if (i == 0) { + expected_output = buf_input[i * INPUT_ROWS] * 2; + } else { + expected_output = + buf_input[(i - 1) * INPUT_ROWS] + buf_input[i * INPUT_ROWS]; + } + std::cout << "expected: " << expected_output << ", "; + std::cout << "got: " << buf_output[i * OUTPUT_ROWS + j] << std::endl; + pass &= buf_output[i * OUTPUT_ROWS + j] == expected_output; + } + std::cout << std::endl << std::endl; + } + std::cout << std::endl << std::endl; + std::cout << (pass ? "PASS!" 
: "FAIL.") << std::endl; + + return 0; +} diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/Makefile b/programming_examples/dyn_objFifo/sliding_window_conditional/Makefile new file mode 100644 index 0000000000..0216ac75da --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/Makefile @@ -0,0 +1,66 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# +##===----------------------------------------------------------------------===## + +# --- + +# The following environment variables that point to the Xilinx runtime (XRT) +# should be set up by an environment setup script already. +XILINX_XRT?=/opt/xilinx/xrt +XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) + +# --- + +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +XILINX_XRT_INCLUDE?=${XILINX_XRT}/include +XILINX_XRT_LIB?=${XILINX_XRT}/lib + +CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include +XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} +XRT_LIBS=-lxrt_coreutil +CXX=g++-13 -ggdb + +#mlir_target?=build/aie.mlir +xclbin_target?=build/final.xclbin +insts_target?=build/insts.txt +host_target?=build/test + +.PHONY: all +all: ${xclbin_target} ${host_target} + +build/aie.mlir: ${srcdir}/aie2.py + mkdir -p ${@D} + python3 $< > $@ + +build/kernel.o: ${srcdir}/kernel.cc + mkdir -p ${@D} + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} + +${xclbin_target}: build/aie.mlir build/kernel.o + mkdir -p ${@D} + cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ + --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} + +${host_target}: ${srcdir}/test.cpp 
${xclbin_target} + mkdir -p ${@D} + ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} + +.PHONY: run +run: ${host_target} + ./${host_target} + +xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh +.PHONY: sign +sign: ${xclbin_target} + ${xclbin_sign} -dev Phoenix -xclbin $< + +.PHONY: clean +clean: + -rm -r build \ No newline at end of file diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py b/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py new file mode 100644 index 0000000000..8ab2dfa636 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py @@ -0,0 +1,74 @@ +# dynamic_object_fifo/sliding_window/aie2.py -*- Python -*- +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates + +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.extras.dialects.ext.scf import _for as range_ +from aie.extras.context import mlir_mod_ctx + +N = 100 +n_rows = 10 +dev = AIEDevice.npu1_1col +col = 0 + + +def sliding_window(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + memRef_ty = T.memref(N // n_rows, T.i32()) + + # Tile declarations + ShimTile = tile(col, 0) + ComputeTile = tile(col, 2) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + + # AIE Core Function declarations + add_10_i32 = external_func( + "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + ) + + # Set up compute tiles + + @core(ComputeTile, "kernel.o") + def core_body(): + for i in range_(10): + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + if i == 0: + elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) + 
call(add_10_i32, [elemInPre, elemInPre, elemOut]) + elif i == 9: + elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOut]) + of_in.release(ObjectFifoPort.Consume, 2) + else: + elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + of_in.release(ObjectFifoPort.Consume, 1) + + of_out.release(ObjectFifoPort.Produce, 1) + + # To/from AIE-array data movement + tensor_ty = T.memref(N, T.i32()) + + @runtime_sequence(tensor_ty, tensor_ty) + def sequence(A, C): + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) + + print(ctx.module) + + +sliding_window() diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc b/programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc new file mode 100644 index 0000000000..ddb474e102 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/kernel.cc @@ -0,0 +1,24 @@ +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. 
+ +#include + +template +void add(const T_in *__restrict inA, const T_in *__restrict inB, + T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = inA[i] + inB[i]; + } +} + +extern "C" { + +void add_10_i32(const int *__restrict inA, const int *__restrict inB, + int *__restrict out) { + add(inA, inB, out); +} +} diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp b/programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp new file mode 100644 index 0000000000..3cd72ab880 --- /dev/null +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/test.cpp @@ -0,0 +1,138 @@ +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. + +#include +#include +#include +#include + +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +#ifndef XCLBIN +#define XCLBIN "build/final.xclbin" +#endif + +#ifndef INSTS_TXT +#define INSTS_TXT "build/insts.txt" +#endif + +#ifndef KERNEL_NAME +#define KERNEL_NAME "MLIR_AIE" +#endif + +#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define OUTPUT_SIZE (100 * sizeof(int)) // in bytes +#define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE +#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE + +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + +int main(int argc, const char *argv[]) { + + std::vector instr_v = load_instr_sequence(INSTS_TXT); + assert(instr_v.size() > 0); + + // Get a device handle + unsigned int 
device_index = 0; + xrt::device device = xrt::device(device_index); + + // Load the xclbin + xrt::xclbin xclbin = xrt::xclbin(XCLBIN); + + // Get the kernel from the xclbin + std::vector xkernels = xclbin.get_kernels(); + xrt::xclbin::kernel xkernel = *std::find_if( + xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { + return k.get_name().rfind(KERNEL_NAME, 0) == 0; + }); + std::string kernel_name = xkernel.get_name(); + assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); + + device.register_xclbin(xclbin); + + // get a hardware context + xrt::hw_context context(device, xclbin.get_uuid()); + + // get a kernel handle + auto kernel = xrt::kernel(context, kernel_name); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_input = + xrt::bo(device, INPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_output = + xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + + int *buf_input = bo_input.map(); + std::cout << std::endl << std::endl << "Input: " << std::endl; + for (int i = 0; i < INPUT_ROWS; i++) { + std::cout << "row " << i << " : "; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_input[0]); j++) { + buf_input[i * INPUT_ROWS + j] = i; + std::cout << buf_input[i * INPUT_ROWS + j] << " "; + } + std::cout << std::endl << std::endl; + } + int *buf_output = bo_output.map(); + memset(buf_output, 0, OUTPUT_SIZE); + + // Instruction buffer for DMA configuration + void *buf_instr = bo_instr.map(); + memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. 
Returned status: " << r << "\n"; + return 1; + } + + bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + bool pass = true; + std::cout << std::endl << "Output: " << std::endl; + for (int i = 0; i < OUTPUT_ROWS; i++) { + std::cout << "row " << i << std::endl; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_output[0]); j++) { + int expected_output = 0; + if (i == 0) { + expected_output = buf_input[i * INPUT_ROWS] * 2; + } else { + expected_output = + buf_input[(i - 1) * INPUT_ROWS] + buf_input[i * INPUT_ROWS]; + } + std::cout << "expected: " << expected_output << ", "; + std::cout << "got: " << buf_output[i * OUTPUT_ROWS + j] << std::endl; + pass &= buf_output[i * OUTPUT_ROWS + j] == expected_output; + } + std::cout << std::endl << std::endl; + } + std::cout << std::endl << std::endl; + std::cout << (pass ? "PASS!" : "FAIL.") << std::endl; + + return 0; +} diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/Makefile b/programming_examples/dyn_objFifo/two_core_sliding_window/Makefile new file mode 100644 index 0000000000..4e423e1df1 --- /dev/null +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/Makefile @@ -0,0 +1,66 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# +##===----------------------------------------------------------------------===## + +# --- + +# The following environment variables that point to the Xilinx runtime (XRT) +# should be set up by an environment setup script already. 
+XILINX_XRT?=/opt/xilinx/xrt +XILINX_VITIS?=$(shell realpath $(dir $(shell which vitis))/../) + +# --- + +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +XILINX_XRT_INCLUDE?=${XILINX_XRT}/include +XILINX_XRT_LIB?=${XILINX_XRT}/lib + +CHESSCCWRAP2_FLAGS=aie2 -I${XILINX_VITIS}/aietools/include +XRT_FLAGS=-I${XILINX_XRT_INCLUDE} -L${XILINX_XRT_LIB} +XRT_LIBS=-lxrt_coreutil +CXX=g++-13 -ggdb + +#mlir_target?=build/aie.mlir +xclbin_target?=build/final.xclbin +insts_target?=build/insts.txt +host_target?=build/test + +.PHONY: all +all: ${xclbin_target} ${host_target} + +build/aie.mlir: ${srcdir}/aie2.py + mkdir -p ${@D} + python3 $< > $@ + +build/kernel.o: ${srcdir}/kernel.cc + mkdir -p ${@D} + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} + +${xclbin_target}: build/aie.mlir build/kernel.o + mkdir -p ${@D} + cd ${@D} && aiecc.py -v --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ + --dynamic-objFifos --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} ${<:%=../%} + +${host_target}: ${srcdir}/test.cpp ${xclbin_target} + mkdir -p ${@D} + ${CXX} ${XRT_FLAGS} -DM=$M -DN=$N -o $@ $< ${XRT_LIBS} + +.PHONY: run +run: ${host_target} + ./${host_target} + +xclbin_sign=${XILINX_XRT}/amdxdna/setup_xclbin_firmware.sh +.PHONY: sign +sign: ${xclbin_target} + ${xclbin_sign} -dev Phoenix -xclbin $< + +.PHONY: clean +clean: + -rm -r build diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py b/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py new file mode 100644 index 0000000000..e815fada7c --- /dev/null +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py @@ -0,0 +1,90 @@ +# dynamic_object_fifo/two_core_sliding_window/aie2.py -*- Python -*- +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates + +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.extras.dialects.ext.scf import _for as range_ +from aie.extras.context import mlir_mod_ctx + +N = 100 +n_rows = 10 +dev = AIEDevice.npu1_1col +col = 0 + + +def two_core_sliding_window(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + memRef_ty = T.memref(N // n_rows, T.i32()) + + # Tile declarations + ShimTile = tile(col, 0) + ComputeTile = tile(col, 2) + ComputeTile2 = tile(col, 4) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile, ComputeTile, 2, memRef_ty) + of_in2 = object_fifo("in2", ComputeTile, ComputeTile2, 3, memRef_ty) + of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) + + # AIE Core Function declarations + passthrough_10_i32 = external_func( + "passthrough_10_i32", inputs=[memRef_ty, memRef_ty] + ) + add_10_i32 = external_func( + "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + ) + + # Set up compute tiles + + @core(ComputeTile, "kernel.o") + def core_body(): + for _ in range_(10): + elemOut = of_in2.acquire(ObjectFifoPort.Produce, 1) + elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) + call(passthrough_10_i32, [elemIn, elemOut]) + of_in.release(ObjectFifoPort.Consume, 1) + of_in2.release(ObjectFifoPort.Produce, 1) + + @core(ComputeTile2, "kernel.o") + def core_body(): + elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) + elemInPre = of_in2.acquire(ObjectFifoPort.Consume, 1) + call(add_10_i32, [elemInPre, elemInPre, elemOutPre]) + of_out.release(ObjectFifoPort.Produce, 1) + + for _ in range_(8): + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + elemsIn = of_in2.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + of_in2.release(ObjectFifoPort.Consume, 1) + of_out.release(ObjectFifoPort.Produce, 1) + + 
elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) + elemsInPost = of_in2.acquire(ObjectFifoPort.Consume, 2) + call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost]) + of_in2.release(ObjectFifoPort.Consume, 2) + of_out.release(ObjectFifoPort.Produce, 1) + + # To/from AIE-array data movement + tensor_ty = T.memref(N, T.i32()) + + @runtime_sequence(tensor_ty, tensor_ty) + def sequence(A, C): + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) + + print(ctx.module) + + +two_core_sliding_window() diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc b/programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc new file mode 100644 index 0000000000..7e4515193c --- /dev/null +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/kernel.cc @@ -0,0 +1,38 @@ +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. 
+ +#include + +template +void passthrough(const T_in *__restrict in, T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = in[i]; + } +} + +extern "C" { + +void passthrough_10_i32(const int *__restrict in, int *__restrict out) { + passthrough(in, out); +} +} + +template +void add(const T_in *__restrict inA, const T_in *__restrict inB, + T_out *__restrict out) { + for (int i = 0; i < N; i++) { + out[i] = inA[i] + inB[i]; + } +} + +extern "C" { + +void add_10_i32(const int *__restrict inA, const int *__restrict inB, + int *__restrict out) { + add(inA, inB, out); +} +} diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp b/programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp new file mode 100644 index 0000000000..3cd72ab880 --- /dev/null +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/test.cpp @@ -0,0 +1,138 @@ +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 AMD Inc. 
+ +#include +#include +#include +#include + +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +#ifndef XCLBIN +#define XCLBIN "build/final.xclbin" +#endif + +#ifndef INSTS_TXT +#define INSTS_TXT "build/insts.txt" +#endif + +#ifndef KERNEL_NAME +#define KERNEL_NAME "MLIR_AIE" +#endif + +#define INPUT_SIZE (100 * sizeof(int)) // in bytes +#define OUTPUT_SIZE (100 * sizeof(int)) // in bytes +#define WIDTH_SIZE (10 * sizeof(int)) // in bytes +#define INPUT_ROWS INPUT_SIZE / WIDTH_SIZE +#define OUTPUT_ROWS OUTPUT_SIZE / WIDTH_SIZE + +std::vector load_instr_sequence(std::string instr_path) { + std::ifstream instr_file(instr_path); + std::string line; + std::vector instr_v; + while (std::getline(instr_file, line)) { + std::istringstream iss(line); + uint32_t a; + if (!(iss >> std::hex >> a)) { + throw std::runtime_error("Unable to parse instruction file\n"); + } + instr_v.push_back(a); + } + return instr_v; +} + +int main(int argc, const char *argv[]) { + + std::vector instr_v = load_instr_sequence(INSTS_TXT); + assert(instr_v.size() > 0); + + // Get a device handle + unsigned int device_index = 0; + xrt::device device = xrt::device(device_index); + + // Load the xclbin + xrt::xclbin xclbin = xrt::xclbin(XCLBIN); + + // Get the kernel from the xclbin + std::vector xkernels = xclbin.get_kernels(); + xrt::xclbin::kernel xkernel = *std::find_if( + xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel &k) { + return k.get_name().rfind(KERNEL_NAME, 0) == 0; + }); + std::string kernel_name = xkernel.get_name(); + assert(strcmp(kernel_name.c_str(), KERNEL_NAME) == 0); + + device.register_xclbin(xclbin); + + // get a hardware context + xrt::hw_context context(device, xclbin.get_uuid()); + + // get a kernel handle + auto kernel = xrt::kernel(context, kernel_name); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_input = + xrt::bo(device, INPUT_SIZE, 
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_output = + xrt::bo(device, OUTPUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + + int *buf_input = bo_input.map(); + std::cout << std::endl << std::endl << "Input: " << std::endl; + for (int i = 0; i < INPUT_ROWS; i++) { + std::cout << "row " << i << " : "; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_input[0]); j++) { + buf_input[i * INPUT_ROWS + j] = i; + std::cout << buf_input[i * INPUT_ROWS + j] << " "; + } + std::cout << std::endl << std::endl; + } + int *buf_output = bo_output.map(); + memset(buf_output, 0, OUTPUT_SIZE); + + // Instruction buffer for DMA configuration + void *buf_instr = bo_instr.map(); + memcpy(buf_instr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_input.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_output.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_input, bo_output); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. Returned status: " << r << "\n"; + return 1; + } + + bo_output.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + bool pass = true; + std::cout << std::endl << "Output: " << std::endl; + for (int i = 0; i < OUTPUT_ROWS; i++) { + std::cout << "row " << i << std::endl; + for (int j = 0; j < WIDTH_SIZE / sizeof(buf_output[0]); j++) { + int expected_output = 0; + if (i == 0) { + expected_output = buf_input[i * INPUT_ROWS] * 2; + } else { + expected_output = + buf_input[(i - 1) * INPUT_ROWS] + buf_input[i * INPUT_ROWS]; + } + std::cout << "expected: " << expected_output << ", "; + std::cout << "got: " << buf_output[i * OUTPUT_ROWS + j] << std::endl; + pass &= buf_output[i * OUTPUT_ROWS + j] == expected_output; + } + std::cout << std::endl << std::endl; + } + std::cout << std::endl << std::endl; + std::cout << (pass ? "PASS!" 
: "FAIL.") << std::endl; + + return 0; +} From 130211597211f7db93c7c9475bd1e0f4867793fb Mon Sep 17 00:00:00 2001 From: Pranathi Vasireddy Date: Wed, 13 Nov 2024 19:54:55 -0700 Subject: [PATCH 23/23] Updated aie2.py with the latest python bindings --- .../dyn_objFifo/sliding_window/aie2.py | 4 +-- .../sliding_window_conditional/aie2.py | 27 ++++++++-------- .../two_core_sliding_window/aie2.py | 32 +++++++++---------- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/programming_examples/dyn_objFifo/sliding_window/aie2.py b/programming_examples/dyn_objFifo/sliding_window/aie2.py index 08d92c73e1..57d5efb1a5 100644 --- a/programming_examples/dyn_objFifo/sliding_window/aie2.py +++ b/programming_examples/dyn_objFifo/sliding_window/aie2.py @@ -6,11 +6,9 @@ # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates -import sys - from aie.dialects.aie import * from aie.dialects.aiex import * -from aie.extras.dialects.ext.scf import _for as range_ +from aie.helpers.dialects.ext.scf import _for as range_ from aie.extras.context import mlir_mod_ctx N = 100 diff --git a/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py b/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py index 8ab2dfa636..83719bc8e8 100644 --- a/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py +++ b/programming_examples/dyn_objFifo/sliding_window_conditional/aie2.py @@ -6,11 +6,11 @@ # # (c) Copyright 2024 Advanced Micro Devices, Inc. 
or its affiliates -import sys +import numpy as np from aie.dialects.aie import * from aie.dialects.aiex import * -from aie.extras.dialects.ext.scf import _for as range_ +from aie.helpers.dialects.ext.scf import _for as range_ from aie.extras.context import mlir_mod_ctx N = 100 @@ -24,49 +24,48 @@ def sliding_window(): @device(dev) def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) + subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) ComputeTile = tile(col, 2) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 3, memRef_ty) - of_out = object_fifo("out", ComputeTile, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 3, subtensor_ty) + of_out = object_fifo("out", ComputeTile, ShimTile, 2, subtensor_ty) # AIE Core Function declarations add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] ) # Set up compute tiles - @core(ComputeTile, "kernel.o") def core_body(): for i in range_(10): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) if i == 0: elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) - call(add_10_i32, [elemInPre, elemInPre, elemOut]) + add_10_i32(elemInPre, elemInPre, elemOut) elif i == 9: elemsInPost = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOut]) + add_10_i32(elemsInPost[0], elemsInPost[1], elemOut) of_in.release(ObjectFifoPort.Consume, 2) else: elemsIn = of_in.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + add_10_i32(elemsIn[0], elemsIn[1], elemOut) of_in.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement - tensor_ty = T.memref(N, T.i32()) + tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] @runtime_sequence(tensor_ty, tensor_ty) def sequence(A, C): - 
npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) + dma_wait(of_out) print(ctx.module) diff --git a/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py b/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py index e815fada7c..c0d7c805ee 100644 --- a/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py +++ b/programming_examples/dyn_objFifo/two_core_sliding_window/aie2.py @@ -6,11 +6,11 @@ # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates -import sys +import numpy as np from aie.dialects.aie import * from aie.dialects.aiex import * -from aie.extras.dialects.ext.scf import _for as range_ +from aie.helpers.dialects.ext.scf import _for as range_ from aie.extras.context import mlir_mod_ctx N = 100 @@ -24,7 +24,7 @@ def two_core_sliding_window(): @device(dev) def device_body(): - memRef_ty = T.memref(N // n_rows, T.i32()) + subtensor_ty = np.ndarray[(N // n_rows,), np.dtype[np.int32]] # Tile declarations ShimTile = tile(col, 0) @@ -32,16 +32,16 @@ def device_body(): ComputeTile2 = tile(col, 4) # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile, 2, memRef_ty) - of_in2 = object_fifo("in2", ComputeTile, ComputeTile2, 3, memRef_ty) - of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) + of_in = object_fifo("in", ShimTile, ComputeTile, 2, subtensor_ty) + of_in2 = object_fifo("in2", ComputeTile, ComputeTile2, 3, subtensor_ty) + of_out = object_fifo("out", ComputeTile2, ShimTile, 2, subtensor_ty) # AIE Core Function declarations passthrough_10_i32 = external_func( - "passthrough_10_i32", inputs=[memRef_ty, memRef_ty] + "passthrough_10_i32", inputs=[subtensor_ty, subtensor_ty] ) 
add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] ) # Set up compute tiles @@ -51,7 +51,7 @@ def core_body(): for _ in range_(10): elemOut = of_in2.acquire(ObjectFifoPort.Produce, 1) elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) - call(passthrough_10_i32, [elemIn, elemOut]) + passthrough_10_i32(elemIn, elemOut) of_in.release(ObjectFifoPort.Consume, 1) of_in2.release(ObjectFifoPort.Produce, 1) @@ -59,30 +59,30 @@ def core_body(): def core_body(): elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) elemInPre = of_in2.acquire(ObjectFifoPort.Consume, 1) - call(add_10_i32, [elemInPre, elemInPre, elemOutPre]) + add_10_i32(elemInPre, elemInPre, elemOutPre) of_out.release(ObjectFifoPort.Produce, 1) for _ in range_(8): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) elemsIn = of_in2.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsIn[0], elemsIn[1], elemOut]) + add_10_i32(elemsIn[0], elemsIn[1], elemOut) of_in2.release(ObjectFifoPort.Consume, 1) of_out.release(ObjectFifoPort.Produce, 1) elemOutPost = of_out.acquire(ObjectFifoPort.Produce, 1) elemsInPost = of_in2.acquire(ObjectFifoPort.Consume, 2) - call(add_10_i32, [elemsInPost[0], elemsInPost[1], elemOutPost]) + add_10_i32(elemsInPost[0], elemsInPost[1], elemOutPost) of_in2.release(ObjectFifoPort.Consume, 2) of_out.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement - tensor_ty = T.memref(N, T.i32()) + tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] @runtime_sequence(tensor_ty, tensor_ty) def sequence(A, C): - npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - npu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata=of_in, bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata=of_out, bd_id=0, mem=C, sizes=[1, 1, 1, N]) + dma_wait(of_out) print(ctx.module)