Skip to content

Commit

Permalink
Support new python officially in passthrough kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
hunhoffe committed Sep 13, 2024
1 parent 57c53a2 commit 602c5b1
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 152 deletions.
4 changes: 2 additions & 2 deletions programming_examples/basic/passthrough_kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ PASSTHROUGH_SIZE = ${data_size}

all: build/final_${data_size}.xclbin

build/aie2_lineBased_8b_${data_size}.mlir: ${srcdir}/iron_objs.py
build/aie2_lineBased_8b_${data_size}.mlir: ${srcdir}/aie2.py
mkdir -p ${@D}
python3 $< ${data_size} 0 > $@

build/aie_trace__lineBased_8b_${data_size}.mlir: ${srcdir}/iron_objs.py
build/aie_trace__lineBased_8b_${data_size}.mlir: ${srcdir}/aie2.py
mkdir -p ${@D}
python3 $< ${data_size} ${trace_size} > $@

Expand Down
120 changes: 44 additions & 76 deletions programming_examples/basic/passthrough_kernel/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,94 +5,62 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

import sys
import numpy as np

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.context import mlir_mod_ctx

import aie.utils.trace as trace_utils

from aie.dialects.scf import for_ as range_
from aie.dialects.scf import yield_

def passthroughKernel(vector_size, trace_size):
N = vector_size
lineWidthInBytes = N // 4 # chop input in 4 sub-tensors
from aie.api.dataflow.inout.simplefifoinout import SimpleFifoInOutProgram
from aie.api.dataflow.objectfifo import MyObjectFifo
from aie.api.kernels.binkernel import BinKernel
from aie.api.phys.device import NPU1Col1
from aie.api.program import MyProgram
from aie.api.worker import MyWorker

@device(AIEDevice.npu1_1col)
def device_body():
# define types
memRef_ty = T.memref(lineWidthInBytes, T.ui8())

# AIE Core Function declarations
passThroughLine = external_func(
"passThroughLine", inputs=[memRef_ty, memRef_ty, T.i32()]
)
# Parse and validate the vector size passed as the first command-line argument.
# Diagnostics are written to stderr because the build redirects this script's
# stdout into the generated .mlir file, and every failure aborts with a
# non-zero status so that no output is produced for an invalid size.
try:
    vector_size = int(sys.argv[1])
except IndexError:
    # No size argument was supplied at all.
    print(f"Usage: {sys.argv[0]} <vector_size>", file=sys.stderr)
    sys.exit(1)
except ValueError:
    # The argument could not be parsed as an integer.
    print("Argument has inappropriate value", file=sys.stderr)
    sys.exit(1)

if vector_size % 64 != 0 or vector_size < 512:
    print(
        "Vector size must be a multiple of 64 and greater than or equal to 512",
        file=sys.stderr,
    )
    sys.exit(1)

# Tile declarations
ShimTile = tile(0, 0)
ComputeTile2 = tile(0, 2)
assert vector_size % 4 == 0
line_size = vector_size // 4

# Set up a circuit-switched flow from core to shim for tracing information
if trace_size > 0:
flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1)
inout_type = ((vector_size,), np.uint8)
fifo_memref_type = ((line_size,), np.uint8)

# AIE-array data movement with object fifos
of_in = object_fifo("in", ShimTile, ComputeTile2, 2, memRef_ty)
of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty)
of0 = MyObjectFifo(2, memref_type=fifo_memref_type, name="out")
of1 = MyObjectFifo(2, memref_type=fifo_memref_type, name="in")

# Set up compute tiles
passthrough_fn = BinKernel(
"passThroughLine",
"passThrough.cc.o",
[fifo_memref_type, fifo_memref_type, np.int32],
)

# Compute tile 2
@core(ComputeTile2, "passThrough.cc.o")
def core_body():
for _ in for_(sys.maxsize):
elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
elemIn = of_in.acquire(ObjectFifoPort.Consume, 1)
call(passThroughLine, [elemIn, elemOut, lineWidthInBytes])
of_in.release(ObjectFifoPort.Consume, 1)
of_out.release(ObjectFifoPort.Produce, 1)
yield_([])

# print(ctx.module.operation.verify())
def core_fn(ofs_end1, ofs_end2, external_functions):
of_out = ofs_end1[0]
of_in = ofs_end2[0]
passThroughLine = external_functions[0]

tensor_ty = T.memref(N, T.ui8())
for _ in range_(vector_size // line_size):
elemOut = of_out.acquire_produce(1)
elemIn = of_in.acquire_consume(1)
passThroughLine(elemIn, elemOut, line_size)
of_in.release_consume(1)
of_out.release_produce(1)
yield_([])

@runtime_sequence(tensor_ty, tensor_ty, tensor_ty)
def sequence(inTensor, outTensor, notUsed):
if trace_size > 0:
trace_utils.configure_simple_tracing_aie2(
ComputeTile2,
ShimTile,
ddr_id=1,
size=trace_size,
offset=N,
)

npu_dma_memcpy_nd(
metadata="in",
bd_id=0,
mem=inTensor,
sizes=[1, 1, 1, N],
)
npu_dma_memcpy_nd(
metadata="out",
bd_id=1,
mem=outTensor,
sizes=[1, 1, 1, N],
)
npu_sync(column=0, row=0, direction=0, channel=0)
worker_program = MyWorker(core_fn, [of0], [of1], [passthrough_fn], coords=(0, 2))
inout_program = SimpleFifoInOutProgram(of0, vector_size, of1, vector_size)


# Script entry: read the vector size (argv[1]) and the optional trace size
# (argv[2], used only when exactly two arguments are given), then emit the
# MLIR module on stdout.
# Diagnostics are written to stderr and every failure exits with a non-zero
# status, so an invalid invocation never reaches the kernel generation below
# and never pollutes the redirected stdout.
try:
    vector_size = int(sys.argv[1])
    trace_size = 0 if (len(sys.argv) != 3) else int(sys.argv[2])
except IndexError:
    # No size argument was supplied at all.
    print(f"Usage: {sys.argv[0]} <vector_size> [trace_size]", file=sys.stderr)
    sys.exit(1)
except ValueError:
    # One of the arguments could not be parsed as an integer.
    print("Argument has inappropriate value", file=sys.stderr)
    sys.exit(1)

if vector_size % 64 != 0 or vector_size < 512:
    print(
        "Vector size must be a multiple of 64 and greater than or equal to 512",
        file=sys.stderr,
    )
    sys.exit(1)

with mlir_mod_ctx() as ctx:
    passthroughKernel(vector_size, trace_size)
    print(ctx.module)
my_program = MyProgram(
NPU1Col1(), worker_programs=[worker_program], inout_program=inout_program
)
my_program.resolve_program()
74 changes: 0 additions & 74 deletions programming_examples/basic/passthrough_kernel/iron_objs.py

This file was deleted.

0 comments on commit 602c5b1

Please sign in to comment.