Skip to content

Commit

Permalink
Some fun with types, working on getting matrix vector working still
Browse files Browse the repository at this point in the history
  • Loading branch information
hunhoffe committed Sep 23, 2024
1 parent 6cc7ccb commit 1c22e32
Show file tree
Hide file tree
Showing 22 changed files with 133 additions and 142 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from aie.extras.dialects.ext.scf import _for as range_
from aie.dialects.aiex import npu_dma_memcpy_nd, npu_sync

from aie.api.dataflow.inout.inout import MyInOutProgram
from aie.api.dataflow.inout.inout import MyInOutSequence
from aie.api.dataflow.objectfifo import MyObjectFifo
from aie.api.dataflow.objectfifolink import MyObjectFifoLink
from aie.api.kernels.binkernel import BinKernel
Expand Down Expand Up @@ -46,9 +46,9 @@
dtype_out_str = "i32"

# Input/output tensor definitions # TODO: can simplify if single value?
inA_ty = np.ndarray(dtype_in, (M * K))
inB_ty = np.ndarray(dtype_in, (K,))
outC_ty = np.ndarray(dtype_out, (M,))
inA_ty = np.ndarray[dtype_in, (M * K,)]
inB_ty = np.ndarray[dtype_in, (K,)]
outC_ty = np.ndarray[dtype_out, (M,)]
a_ty = np.ndarray[dtype_in, (m, k)]
a_flat_ty = np.ndarray[dtype_in, (m * k,)]
b_ty = np.ndarray[dtype_in, (k,)]
Expand All @@ -59,7 +59,7 @@
zero = BinKernel(f"zero_{scalar_str}{dtype_out_str}", f"mv_{m}x{k}.o", [c_ty])
matvec = BinKernel(
f"matvec_{scalar_str}{dtype_in_str}_{dtype_out_str}",
f"mm_{m}x{k}x{n}.o",
f"mv_{m}x{k}.o",
[a_ty, b_ty, c_ty],
)

Expand Down Expand Up @@ -88,13 +88,13 @@ def core_body(a_in, b_in, c_out, zero, matvec):


# Setup workers + per-worker dataflow
inB_fifo = MyObjectFifo(2, b_ty, name="inB", end_first=(1, 0))
inB_fifo = MyObjectFifo(2, b_ty, name="inB", shim_endpoint=(1, 0))
for i in range(n_cores):
# Create object fifos for per-core dataflow
memA = MyObjectFifo(2, a_flat_ty, name=f"memA{i}", end_first=(i, 0))
toStreamA = [(k // 2 // 2, 2), (m, k), (2, 1)] if vectorized else []
inA = MyObjectFifo(2, a_ty, name=f"inA{i}", toStream=toStreamA)
outC = MyObjectFifo(2, c_ty, end_second=(i, 0))
memA = MyObjectFifo(2, a_flat_ty, name=f"memA{i}", shim_endpoint=(i, 0))
dimensionsToStreamA = [(k // 2 // 2, 2), (m, k), (2, 1)] if vectorized else []
inA = MyObjectFifo(2, a_ty, name=f"inA{i}", dimensionsToStream=dimensionsToStreamA)
outC = MyObjectFifo(2, c_ty, shim_endpoint=(i, 0))

# Create per-core worker program
worker_programs.append(
Expand Down Expand Up @@ -147,7 +147,7 @@ def sequence_fn(A, B, C, memA, inB, memC):
npu_sync(column=i, row=0, direction=0, channel=0)


inout_program = MyInOutProgram(
inout_sequence = MyInOutSequence(
sequence_fn,
[inA_ty, inB_ty, outC_ty],
[memA_fifos, inB_fifo, outC_fifos],
Expand All @@ -157,7 +157,7 @@ def sequence_fn(A, B, C, memA, inB, memC):
NPU1Col4(),
worker_programs=worker_programs,
links=A_links,
inout_program=inout_program,
inout_sequence=inout_sequence,
)

my_program.resolve_program()
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from aie.extras.dialects.ext.scf import _for as range_
from aie.dialects.aiex import npu_dma_memcpy_nd, npu_sync

from aie.api.dataflow.inout.inout import MyInOutProgram
from aie.api.dataflow.inout.inout import MyInOutSequence
from aie.api.dataflow.objectfifo import MyObjectFifo
from aie.api.dataflow.objectfifolink import MyObjectFifoLink
from aie.api.kernels.binkernel import BinKernel
Expand Down Expand Up @@ -112,26 +112,24 @@ def my_matmul(M, K, N, m, k, n, dtype_in_str, dtype_out_str, vectorized):
[a_ty, b_ty, c_ty],
)

inA = MyObjectFifo(2, a_ty)
inA = MyObjectFifo(2, a_ty, shim_endpoint=(0, 0))
memAToStream = [(m // r, r * k), (k // s, s), (r, k), (s, 1)] if vectorized else []
memA = MyObjectFifo(2, a_ty, dimensionsToStream=memAToStream)
inALink = MyObjectFifoLink([inA.second], [memA.first], coords=(0, 1)) # AnyMemtile
inALink = MyObjectFifoLink([inA.second], [memA.first], coords=(0, 1))

# Input B
inB = MyObjectFifo(2, b_ty)
inB = MyObjectFifo(2, b_ty, shim_endpoint=(0, 0))
memBToStream = [(k // s, s * n), (n // t, t), (s, n), (t, 1)] if vectorized else []
memB = MyObjectFifo(2, b_ty, dimensionsToStream=memBToStream)
inBLink = MyObjectFifoLink([inB.second], [memB.first], coords=(0, 1)) # AnyMemtile
inBLink = MyObjectFifoLink([inB.second], [memB.first], coords=(0, 1))

# Output C
memC = MyObjectFifo(2, c_ty)
memCToStream = (
[(m // r, r * n), (r, t), (n // t, r * t), (t, 1)] if vectorized else []
)
outC = MyObjectFifo(2, c_ty, dimensionsToStream=memCToStream)
outCLink = MyObjectFifoLink(
[memC.second], [outC.first], coords=(0, 1)
) # AnyMemtile
outC = MyObjectFifo(2, c_ty, dimensionsToStream=memCToStream, shim_endpoint=(0, 0))
outCLink = MyObjectFifoLink([memC.second], [outC.first], coords=(0, 1))

def core_fn(a, b, c, zero, matmul):
for _ in range_(0xFFFFFFFF):
Expand Down Expand Up @@ -199,28 +197,25 @@ def sequence_fn(A, B, C, inA, inB, outC):
npu_sync(column=0, row=0, direction=0, channel=0)
npu_sync(column=0, row=0, direction=0, channel=0)

inout_program = MyInOutProgram(
inout_sequence = MyInOutSequence(
sequence_fn,
[A_ty, B_ty, C_ty],
[inA.first, inB.first, outC.second],
coords=(0, 0), # AnyShim
)

worker_program = MyWorker(
core_fn,
[memA.second, memB.second, memC.first, zero, matmul],
coords=(0, 2), # AnyCore
coords=(0, 2),
)

my_program = MyProgram(
NPU1Col1(),
worker_programs=[worker_program],
links=[inALink, inBLink, outCLink],
inout_program=inout_program,
# placer=SequentialPlacer(pack=True)
inout_sequence=inout_sequence,
)

# g = my_program.get_dataflow_graph()
my_program.resolve_program()


Expand Down
22 changes: 8 additions & 14 deletions programming_examples/basic/matrix_scalar_add/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from aie.extras.dialects.ext.arith import constant
from aie.extras.dialects.ext.func import func
from aie.extras.dialects.ext.scf import _for as range_
from aie.api.dataflow.inout.simplefifoinout import SimpleFifoInOutProgram
from aie.api.dataflow.inout.simplefifoinout import SimpleFifoInOutSequence
from aie.api.dataflow.objectfifo import MyObjectFifo
from aie.api.phys.device import NPU1Col1, XCVC1902
from aie.api.program import MyProgram
Expand Down Expand Up @@ -42,12 +42,13 @@
else:
raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))

col = int(sys.argv[2])
my_dtype = np.int32
tile_ty = np.ndarray[my_dtype, (TILE_SIZE,)]

# AIE-array data movement with object fifos
of_in = MyObjectFifo(objfifo_capacity, tile_ty)
of_out = MyObjectFifo(objfifo_capacity, tile_ty)
of_in = MyObjectFifo(objfifo_capacity, tile_ty, shim_endpoint=(col, 0))
of_out = MyObjectFifo(objfifo_capacity, tile_ty, shim_endpoint=(col, 0))


@func
Expand All @@ -66,15 +67,15 @@ def core_fn(of_in, of_out, add_kernel):
of_out.release(1)


# Set up compute tile 2 TODO: clean up placement
# Set up worker
worker_program = MyWorker(
core_fn,
[of_in.second, of_out.first, add_kernel],
coords=(int(sys.argv[2]), 2),
coords=(col, 2),
)

# To/from AIE-array data movement
inout_program = SimpleFifoInOutProgram(
inout_sequence = SimpleFifoInOutSequence(
of_in.first,
TILE_SIZE,
of_out.second,
Expand All @@ -84,16 +85,9 @@ def core_fn(of_in, of_out, add_kernel):
out_sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
out_strides=[1, 1, IMAGE_WIDTH, 1],
dtype=my_dtype,
coords=(int(sys.argv[2]), 0),
)

my_program = MyProgram(
dev, worker_programs=[worker_program], inout_program=inout_program
dev, worker_programs=[worker_program], inout_sequence=inout_sequence
)
my_program.resolve_program()

"""
TODOs:
* look into # @canonicalize(using=scf_canonicalizer) should decorate this after func if we want control flow
* we need emit = true because must be emitted in outer loop (not deferred) to have access to symbol table
"""
4 changes: 2 additions & 2 deletions python/api/dataflow/inout/inout.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def __init__(
):
self.sequence_fn = sequence_fn
self.inout_types = inout_types
self.fifos = fifos
self.fifos = fifos.copy()

def get_fifos(self) -> list[ObjectFifoHandle]:
return self.fifos
return self.fifos.copy()

def resolve(
self,
Expand Down
9 changes: 4 additions & 5 deletions python/api/dataflow/inout/simplefifoinout.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"""

import numpy as np
from typing import Optional

from .... import ir
from ....dialects.aiex import runtime_sequence, npu_sync, npu_dma_memcpy_nd
Expand All @@ -21,10 +20,10 @@ def __init__(
bytes_in: int,
fifo_out: ObjectFifoHandle,
bytes_out: int,
in_sizes: Optional[list[int]] = None,
in_strides: Optional[list[int]] = None,
out_sizes: Optional[list[int]] = None,
out_strides: Optional[list[int]] = None,
in_sizes: list[int] | None = None,
in_strides: list[int] | None = None,
out_sizes: list[int] | None = None,
out_strides: list[int] | None = None,
dtype: np.generic = np.uint8,
):
assert bytes_in % np.prod(get_np_ndarray_type_shape(fifo_in.obj_type)) == 0
Expand Down
5 changes: 2 additions & 3 deletions python/api/dataflow/objectfifo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

# Address circular dependency between MyObjectFifo and ObjectFifoHandle
from __future__ import annotations
from typing import Optional
import numpy as np

from ... import ir
Expand Down Expand Up @@ -39,7 +38,7 @@ def __init__(
end2: MyObjectFifoEndpoint = None,
dimensionsToStream=None, # TODO(erika): needs a type
dimensionsFromStreamPerConsumer=None, # TODO(erika): needs a type
shim_endpoint: Optional[tuple[int, int]] = None,
shim_endpoint: tuple[int, int] | None = None,
):
self.__depth = depth
self.__obj_type = obj_type
Expand All @@ -52,7 +51,7 @@ def __init__(
self.name = f"myof{MyObjectFifo.__get_index()}"
else:
self.name = name
self.__op: Optional[ObjectFifoCreateOp] = None
self.__op: ObjectFifoCreateOp | None = None
self.__first: ObjectFifoHandle = ObjectFifoHandle(self, True)
self.__second: ObjectFifoHandle = ObjectFifoHandle(self, False)
if shim_endpoint:
Expand Down
29 changes: 16 additions & 13 deletions python/api/dataflow/objectfifolink.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numpy as np
from typing import Optional
from collections.abc import Sequence

from ... import ir
from ...dialects._aie_ops_gen import ObjectFifoLinkOp
Expand All @@ -8,31 +8,34 @@
from ..phys.tile import MyTile
from .endpoint import MyObjectFifoEndpoint
from .objectfifo import ObjectFifoHandle
from ...extras.util import single_elem_or_list_to_list


class MyObjectFifoLink(MyObjectFifoEndpoint):
def __init__(
self,
seconds: list[ObjectFifoHandle] = [],
firsts: list[ObjectFifoHandle] = [],
coords: Optional[tuple[int, int]] = None,
seconds: Sequence[ObjectFifoHandle] | ObjectFifoHandle = [],
firsts: Sequence[ObjectFifoHandle] | ObjectFifoHandle = [],
coords: tuple[int, int] | None = None,
):
column, row = coords
self.__tile = MyTile(column, row)

self.__seconds = []
self.__firsts = []
self.__seconds = single_elem_or_list_to_list(seconds)
self.__firsts = single_elem_or_list_to_list(firsts)
self.__op = None

self.__obj_type = seconds[0].obj_type
for s in seconds:
assert s.obj_type == self.__obj_type
s.set_endpoint(self)
self.__seconds.append(s)
for f in firsts:
assert len(self.__firsts) > 0
assert len(self.__seconds) > 0

self.__obj_type = self.__seconds[0].obj_type
for f in self.__firsts:
# TODO: need to check size not exactness
assert f.obj_type == self.__obj_type
f.set_endpoint(self)
self.__firsts.append(f)
for s in self.__seconds:
assert s.obj_type == self.__obj_type
s.set_endpoint(self)

@property
def tile(self) -> MyTile:
Expand Down
6 changes: 3 additions & 3 deletions python/api/kernels/binkernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""

import numpy as np
from typing import get_origin, Optional, Union
from typing import get_origin

from ... import ir

Expand All @@ -22,13 +22,13 @@ def __init__(
name: str,
bin_name: str,
inout_types: list[
Union[np.ndarray[np.generic.dtype, np.generic.shape], np.dtype]
np.ndarray[np.generic.dtype, np.generic.shape] | np.dtype
] = [],
) -> None:
self.__name = name
self.__bin_name = bin_name
self.__inout_types = inout_types
self.__op: Optional[FuncOp] = None
self.__op: FuncOp | None = None

@property
def bin_name(self) -> str:
Expand Down
4 changes: 1 addition & 3 deletions python/api/phys/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
}
"""

from typing import Optional

from ... import ir
from ...dialects.aie import AIEDevice, tile, TileOp
from ..resolvable import Resolvable
Expand All @@ -35,7 +33,7 @@ class __MyDeviceTile(Resolvable):
def __init__(self, col: int, row: int) -> None:
self.__col: int = col
self.__row: int = row
self.__op: Optional[TileOp] = None
self.__op: TileOp | None = None
super().__init__()

def resolve(
Expand Down
3 changes: 1 addition & 2 deletions python/api/phys/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
* tile types"
"""

from typing import Optional
from ...dialects.aie import TileOp


class MyTile:
def __init__(self, col: int, row: int) -> None:
self.col: int = col
self.row: int = row
self.__op: Optional[TileOp] = None
self.__op: TileOp | None = None

@property
def op(self) -> TileOp:
Expand Down
Loading

0 comments on commit 1c22e32

Please sign in to comment.