Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gauss Jordan Matrix Inversion #80

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
03a8625
[software] Add Moore Penrose inversion kernel
mbertuletti Jul 13, 2022
c56a055
[software] add singular value decomposition kernel
mbertuletti Jul 13, 2022
77becf1
[software] Clean Gauss Jordan inverse function
mbertuletti Jul 14, 2022
1577906
[software] Allow use of input N by M rectangular matrices
mbertuletti Jul 18, 2022
21ded46
[software] Parallelize Gauss-Jordan matrix inversion
mbertuletti Jul 19, 2022
faeca50
[software] Unroll single core
mbertuletti Jul 19, 2022
9728023
[software] Unroll parallel core
mbertuletti Jul 19, 2022
ee0119c
[software] Clean comments on single-core
mbertuletti Jul 21, 2022
b412c87
[software] Change kernel name
mbertuletti Jul 21, 2022
9138024
[software] Add different parallelization schemes
mbertuletti Jul 27, 2022
3aad7fd
[software] Shape memory accesses to mempool
mbertuletti Jul 27, 2022
a045b42
[software] Add folded kernel
mbertuletti Aug 2, 2022
4dca2cf
[software] Let single core handle exchange in parallel implementation
mbertuletti Aug 3, 2022
0ca5b68
[software] Add code for unrolling in single-core
mbertuletti Aug 3, 2022
b42e968
[software] Add parallelization schemes in memory shaped version
mbertuletti Aug 3, 2022
82f8f51
[software] Merge the two final steps of matrix inversion
mbertuletti Aug 4, 2022
8acd260
[software] Correct lint errors
mbertuletti Sep 27, 2022
ae56dc4
[software] Erase SVD folder
mbertuletti Oct 31, 2022
0fbf978
[software] Fix reading of the number of cores
mbertuletti Dec 14, 2022
4c42194
[software] Move the kernels to runtime/kernels folder
mbertuletti Apr 13, 2023
cc31b71
[software] Clean up
mbertuletti May 26, 2023
c04dea3
Update changelog
mbertuletti May 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Use custom compiler for VCS specified with `CC` and `CCX` environment variable
- Implement operand gating for SIMD and MAC Units in Snitch IPU's DSP Unit
- Add Channel Estimation application and kernels
- Add Gauss-Jordan matrix inversion kernel

### Fixed
- Fix type issue in `snitch_addr_demux`
Expand Down
95 changes: 95 additions & 0 deletions software/apps/mat_inv/initialization.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright 2021 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Author: Marco Bertuletti, ETH Zurich

// Number of fractional bits used by the fixed-point helpers below.
#define FIXED_POINT 16
// Fixed-point division: scales `a` up by 2^FIXED_POINT, then divides by `b`.
// Every argument is parenthesized so that compound expressions such as
// `x + y` expand with the intended precedence.
#define FIX_DIV(a, b) ((int32_t)(((a) << FIXED_POINT) / (b)))
// Fixed-point multiplication: multiplies `a` by `b`, then scales the product
// back down by 2^FIXED_POINT.
#define FIX_MUL(a, b) ((int32_t)(((a) * (b)) >> FIXED_POINT))
// Smaller of the two arguments. Arguments may be evaluated twice, so do not
// pass expressions with side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))

// Three file-scope uses of `dump`, for the names `l`, `loopCnt` and `i`, each
// paired with a distinct numeric id (1, 2, 3).
// NOTE(review): `dump` is not defined in this header; presumably it is a
// helper-generating macro from the runtime — confirm against runtime.h.
dump(l, 1);
dump(loopCnt, 2);
dump(i, 3);

// Prints the n * m elements of A, one per line, in storage order.
void display(int32_t *A, int32_t n, int32_t m);

#ifdef FOLDED
// Prints n * m elements of A, reading them through the N_BANKS /
// N_USED_BANKS folded addressing. Only available when FOLDED is defined.
void display_folded(int32_t *A, int32_t n, int32_t m);
#endif

// Writes the transpose of the row-major n x m `matrix` into `t_matrix`.
void Transpose(int32_t *matrix, int32_t *t_matrix, int32_t n, int32_t m);

// Fixed-point product of an n x m and an m x o row-major matrix; the n x o
// result is stored in `matrix_product`.
void MatrixMult(int32_t *matrix_1, int32_t *matrix_2, int32_t *matrix_product,
int32_t n, int32_t m, int32_t o);

// Fills `matrix` with a * column + b * row + c. Only core 0 does the work.
void init_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns,
int32_t a, int32_t b, int32_t c, uint32_t core_id);

// Sets every element of `matrix` to zero. Only core 0 does the work.
void init_matrix_zeros(int32_t *matrix, uint32_t num_rows, uint32_t num_columns,
uint32_t core_id);

// Prints the n * m elements of A in storage order, one per line, formatted as
// "Output[index] = value".
void display(int32_t *A, int32_t n, int32_t m) {
  const int32_t count = n * m;
  for (int32_t idx = 0; idx < count; idx++) {
    printf("Output[%d] = %8d\n", idx, A[idx]);
  }
}

#ifdef FOLDED
// Prints n * m elements of a matrix stored in the folded layout, one per line,
// formatted as "Output[index] = value".
// For logical index idx, the group start (idx / n) * n is mapped to
// N_BANKS * (start / N_USED_BANKS) + start % N_USED_BANKS, and the element is
// read idx % n positions after that offset.
void display_folded(int32_t *A, int32_t n, int32_t m) {
  const int32_t count = n * m;
  for (int32_t idx = 0; idx < count; idx++) {
    int32_t group_start = (idx / n) * n;
    int32_t within_group = idx % n;
    int32_t folded_base =
        N_BANKS * (group_start / N_USED_BANKS) + group_start % N_USED_BANKS;
    printf("Output[%d] = %8d\n", idx, A[folded_base + within_group]);
  }
}
#endif

// Transposes a row-major n x m matrix.
//   matrix:   source, n rows by m columns
//   t_matrix: destination, m rows by n columns
// The source is read sequentially; each element is scattered to its
// transposed position in the destination.
void Transpose(int32_t *matrix, int32_t *t_matrix, int32_t n, int32_t m) {
  int32_t src = 0; // running row-major index into `matrix`
  for (int32_t row = 0; row < n; row++) {
    for (int32_t col = 0; col < m; col++) {
      t_matrix[col * n + row] = matrix[src];
      src++;
    }
  }
}

// Fixed-point matrix product of row-major matrices.
//   matrix_1:       n x m left operand
//   matrix_2:       m x o right operand
//   matrix_product: n x o result
// Each output cell is cleared and then accumulated in place with FIX_MUL.
void MatrixMult(int32_t *matrix_1, int32_t *matrix_2, int32_t *matrix_product,
                int32_t n, int32_t m, int32_t o) {
  for (int32_t row = 0; row < n; row++) {
    for (int32_t col = 0; col < o; col++) {
      int32_t *cell = &matrix_product[row * o + col];
      *cell = 0;
      for (int32_t inner = 0; inner < m; inner++) {
        *cell += FIX_MUL(matrix_1[row * m + inner], matrix_2[inner * o + col]);
      }
    }
  }
}
Comment on lines +52 to +73
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those are unused, right? Do we need them somewhere else?


// Fills a row-major num_rows x num_columns matrix so that the element at
// (row, column) equals a * column + b * row + c.
// Only the core with core_id 0 writes; every other core returns immediately.
void init_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns,
                 int32_t a, int32_t b, int32_t c, uint32_t core_id) {
  if (core_id != 0) {
    return;
  }
  uint32_t idx = 0; // running row-major index, equals row * num_columns + col
  for (uint32_t row = 0; row < num_rows; row++) {
    for (uint32_t col = 0; col < num_columns; col++) {
      matrix[idx] = a * (int32_t)col + b * (int32_t)row + c;
      idx++;
    }
  }
}

// Sets every element of a row-major num_rows x num_columns matrix to zero.
// Only the core with core_id 0 writes; every other core returns immediately.
void init_matrix_zeros(int32_t *matrix, uint32_t num_rows, uint32_t num_columns,
                       uint32_t core_id) {
  if (core_id != 0) {
    return;
  }
  // Walk the buffer row by row, the same traversal init_matrix() uses.
  for (uint32_t row = 0; row < num_rows; row++) {
    for (uint32_t col = 0; col < num_columns; col++) {
      matrix[row * num_columns + col] = 0;
    }
  }
}
105 changes: 105 additions & 0 deletions software/apps/mat_inv/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright 2021 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Author: Marco Bertuletti, ETH Zurich

#include "encoding.h"
#include "printf.h"
#include "runtime.h"
#include "synchronization.h"

// Input matrix dimensions: main() initializes `matrix` with N rows and M
// columns.
#define N 16
#define M 16
// Bank count used by the folded addressing, the folded buffer sizes and the
// buffer alignment in the FOLDED configuration.
#define N_BANKS (1024)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the number of banks in the system? Because for this, we have a globally set define by now.

// Used together with N_BANKS to size and address the folded buffers; main()
// also derives the FOLDED core count from it (N_USED_BANKS >> 2).
#define N_USED_BANKS (16)

// VERBOSE: core 0 prints the result after the kernel has run.
#define VERBOSE
// Kernel selection. main() checks SINGLE first, then PARALLEL, then FOLDED.
#define SINGLE
// #define PARALLEL
// #define FOLDED

#include "initialization.h"
#include "kernel/mempool_mat_inv_q32p.h"
#include "kernel/mempool_mat_inv_q32s.h"

// Global buffers, all placed in the ".l1" section.
//   matrix:        N x M input, filled by init_matrix() in main().
//   folded_matrix: FOLDED only; passed to fold_matrix() and to the folded
//                  kernel.
//   inv:           passed to the selected kernel as its second argument and
//                  printed when VERBOSE is set.
//   flag:          cleared by core 0; its address is passed to the parallel
//                  and folded kernels.
// NOTE(review): aligned() takes a byte count, while N_BANKS reads like a bank
// count — confirm that the requested alignment is the intended one.
#ifdef FOLDED
int32_t matrix[N * M] __attribute__((aligned(N_BANKS), section(".l1")));
int32_t folded_matrix[N_BANKS * ((N * M) / N_USED_BANKS)]
__attribute__((aligned(N_BANKS), section(".l1")));
int32_t inv[N_BANKS * ((N * M) / N_USED_BANKS)]
__attribute__((aligned(N_BANKS), section(".l1")));
uint32_t flag __attribute__((section(".l1")));
#else
int32_t matrix[N * M] __attribute__((aligned(N), section(".l1")));
int32_t inv[M * M] __attribute__((aligned(N), section(".l1")));
uint32_t flag __attribute__((section(".l1")));
#endif

// Test entry point, executed by every core.
//  1. Core 0 initializes the input matrix, clears the output buffer(s) and
//     resets `flag`; all cores then meet at a barrier.
//  2. The kernel selected at compile time (SINGLE, PARALLEL or FOLDED, checked
//     in that order) runs between the benchmark start/stop markers.
//  3. With VERBOSE defined, core 0 prints the result.
// Always returns 0.
int main() {

  uint32_t core_id = mempool_get_core_id();
  uint32_t num_cores = mempool_get_core_count();
  // Initialize barrier and synchronize
  mempool_barrier_init(core_id);

  /* initialize the data */
#if defined(SINGLE) || defined(PARALLEL)
  init_matrix(matrix, N, M, -156, 427, -219, core_id);
  init_matrix_zeros(inv, M, M, core_id);
  if (core_id == 0) {
    flag = 0U;
  }
  mempool_barrier(num_cores);

#elif defined(FOLDED)
  // Number of cores that take part in the folded kernel.
  uint32_t nPE = N_USED_BANKS >> 2U;
  init_matrix(matrix, N, M, -156, 427, -219, core_id);
  init_matrix_zeros(folded_matrix, ((N * M) / N_USED_BANKS), N_BANKS, core_id);
  init_matrix_zeros(inv, ((N * M) / N_USED_BANKS), N_BANKS, core_id);
  if (core_id == 0) {
    flag = 0U;
  }
  mempool_barrier(num_cores);

#endif

  /* Execute the kernel */
#if defined(SINGLE)
  if (core_id == 0) {
    mempool_start_benchmark();
    mempool_GJinv_q32s(matrix, inv, M);
    mempool_stop_benchmark();
  }
  mempool_barrier(num_cores);

#elif defined(PARALLEL)
  if (core_id < MIN(NUM_CORES, N / 4)) {
    mempool_start_benchmark();
    mempool_GJinv_q32p(matrix, inv, M, &flag);
    mempool_stop_benchmark();
  }
  mempool_barrier(num_cores);

#elif defined(FOLDED)
  // NOTE(review): every core calls fold_matrix() and no barrier separates it
  // from the kernel below — confirm fold_matrix() synchronizes internally.
  mempool_start_benchmark();
  fold_matrix(matrix, folded_matrix, N);
  mempool_stop_benchmark();
  if (core_id < nPE) {
    mempool_start_benchmark();
    mempool_GJinv_folded_q32p(folded_matrix, inv, M, &flag, nPE);
    mempool_stop_benchmark();
  }
  mempool_barrier(num_cores);

#endif

  /* Display the result of computation */
#ifdef VERBOSE
  if (core_id == 0) {
#if defined(FOLDED) && !defined(SINGLE) && !defined(PARALLEL)
    // The folded kernel works on `inv` allocated in the bank-folded layout, so
    // it has to be read back with the matching folded addressing.
    display_folded(inv, M, N);
#else
    display(inv, M, N);
#endif
  }
  mempool_barrier(num_cores);
#endif

  return 0;
}
Loading