Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#0: SFPU UT param sweep: #14422

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 132 additions & 18 deletions tests/tt_metal/tt_metal/unit_tests/compute/test_golden_impls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,13 @@
//
// SPDX-License-Identifier: Apache-2.0

#include <algorithm>


#include "test_golden_impls.hpp"
#include "common/test_tiles.hpp"
#include "common/bfloat16.hpp"
#include "tt_metal/host_api.hpp"
#include "tt_metal/detail/tt_metal.hpp"
#include "tests/tt_metal/test_utils/packing.hpp"

using std::vector;

namespace unit_tests::compute {

std::vector<uint32_t> gold_standard_untilize(const std::vector<uint32_t> &src_vec, const GoldenConfig &config) {
vector<uint32_t> gold_standard_untilize(const vector<uint32_t> &src_vec, const GoldenConfig &config) {
vector<uint32_t> dst_vec;

int num_rows = config.num_tiles_r_dim * config.face_r_dim * (config.num_faces > 2 ? 2: 1);
Expand Down Expand Up @@ -74,7 +66,7 @@ std::vector<uint32_t> gold_standard_untilize(const std::vector<uint32_t> &src_ve
return dst_vec;
}

std::vector<uint32_t> gold_standard_tilize(const std::vector<uint32_t> &src_vec, const GoldenConfig &config) {
vector<uint32_t> gold_standard_tilize(const vector<uint32_t> &src_vec, const GoldenConfig &config) {
vector<uint32_t> dst_vec;

//TODO: RT update this one to use variable tile sizes
Expand Down Expand Up @@ -116,7 +108,7 @@ std::vector<uint32_t> gold_standard_tilize(const std::vector<uint32_t> &src_vec,
// input shape.x is assumed to have the full number of elements in bfloat16
// src_vec is expected to be untilized
// result is also untilized
std::vector<uint16_t> gold_transpose_wh(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape) {
vector<uint16_t> gold_transpose_wh(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape) {
vector<uint32_t> shapeT{shape[0], shape[1], shape[3], shape[2]};
TensAddr addr(shape);
TensAddr addrt(shapeT);
Expand All @@ -138,7 +130,7 @@ std::vector<uint16_t> gold_transpose_wh(const std::vector<uint16_t> &src_vec, co
// input shape.x is assumed to have the full number of elements in bfloat16
// src_vec is expected to be untilized
// result is also untilized
std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint16_t> gold_reduce_h(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint32_t> shape_dst{shape[0], shape[1], 1, shape[3]};
TT_FATAL(shape[2] > 0, "Error");
if (zeropad)
Expand Down Expand Up @@ -167,7 +159,7 @@ std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const
return reduced;
};

std::vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint32_t> shape_dst{shape[0], shape[1], shape[2], 1};
if (zeropad)
shape_dst[3] = 32;
Expand All @@ -194,7 +186,7 @@ std::vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const std::
return reduced;
}

std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint16_t> gold_reduce_hw(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint32_t> shape_dst{shape[0], shape[1], 1, 1};
if (zeropad) {
shape_dst[2] = 32;
Expand Down Expand Up @@ -225,12 +217,12 @@ std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const
return reduced;
}

std::vector<uint32_t> gold_standard_tilize_w_elwadd(const std::vector<uint32_t> &src0_vec, const std::vector<uint32_t> &src1_vec, const GoldenConfig &config) {
vector<uint32_t> gold_standard_tilize_w_elwadd(const vector<uint32_t> &src0_vec, const vector<uint32_t> &src1_vec, const GoldenConfig &config) {

std::vector<bfloat16> unpacked_tilize_src0_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(gold_standard_tilize(src0_vec, config));
std::vector<bfloat16> unpacked_src1_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(src1_vec);
vector<bfloat16> unpacked_tilize_src0_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(gold_standard_tilize(src0_vec, config));
vector<bfloat16> unpacked_src1_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(src1_vec);

std::vector<bfloat16> result_vec(unpacked_tilize_src0_vec.size());
vector<bfloat16> result_vec(unpacked_tilize_src0_vec.size());

std::transform(
unpacked_tilize_src0_vec.begin(),
Expand All @@ -244,5 +236,127 @@ std::vector<uint32_t> gold_standard_tilize_w_elwadd(const std::vector<uint32_t>
return tt::test_utils::pack_vector<uint32_t, bfloat16>(result_vec);
}

// Type-erased callable (a std::function, not a raw function pointer) that generates a random packed
// vector for one particular data format.
// NOTE(review): max_float is declared as int, so a fractional range width passed through this alias
// is truncated toward zero - confirm that this is intended for every data format.
using RandomVectorGenerator = std::function<vector<uint32_t>(uint32_t num_bytes, bool is_exp_a, int max_float, int seed, float offset)>;
// Type-erased callable (a std::function) that unpacks a packed uint32_t vector of one particular
// data format into a vector of floats.
using VectorUnpacker = std::function<vector<float>(const vector<uint32_t> &packed_input, bool row_major_output, bool is_exp_a)>;


// Generates a packed uint32_t vector of random values for the requested data format.
//
// Parameters:
//   lower, upper        - bounds of the requested value range
//   num_bytes           - size in bytes of the packed result
//   data_format         - Float16_b, Float32, Bfp8_b or Bfp4_b; any other format raises via TT_THROW
//   seed                - seed of the random engine (the positive half uses seed + 1 when zeroes are excluded)
//   exclude_zeroes      - when true, the first num_bytes / 2 bytes are generated on the negative side and the
//                         remaining bytes on the positive side; requires lower < 0 < upper, otherwise TT_THROW
//   golden_neg_epsilon  - upper end of the negative half when exclude_zeroes is set
//   golden_pos_epsilon  - lower end of the positive half when exclude_zeroes is set
//
// Returns:
//   the packed vector; for Float32 every element is offset + uniform(0, width), bit-copied into a uint32_t.
//
// The range width is carried as a float all the way to the Float32 generator. Previously it travelled through
// an `int` parameter, so a fractional width (e.g. 0.9999 for the range [0.0001, 1]) was truncated to 0 and every
// Float32 element collapsed to the offset.
vector<uint32_t> generate_random_vector_generalized(
    const float lower,
    const float upper,
    const size_t num_bytes,
    const tt::DataFormat data_format,
    const int seed,
    bool exclude_zeroes,
    float golden_neg_epsilon,
    float golden_pos_epsilon) {

    // Same shape as RandomVectorGenerator, except that the range width stays a float.
    using FloatRangeGenerator =
        std::function<vector<uint32_t>(uint32_t byte_count, bool is_exp_a, float width, int rng_seed, float offset)>;
    FloatRangeGenerator vector_generator;

    // Select the appropriate vector generator based on the data format
    switch (data_format) {
        case tt::DataFormat::Float16_b:
            vector_generator = [](uint32_t byte_count, bool is_exp_a, float width, int rng_seed, float offset) {
                // The helper was always handed an integral width; keep that explicit so its behaviour is unchanged.
                return create_random_vector_of_bfloat16(byte_count, static_cast<int>(width), rng_seed, offset);
            };
            break;
        case tt::DataFormat::Float32:
            vector_generator = [](uint32_t byte_count, bool is_exp_a, float width, int rng_seed, float offset) {
                std::mt19937 engine(rng_seed);
                std::uniform_real_distribution<float> distribution(0, width);
                vector<uint32_t> vec(byte_count / sizeof(uint32_t), 0);
                for (size_t i = 0; i < vec.size(); i++) {
                    const float num_float = distribution(engine) + offset;
                    // Store the raw IEEE bits of the float in the packed word.
                    std::memcpy(&vec[i], &num_float, sizeof(float));
                }
                return vec;
            };
            break;
        case tt::DataFormat::Bfp8_b:
            vector_generator = [](uint32_t byte_count, bool is_exp_a, float width, int rng_seed, float offset) {
                return create_random_vector_of_bfp8(byte_count, is_exp_a, static_cast<int>(width), rng_seed, offset);
            };
            break;
        case tt::DataFormat::Bfp4_b:
            vector_generator = [](uint32_t byte_count, bool is_exp_a, float width, int rng_seed, float offset) {
                return create_random_vector_of_bfp4(byte_count, is_exp_a, static_cast<int>(width), rng_seed, offset);
            };
            break;
        default:
            TT_THROW("Unsupported DataFormat!");
            return {};
    }

    if (exclude_zeroes) {
        if (lower < 0 && upper > 0) {
            vector<uint32_t> vec;

            // Split into negative and positive parts, avoiding zero
            auto negative_part = vector_generator(
                num_bytes / 2,
                false,
                std::abs(lower - golden_neg_epsilon),
                seed,
                lower);
            auto positive_part = vector_generator(
                num_bytes - num_bytes / 2,
                false,
                upper - golden_pos_epsilon,
                seed + 1,  // Use a different seed for the positive part
                golden_pos_epsilon);

            // Combine both parts: negative values first, positive values after them
            vec.insert(vec.end(), negative_part.begin(), negative_part.end());
            vec.insert(vec.end(), positive_part.begin(), positive_part.end());
            return vec;
        } else {
            TT_THROW("Cannot create a vector without zeroes with selected input value range!");
        }
    } else {
        // Use the generic generator for the entire range
        return vector_generator(num_bytes, false, upper - lower, seed, lower);
    }
}

// Unpacks a packed uint32_t vector into floats according to the given data format.
// Block-float formats are unpacked with row-major output and is_exp_a == false.
// Any format other than Float16_b, Float32, Bfp8_b or Bfp4_b raises via TT_THROW.
vector<float> unpack_generalized(const tt::DataFormat data_format, const vector<uint32_t>& packed_input) {
    // Fixed options forwarded to the block-float unpackers.
    const bool row_major_output = true;
    const bool is_exp_a = false;

    // Dispatch directly on the data format
    switch (data_format) {
        case tt::DataFormat::Float16_b: {
            vector<bfloat16> halves = unpack_uint32_vec_into_bfloat16_vec(packed_input);
            vector<float> widened(halves.size());
            size_t slot = 0;
            for (auto &half : halves) {
                widened[slot] = half.to_float();
                ++slot;
            }
            return widened;
        }
        case tt::DataFormat::Float32: {
            vector<float> reinterpreted(packed_input.size(), 0);
            // Each packed word already holds the raw bits of one float; copy them over unchanged.
            std::transform(
                packed_input.begin(),
                packed_input.end(),
                reinterpreted.begin(),
                [](const uint32_t &bits) {
                    float value = 0;
                    std::memcpy(&value, &bits, sizeof(uint32_t));
                    return value;
                });
            return reinterpreted;
        }
        case tt::DataFormat::Bfp8_b:
            return unpack_bfp8_tiles_into_float_vec(packed_input, row_major_output, is_exp_a);
        case tt::DataFormat::Bfp4_b:
            return unpack_bfp4_tiles_into_float_vec(packed_input, row_major_output, is_exp_a);
        default:
            TT_THROW("Unsupported DataFormat!");
            return {};
    }
}

} // unit_tests::compute
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
#include <limits>
#include <random>
#include <vector>
#include <algorithm>

#include "common/test_tiles.hpp"
#include "tt_metal/common/bfloat16.hpp"
#include "tt_metal/common/bfloat8.hpp"
#include "tt_metal/common/bfloat4.hpp"
#include "tt_metal/host_api.hpp"
#include "tt_metal/detail/tt_metal.hpp"
#include "tests/tt_metal/test_utils/packing.hpp"

//TODO: RT these functions should be templated for different data formats
namespace unit_tests::compute {
Expand Down Expand Up @@ -54,4 +63,31 @@ std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const
// Assumes all elements in bfloat16
std::vector<uint32_t> gold_standard_tilize_w_elwadd(const std::vector<uint32_t> &src0_vec, const std::vector<uint32_t> &src1_vec, const GoldenConfig &config);

// Random packed uint32_t vector generator which is data-format agnostic.
// Takes the following parameters:
//
// lower - a lower limit of the input range
// upper - an upper limit of the input range
// num_bytes - number of bytes that the vector will occupy
// data_format - data format of each element, packed to uint32_t, currently supporting Float16_b, Float32, Bfp8_b and Bfp4_b
// seed - randomization seed
// exclude_zeroes - if true, excludes values around zero, with the limits given by the next two parameters
// golden_neg_epsilon - small negative value; no negative element of the vector is meant to lie above it
// golden_pos_epsilon - small positive value; no positive element of the vector is meant to lie below it
//
// Returns:
//
// a uint32_t vector of packed values depending on the data format and given limits
std::vector<uint32_t> generate_random_vector_generalized(const float lower, const float upper, const size_t num_bytes, const tt::DataFormat data_format, const int seed, bool exclude_zeroes = false, float golden_neg_epsilon = -0.0001f, float golden_pos_epsilon = 0.0001f);

// Unpacking function which is data-format agnostic
// Takes the following parameters:
//
// data_format - data format in which the vector was packed, currently supporting Float16_b, Float32, Bfp8_b and Bfp4_b
// packed_input - a uint32_t packed vector
//
// Returns:
// a float vector of unpacked values depending on the data format
std::vector<float> unpack_generalized(const tt::DataFormat data_format, const std::vector<uint32_t>& packed_input);

} // unit_tests::compute
Loading
Loading