Skip to content

Commit

Permalink
#0: SFPU UT param sweep:
Browse files Browse the repository at this point in the history
- in_range [-7.0f, 7.0f]
- fp_32_dest_accum_en
- shape - from 1x1x1x1 up to 1x1x16x16
- num_tiles - from 1 up to 256
- data_formats - Bfp4_b, Bfp8_b, Float16_b, Fp32
  • Loading branch information
ncvetkovicTT committed Nov 1, 2024
1 parent a0589f1 commit 6bc2a31
Show file tree
Hide file tree
Showing 3 changed files with 358 additions and 337 deletions.
150 changes: 132 additions & 18 deletions tests/tt_metal/tt_metal/unit_tests/compute/test_golden_impls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,13 @@
//
// SPDX-License-Identifier: Apache-2.0

#include <algorithm>


#include "test_golden_impls.hpp"
#include "common/test_tiles.hpp"
#include "common/bfloat16.hpp"
#include "tt_metal/host_api.hpp"
#include "tt_metal/detail/tt_metal.hpp"
#include "tests/tt_metal/test_utils/packing.hpp"

using std::vector;

namespace unit_tests::compute {

std::vector<uint32_t> gold_standard_untilize(const std::vector<uint32_t> &src_vec, const GoldenConfig &config) {
vector<uint32_t> gold_standard_untilize(const vector<uint32_t> &src_vec, const GoldenConfig &config) {
vector<uint32_t> dst_vec;

int num_rows = config.num_tiles_r_dim * config.face_r_dim * (config.num_faces > 2 ? 2: 1);
Expand Down Expand Up @@ -74,7 +66,7 @@ std::vector<uint32_t> gold_standard_untilize(const std::vector<uint32_t> &src_ve
return dst_vec;
}

std::vector<uint32_t> gold_standard_tilize(const std::vector<uint32_t> &src_vec, const GoldenConfig &config) {
vector<uint32_t> gold_standard_tilize(const vector<uint32_t> &src_vec, const GoldenConfig &config) {
vector<uint32_t> dst_vec;

//TODO: RT update this one to use variable tile sizes
Expand Down Expand Up @@ -116,7 +108,7 @@ std::vector<uint32_t> gold_standard_tilize(const std::vector<uint32_t> &src_vec,
// input shape.x is assumed to have the full number of elements in bfloat16
// src_vec is expected to be untilized
// result is also untilized
std::vector<uint16_t> gold_transpose_wh(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape) {
vector<uint16_t> gold_transpose_wh(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape) {
vector<uint32_t> shapeT{shape[0], shape[1], shape[3], shape[2]};
TensAddr addr(shape);
TensAddr addrt(shapeT);
Expand All @@ -138,7 +130,7 @@ std::vector<uint16_t> gold_transpose_wh(const std::vector<uint16_t> &src_vec, co
// input shape.x is assumed to have the full number of elements in bfloat16
// src_vec is expected to be untilized
// result is also untilized
std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint16_t> gold_reduce_h(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint32_t> shape_dst{shape[0], shape[1], 1, shape[3]};
TT_FATAL(shape[2] > 0, "Error");
if (zeropad)
Expand Down Expand Up @@ -167,7 +159,7 @@ std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const
return reduced;
};

std::vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint32_t> shape_dst{shape[0], shape[1], shape[2], 1};
if (zeropad)
shape_dst[3] = 32;
Expand All @@ -194,7 +186,7 @@ std::vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const std::
return reduced;
}

std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint16_t> gold_reduce_hw(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint32_t> shape_dst{shape[0], shape[1], 1, 1};
if (zeropad) {
shape_dst[2] = 32;
Expand Down Expand Up @@ -225,12 +217,12 @@ std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const
return reduced;
}

std::vector<uint32_t> gold_standard_tilize_w_elwadd(const std::vector<uint32_t> &src0_vec, const std::vector<uint32_t> &src1_vec, const GoldenConfig &config) {
vector<uint32_t> gold_standard_tilize_w_elwadd(const vector<uint32_t> &src0_vec, const vector<uint32_t> &src1_vec, const GoldenConfig &config) {

std::vector<bfloat16> unpacked_tilize_src0_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(gold_standard_tilize(src0_vec, config));
std::vector<bfloat16> unpacked_src1_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(src1_vec);
vector<bfloat16> unpacked_tilize_src0_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(gold_standard_tilize(src0_vec, config));
vector<bfloat16> unpacked_src1_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(src1_vec);

std::vector<bfloat16> result_vec(unpacked_tilize_src0_vec.size());
vector<bfloat16> result_vec(unpacked_tilize_src0_vec.size());

std::transform(
unpacked_tilize_src0_vec.begin(),
Expand All @@ -244,5 +236,127 @@ std::vector<uint32_t> gold_standard_tilize_w_elwadd(const std::vector<uint32_t>
return tt::test_utils::pack_vector<uint32_t, bfloat16>(result_vec);
}

// Type-erased callable (std::function, not a raw function pointer) that produces a random packed
// uint32_t vector for one specific data format.
//   num_bytes - requested size of the packed result, in bytes
//   is_exp_a  - forwarded to the block-float (Bfp8/Bfp4) generators
//   max_float - width of the random range before `offset` is added; NOTE: declared as int, so a
//               fractional value passed by a caller is truncated when the callable is invoked
//   seed      - RNG seed
//   offset    - value added to every generated number
using RandomVectorGenerator = std::function<vector<uint32_t>(uint32_t num_bytes, bool is_exp_a, int max_float, int seed, float offset)>;
// Type-erased callable (std::function) that unpacks a packed uint32_t vector of one specific data
// format into a vector of floats.
//   packed_input     - the packed words to decode
//   row_major_output - forwarded to the block-float (Bfp8/Bfp4) unpackers
//   is_exp_a         - forwarded to the block-float (Bfp8/Bfp4) unpackers
using VectorUnpacker = std::function<vector<float>(const vector<uint32_t> &packed_input, bool row_major_output, bool is_exp_a)>;


// Generates a packed uint32_t vector of random values in a data-format agnostic way.
//
// Parameters:
//   lower, upper       - bounds of the requested value range
//   num_bytes          - size of the packed result, in bytes
//   data_format        - Float16_b, Float32, Bfp8_b or Bfp4_b; any other format raises via TT_THROW
//   seed               - RNG seed (the positive half of a zero-excluding vector uses seed + 1)
//   exclude_zeroes     - when true, the result is built from a negative part followed by a positive
//                        part so that values around zero are avoided; this requires
//                        lower < 0 < upper, otherwise TT_THROW is raised
//   golden_neg_epsilon - upper end of the negative part (expected to be a small negative number)
//   golden_pos_epsilon - lower end of the positive part (expected to be a small positive number)
//
// Returns:
//   the packed random vector produced by the generator selected for `data_format`.
//
// The width of the random range ("span") is carried as a float all the way to the per-format
// generator. Routing it through an int parameter silently dropped the fractional part
// (e.g. 6.9999 became 6), which shrank the generated range.
// NOTE(review): the create_random_vector_of_* helpers are defined elsewhere; if they take an
// integral maximum the fraction is still dropped at that call - confirm their signatures.
vector<uint32_t> generate_random_vector_generalized(
    const float lower,
    const float upper,
    const size_t num_bytes,
    const tt::DataFormat data_format,
    const int seed,
    bool exclude_zeroes,
    float golden_neg_epsilon,
    float golden_pos_epsilon) {

    // (byte count, span, seed, offset) -> packed words holding values drawn from a range of width
    // `span` shifted by `offset`.
    using PackedGenerator =
        std::function<vector<uint32_t>(uint32_t byte_count, float span, int rng_seed, float offset)>;
    PackedGenerator make_packed;

    // Select the generator matching the data format. The block-float helpers are always called
    // with is_exp_a == false.
    switch (data_format) {
        case tt::DataFormat::Float16_b:
            make_packed = [](uint32_t byte_count, float span, int rng_seed, float offset) {
                return create_random_vector_of_bfloat16(byte_count, span, rng_seed, offset);
            };
            break;
        case tt::DataFormat::Float32:
            make_packed = [](uint32_t byte_count, float span, int rng_seed, float offset) {
                std::mt19937 engine(static_cast<std::mt19937::result_type>(rng_seed));
                std::uniform_real_distribution<float> distribution(0.0f, span);
                vector<uint32_t> words(byte_count / sizeof(uint32_t), 0);
                for (uint32_t &word : words) {
                    const float value = distribution(engine) + offset;
                    // Store the raw IEEE-754 bit pattern of the float in the packed word.
                    std::memcpy(&word, &value, sizeof(float));
                }
                return words;
            };
            break;
        case tt::DataFormat::Bfp8_b:
            make_packed = [](uint32_t byte_count, float span, int rng_seed, float offset) {
                return create_random_vector_of_bfp8(byte_count, /*is_exp_a=*/false, span, rng_seed, offset);
            };
            break;
        case tt::DataFormat::Bfp4_b:
            make_packed = [](uint32_t byte_count, float span, int rng_seed, float offset) {
                return create_random_vector_of_bfp4(byte_count, /*is_exp_a=*/false, span, rng_seed, offset);
            };
            break;
        default:
            TT_THROW("Unsupported DataFormat!");
            return {};
    }

    if (!exclude_zeroes) {
        // One generator call covers the whole range.
        return make_packed(num_bytes, upper - lower, seed, lower);
    }

    if (!(lower < 0 && upper > 0)) {
        TT_THROW("Cannot create a vector without zeroes with selected input value range!");
    }

    // Split the byte budget into a negative and a positive part so values around zero are skipped.
    const size_t negative_bytes = num_bytes / 2;
    const size_t positive_bytes = num_bytes - negative_bytes;

    // Negative part: starts at `lower` and is intended to end at golden_neg_epsilon.
    vector<uint32_t> combined = make_packed(
        negative_bytes,
        std::abs(lower - golden_neg_epsilon),
        seed,
        lower);

    // Positive part: starts at golden_pos_epsilon and is intended to end at `upper`.
    // A different seed keeps it from mirroring the negative part.
    const vector<uint32_t> positive_part = make_packed(
        positive_bytes,
        upper - golden_pos_epsilon,
        seed + 1,
        golden_pos_epsilon);

    // Negative values first, positive values appended after them.
    combined.insert(combined.end(), positive_part.begin(), positive_part.end());
    return combined;
}

// Unpacks a packed uint32_t vector into floats according to `data_format`.
//
// Supported formats are Float16_b, Float32, Bfp8_b and Bfp4_b; any other format raises via
// TT_THROW. The block-float unpackers are always asked for row-major output with is_exp_a == false.
vector<float> unpack_generalized(const tt::DataFormat data_format, const vector<uint32_t>& packed_input) {
    switch (data_format) {
        case tt::DataFormat::Float16_b: {
            // Decode to bfloat16 first, then widen each element to float.
            vector<bfloat16> halves = unpack_uint32_vec_into_bfloat16_vec(packed_input);
            vector<float> widened(halves.size());
            for (size_t idx = 0; idx < halves.size(); ++idx) {
                widened[idx] = halves[idx].to_float();
            }
            return widened;
        }
        case tt::DataFormat::Float32: {
            // Each packed word already holds the bit pattern of one float; copy it over verbatim.
            vector<float> floats(packed_input.size(), 0);
            for (size_t idx = 0; idx < packed_input.size(); ++idx) {
                std::memcpy(&floats[idx], &packed_input[idx], sizeof(uint32_t));
            }
            return floats;
        }
        case tt::DataFormat::Bfp8_b:
            return unpack_bfp8_tiles_into_float_vec(packed_input, /*row_major_output=*/true, /*is_exp_a=*/false);
        case tt::DataFormat::Bfp4_b:
            return unpack_bfp4_tiles_into_float_vec(packed_input, /*row_major_output=*/true, /*is_exp_a=*/false);
        default:
            TT_THROW("Unsupported DataFormat!");
            return {};
    }
}

} // unit_tests::compute
36 changes: 36 additions & 0 deletions tests/tt_metal/tt_metal/unit_tests/compute/test_golden_impls.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
#include <limits>
#include <random>
#include <vector>
#include <algorithm>

#include "common/test_tiles.hpp"
#include "tt_metal/common/bfloat16.hpp"
#include "tt_metal/common/bfloat8.hpp"
#include "tt_metal/common/bfloat4.hpp"
#include "tt_metal/host_api.hpp"
#include "tt_metal/detail/tt_metal.hpp"
#include "tests/tt_metal/test_utils/packing.hpp"

//TODO: RT these functions should be templated for different data formats
namespace unit_tests::compute {
Expand Down Expand Up @@ -54,4 +63,31 @@ std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const
// Assumes all elements in bfloat16
std::vector<uint32_t> gold_standard_tilize_w_elwadd(const std::vector<uint32_t> &src0_vec, const std::vector<uint32_t> &src1_vec, const GoldenConfig &config);

// Data-format agnostic generator of a random vector packed into uint32_t words.
//
// Parameters:
//
// lower - lower limit of the input range
// upper - upper limit of the input range
// num_bytes - number of bytes that the packed vector will occupy
// data_format - data format of each element, packed to uint32_t; currently supports Float16_b, Float32, Bfp8_b and Bfp4_b
//               (any other format raises an error via TT_THROW)
// seed - randomization seed
// exclude_zeroes - if true, values around zero are excluded, using the limits given by the next two parameters;
//                  the range must then satisfy lower < 0 < upper, otherwise an error is raised via TT_THROW.
//                  The first num_bytes / 2 bytes are requested as negative values and the remaining bytes as
//                  positive values (generated with seed + 1); the negative part comes first in the result
// golden_neg_epsilon - small negative value; the negative part of the vector is meant to stay at or below it
// golden_pos_epsilon - small positive value; the positive part of the vector is meant to stay at or above it
//
// Returns:
//
// a uint32_t vector of packed values depending on the data format and given limits
std::vector<uint32_t> generate_random_vector_generalized(const float lower, const float upper, const size_t num_bytes, const tt::DataFormat data_format, const int seed, bool exclude_zeroes = false, float golden_neg_epsilon = -0.0001f, float golden_pos_epsilon = 0.0001f);

// Data-format agnostic unpacking function.
//
// Parameters:
//
// data_format - data format in which the vector was packed; currently supports Float16_b, Float32, Bfp8_b and Bfp4_b
//               (any other format raises an error via TT_THROW)
// packed_input - a uint32_t packed vector
//
// Bfp8_b and Bfp4_b inputs are unpacked with row-major output requested and is_exp_a set to false.
//
// Returns:
// a float vector of unpacked values depending on the data format
std::vector<float> unpack_generalized(const tt::DataFormat data_format, const std::vector<uint32_t>& packed_input);

} // unit_tests::compute
Loading

0 comments on commit 6bc2a31

Please sign in to comment.