tenstorrent · ncvetkovicTT · Oct 29, 2024 · Nov 1, 2024
@@ -2,21 +2,13 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#include <algorithm>
-
-
 #include "test_golden_impls.hpp"
-#include "common/test_tiles.hpp"
-#include "common/bfloat16.hpp"
-#include "tt_metal/host_api.hpp"
-#include "tt_metal/detail/tt_metal.hpp"
-#include "tests/tt_metal/test_utils/packing.hpp"
 
 using std::vector;
 
 namespace unit_tests::compute {
 
-std::vector<uint32_t> gold_standard_untilize(const std::vector<uint32_t> &src_vec, const GoldenConfig &config) {
+vector<uint32_t> gold_standard_untilize(const vector<uint32_t> &src_vec, const GoldenConfig &config) {
     vector<uint32_t> dst_vec;
 
     int num_rows = config.num_tiles_r_dim * config.face_r_dim * (config.num_faces > 2 ? 2: 1);
@@ -74,7 +66,7 @@ std::vector<uint32_t> gold_standard_untilize(const std::vector<uint32_t> &src_ve
     return dst_vec;
 }
 
-std::vector<uint32_t> gold_standard_tilize(const std::vector<uint32_t> &src_vec, const GoldenConfig &config) {
+vector<uint32_t> gold_standard_tilize(const vector<uint32_t> &src_vec, const GoldenConfig &config) {
     vector<uint32_t> dst_vec;
 
     //TODO: RT update this one to use variable tile sizes
@@ -116,7 +108,7 @@ std::vector<uint32_t> gold_standard_tilize(const std::vector<uint32_t> &src_vec,
 // input shape.x is assumed to have the full number of elements in bfloat16
 // src_vec is expected to be untilized
 // result is also untilized
-std::vector<uint16_t> gold_transpose_wh(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape) {
+vector<uint16_t> gold_transpose_wh(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape) {
     vector<uint32_t> shapeT{shape[0], shape[1], shape[3], shape[2]};
     TensAddr addr(shape);
     TensAddr addrt(shapeT);
@@ -138,7 +130,7 @@ std::vector<uint16_t> gold_transpose_wh(const std::vector<uint16_t> &src_vec, co
 // input shape.x is assumed to have the full number of elements in bfloat16
 // src_vec is expected to be untilized
 // result is also untilized
-std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
+vector<uint16_t> gold_reduce_h(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
     vector<uint32_t> shape_dst{shape[0], shape[1], 1, shape[3]};
     TT_FATAL(shape[2] > 0, "Error");
     if (zeropad)
@@ -167,7 +159,7 @@ std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const
     return reduced;
 };
 
-std::vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
+vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
     vector<uint32_t> shape_dst{shape[0], shape[1], shape[2], 1};
     if (zeropad)
         shape_dst[3] = 32;
@@ -194,7 +186,7 @@ std::vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const std::
     return reduced;
 }
 
-std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
+vector<uint16_t> gold_reduce_hw(const vector<uint16_t> &src_vec, const vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
     vector<uint32_t> shape_dst{shape[0], shape[1], 1, 1};
     if (zeropad) {
         shape_dst[2] = 32;
@@ -225,12 +217,12 @@ std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const
     return reduced;
 }
 
-std::vector<uint32_t> gold_standard_tilize_w_elwadd(const std::vector<uint32_t> &src0_vec, const std::vector<uint32_t> &src1_vec, const GoldenConfig &config) {
+vector<uint32_t> gold_standard_tilize_w_elwadd(const vector<uint32_t> &src0_vec, const vector<uint32_t> &src1_vec, const GoldenConfig &config) {
 
-    std::vector<bfloat16> unpacked_tilize_src0_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(gold_standard_tilize(src0_vec, config));
-    std::vector<bfloat16> unpacked_src1_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(src1_vec);
+    vector<bfloat16> unpacked_tilize_src0_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(gold_standard_tilize(src0_vec, config));
+    vector<bfloat16> unpacked_src1_vec = tt::test_utils::unpack_vector<bfloat16, uint32_t>(src1_vec);
 
-    std::vector<bfloat16> result_vec(unpacked_tilize_src0_vec.size());
+    vector<bfloat16> result_vec(unpacked_tilize_src0_vec.size());
 
     std::transform(
         unpacked_tilize_src0_vec.begin(),
@@ -244,5 +236,127 @@ std::vector<uint32_t> gold_standard_tilize_w_elwadd(const std::vector<uint32_t>
     return tt::test_utils::pack_vector<uint32_t, bfloat16>(result_vec);
 }
 
+// Signature shared by the per-format random generators. A generator returns a packed uint32_t vector
+// occupying `num_bytes` bytes; `max_float` is the width of the value range and `offset` its start
+// (the Float32 generator below draws from [offset, offset + max_float)).
+// `max_float` is a float so that fractional range widths are not truncated at the call boundary.
+using RandomVectorGenerator = std::function<vector<uint32_t>(uint32_t num_bytes, bool is_exp_a, float max_float, int seed, float offset)>;
+// Signature shared by the per-format unpackers: converts a packed uint32_t vector into a float vector.
+using VectorUnpacker = std::function<vector<float>(const vector<uint32_t> &packed_input, bool row_major_output, bool is_exp_a)>;
+
+
+// Generates a packed uint32_t vector of random values in the requested data format.
+//
+// lower, upper        - limits of the value range; `lower` must not exceed `upper`
+// num_bytes           - number of bytes the packed vector occupies
+// data_format         - Float16_b, Float32, Bfp8_b or Bfp4_b; anything else is rejected with TT_THROW
+// seed                - randomization seed (the positive half of a zero-excluding vector uses seed + 1)
+// exclude_zeroes      - if true, the first half of the bytes is generated from the negative range
+//                       starting at `lower`, the rest from the positive range starting at
+//                       `golden_pos_epsilon`; requires lower < 0 < upper, otherwise TT_THROW
+// golden_neg_epsilon  - upper end of the negative range when exclude_zeroes is true
+// golden_pos_epsilon  - lower end of the positive range when exclude_zeroes is true; must not exceed `upper`
+//
+// Returns the packed vector; when exclude_zeroes is true it is the negative part followed by the positive part.
+vector<uint32_t> generate_random_vector_generalized(
+    const float lower,
+    const float upper,
+    const size_t num_bytes,
+    const tt::DataFormat data_format,
+    const int seed,
+    bool exclude_zeroes,
+    float golden_neg_epsilon,
+    float golden_pos_epsilon) {
+
+    // An inverted range would hand std::uniform_real_distribution a negative width, which it does not allow.
+    TT_FATAL(lower <= upper, "Lower limit of the input range must not exceed the upper limit!");
+
+    RandomVectorGenerator vector_generator;
+
+    // Select the appropriate vector generator based on the data format
+    switch (data_format) {
+        case tt::DataFormat::Float16_b:
+            vector_generator = [](uint32_t bytes, bool /*is_exp_a*/, float max_float, int rng_seed, float offset) {
+                // The cast keeps the integer width this helper received while max_float was declared as int.
+                // NOTE(review): if the helper accepts a fractional width, drop the cast — TODO confirm.
+                return create_random_vector_of_bfloat16(bytes, static_cast<int>(max_float), rng_seed, offset);
+            };
+            break;
+        case tt::DataFormat::Float32:
+            vector_generator = [](uint32_t bytes, bool /*is_exp_a*/, float max_float, int rng_seed, float offset) {
+                std::uniform_real_distribution<float> distribution(0.0f, max_float);
+                std::mt19937 engine(rng_seed);
+                vector<uint32_t> packed(bytes / sizeof(uint32_t), 0);
+                for (uint32_t &word : packed) {
+                    const float value = distribution(engine) + offset;
+                    // Store the raw float bits in the uint32_t slot.
+                    std::memcpy(&word, &value, sizeof(float));
+                }
+                return packed;
+            };
+            break;
+        case tt::DataFormat::Bfp8_b:
+            vector_generator = [](uint32_t bytes, bool is_exp_a, float max_float, int rng_seed, float offset) {
+                // Same integer width as before the alias change; see the Float16_b note above.
+                return create_random_vector_of_bfp8(bytes, is_exp_a, static_cast<int>(max_float), rng_seed, offset);
+            };
+            break;
+        case tt::DataFormat::Bfp4_b:
+            vector_generator = [](uint32_t bytes, bool is_exp_a, float max_float, int rng_seed, float offset) {
+                // Same integer width as before the alias change; see the Float16_b note above.
+                return create_random_vector_of_bfp4(bytes, is_exp_a, static_cast<int>(max_float), rng_seed, offset);
+            };
+            break;
+        default:
+            TT_THROW("Unsupported DataFormat!");
+            return {};
+    }
+
+    if (!exclude_zeroes) {
+        // Use the generic generator for the entire range
+        return vector_generator(num_bytes, false, upper - lower, seed, lower);
+    }
+
+    if (!(lower < 0 && upper > 0)) {
+        TT_THROW("Cannot create a vector without zeroes with selected input value range!");
+    }
+    // The positive part spans [golden_pos_epsilon, upper); its width must not be negative.
+    TT_FATAL(golden_pos_epsilon <= upper, "Positive epsilon must not exceed the upper limit of the input range!");
+
+    // Split into negative and positive parts, avoiding zero.
+    // NOTE(review): each part is packed independently with roughly half of num_bytes; for the
+    // block-float formats confirm that the helpers accept these sizes.
+    auto negative_part = vector_generator(
+        num_bytes / 2,
+        false,
+        std::abs(lower - golden_neg_epsilon),
+        seed,
+        lower);
+    auto positive_part = vector_generator(
+        num_bytes - num_bytes / 2,
+        false,
+        upper - golden_pos_epsilon,
+        seed + 1, // Use a different seed for the positive part
+        golden_pos_epsilon);
+
+    // Combine both parts: negative values first, positive values after them
+    negative_part.insert(negative_part.end(), positive_part.begin(), positive_part.end());
+    return negative_part;
+}
+
+// Unpacks a packed uint32_t vector into a float vector according to `data_format`.
+//
+// data_format  - Float16_b, Float32, Bfp8_b or Bfp4_b; anything else is rejected with TT_THROW
+// packed_input - the packed uint32_t vector
+//
+// The selected unpacker is always invoked with row_major_output = true and is_exp_a = false.
+vector<float> unpack_generalized(const tt::DataFormat data_format, const vector<uint32_t>& packed_input) {
+    VectorUnpacker unpacker_function;
+
+    // Select the appropriate unpacker based on the data format
+    switch (data_format) {
+        case tt::DataFormat::Float16_b:
+            unpacker_function = [](const vector<uint32_t> &packed, bool /*row_major_output*/, bool /*is_exp_a*/) {
+                vector<bfloat16> halves = unpack_uint32_vec_into_bfloat16_vec(packed);
+                vector<float> unpacked(halves.size());
+                for (size_t idx = 0; idx < halves.size(); ++idx) {
+                    unpacked[idx] = halves[idx].to_float();
+                }
+                return unpacked;
+            };
+            break;
+        case tt::DataFormat::Float32:
+            unpacker_function = [](const vector<uint32_t> &packed, bool /*row_major_output*/, bool /*is_exp_a*/) {
+                vector<float> unpacked(packed.size(), 0);
+                for (size_t idx = 0; idx < packed.size(); ++idx) {
+                    // Reinterpret the stored 32 bits as a float.
+                    std::memcpy(&unpacked[idx], &packed[idx], sizeof(uint32_t));
+                }
+                return unpacked;
+            };
+            break;
+        case tt::DataFormat::Bfp8_b:
+            unpacker_function = [](const vector<uint32_t> &packed, bool row_major_output, bool is_exp_a) {
+                return unpack_bfp8_tiles_into_float_vec(packed, row_major_output, is_exp_a);
+            };
+            break;
+        case tt::DataFormat::Bfp4_b:
+            unpacker_function = [](const vector<uint32_t> &packed, bool row_major_output, bool is_exp_a) {
+                return unpack_bfp4_tiles_into_float_vec(packed, row_major_output, is_exp_a);
+            };
+            break;
+        default:
+            TT_THROW("Unsupported DataFormat!");
+            return {};
+    }
+    return unpacker_function(packed_input, true, false);
+}
 
 }   // unit_tests::compute
@@ -9,6 +9,15 @@
 #include <limits>
 #include <random>
 #include <vector>
+#include <algorithm>
+
+#include "common/test_tiles.hpp"
+#include "tt_metal/common/bfloat16.hpp"
+#include "tt_metal/common/bfloat8.hpp"
+#include "tt_metal/common/bfloat4.hpp"
+#include "tt_metal/host_api.hpp"
+#include "tt_metal/detail/tt_metal.hpp"
+#include "tests/tt_metal/test_utils/packing.hpp"
 
 //TODO: RT these functions should be templated for different data formats
 namespace unit_tests::compute {
@@ -54,4 +63,31 @@ std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const
 // Assumes all elements in bfloat16
 std::vector<uint32_t> gold_standard_tilize_w_elwadd(const std::vector<uint32_t> &src0_vec, const std::vector<uint32_t> &src1_vec, const GoldenConfig &config);
 
+// Random packed uint32_t vector generator which is data-format agnostic.
+// Takes the following parameters:
+//
+// lower - lower limit of the input range
+// upper - upper limit of the input range
+// num_bytes - number of bytes that the vector will occupy
+// data_format - data format of each element, packed to uint32_t; currently supports Float16_b, Float32, Bfp8_b and Bfp4_b
+// seed - randomization seed
+// exclude_zeroes - if true, excludes values around zero, with the limits given by the next two parameters
+// golden_neg_epsilon - small negative value that ends the negative part of the range when exclude_zeroes is true
+// golden_pos_epsilon - small positive value that starts the positive part of the range when exclude_zeroes is true
+//
+// Returns:
+//
+// a uint32_t vector of packed values depending on the data format and given limits
+std::vector<uint32_t> generate_random_vector_generalized(const float lower, const float upper, const size_t num_bytes, const tt::DataFormat data_format, const int seed, bool exclude_zeroes = false, float golden_neg_epsilon = -0.0001f, float golden_pos_epsilon = 0.0001f);
+
+// Unpacking function which is data-format agnostic; it always unpacks with row-major output and is_exp_a = false.
+// Takes the following parameters:
+//
+// data_format - data format in which the vector was packed; currently supports Float16_b, Float32, Bfp8_b and Bfp4_b
+// packed_input - a uint32_t packed vector
+//
+// Returns:
+// a float vector of unpacked values depending on the data format
+std::vector<float> unpack_generalized(const tt::DataFormat data_format, const std::vector<uint32_t>& packed_input);
+
 }   // unit_tests::compute