Skip to content

Commit

Permalink
#0: Faster builds by enabling Unity build for TTNN and tests (#14461)
Browse files Browse the repository at this point in the history
* #0: Unity builds

* #0: Unity build for tests

* #0: More unity builds

* #0: Cleanup

* #0: Rename ANON_NAMESPACE to CMAKE_UNIQUE_NAMESPACE

* #0: Allow to disable unity builds

* #0: Disable unity builds if export commands is on

* #0: Raise min cmake version to 3.20

* #0: CMake fixes

* #0: Review fixes - cmake cleanup

* #0: Disable unity builds on older cmake

* #0: Build script fixup

* #0: Build fix
  • Loading branch information
sminakov-tt authored Oct 30, 2024
1 parent 5e590a3 commit 948fafb
Show file tree
Hide file tree
Showing 114 changed files with 495 additions and 295 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ endif()
list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

include(project_options)
include(unity)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
Expand Down Expand Up @@ -121,6 +122,7 @@ message(STATUS "Build Python bindings: ${WITH_PYTHON_BINDINGS}")
message(STATUS "Build Programming Examples: ${BUILD_PROGRAMMING_EXAMPLES}")
message(STATUS "Build TT METAL Tests: ${TT_METAL_BUILD_TESTS}")
message(STATUS "Build TTNN Tests: ${TTNN_BUILD_TESTS}")
message(STATUS "Build with Unity builds: ${TT_UNITY_BUILDS}")
############################################################################################################################

if(ENABLE_BUILD_TIME_TRACE)
Expand Down
17 changes: 15 additions & 2 deletions build_metal.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ show_help() {
echo " --debug Set the build type as Debug."
echo " --clean Remove build workspaces."
echo " --build-static-libs Build tt_metal (not ttnn) as a static lib (BUILD_SHARED_LIBS=OFF)"
echo " --disable-unity-builds Disable Unity builds"
}

clean() {
Expand All @@ -49,11 +50,12 @@ build_metal_tests="OFF"
build_umd_tests="OFF"
build_programming_examples="OFF"
build_static_libs="OFF"
unity_builds="ON"

declare -a cmake_args

OPTIONS=h,e,c,t,a,m,s,u,b:,p
LONGOPTIONS=help,export-compile-commands,enable-ccache,enable-time-trace,enable-asan,enable-msan,enable-tsan,enable-ubsan,build-type:,enable-profiler,install-prefix:,build-tests,build-ttnn-tests,build-metal-tests,build-umd-tests,build-programming-examples,build-static-libs,release,development,debug,clean
LONGOPTIONS=help,export-compile-commands,enable-ccache,enable-time-trace,enable-asan,enable-msan,enable-tsan,enable-ubsan,build-type:,enable-profiler,install-prefix:,build-tests,build-ttnn-tests,build-metal-tests,build-umd-tests,build-programming-examples,build-static-libs,disable-unity-builds,release,development,debug,clean

# Parse the options
PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTIONS --name "$0" -- "$@")
Expand All @@ -70,7 +72,7 @@ while true; do
-h|--help)
show_help;exit 0;;
-e|--export-compile-commands)
export_compile_commands="ON";;
export_compile_commands="ON";unity_builds="OFF";;
-c|--enable-ccache)
enable_ccache="ON";;
-t|--enable-time-trace)
Expand Down Expand Up @@ -101,6 +103,8 @@ while true; do
build_programming_examples="ON";;
--build-static-libs)
build_static_libs="ON";;
--disable-unity-builds)
unity_builds="OFF";;
--release)
build_type="Release";;
--development)
Expand Down Expand Up @@ -156,6 +160,7 @@ echo "INFO: Enable UndefinedBehaviorSanitizer: $enable_ubsan"
echo "INFO: Build directory: $build_dir"
echo "INFO: Install Prefix: $cmake_install_prefix"
echo "INFO: Build tests: $build_tests"
echo "INFO: Enable Unity builds: $unity_builds"

# Prepare cmake arguments
cmake_args+=("-B" "$build_dir")
Expand Down Expand Up @@ -194,6 +199,8 @@ fi

# Forward the compile-commands export setting to CMake. The flag is passed
# explicitly in both states: ON when requested, OFF for any other value.
case "$export_compile_commands" in
    ON) cmake_args+=("-DCMAKE_EXPORT_COMPILE_COMMANDS=ON") ;;
    *)  cmake_args+=("-DCMAKE_EXPORT_COMPILE_COMMANDS=OFF") ;;
esac

if [ "$build_tests" = "ON" ]; then
Expand Down Expand Up @@ -222,6 +229,12 @@ if [ "$build_static_libs" = "ON" ]; then
cmake_args+=("-DBUILD_SHARED_LIBS=OFF")
fi

# Forward the Unity build setting to CMake. TT_UNITY_BUILDS is always passed
# explicitly: ON only when unity_builds is exactly "ON", OFF otherwise.
case "$unity_builds" in
    ON) cmake_args+=("-DTT_UNITY_BUILDS=ON") ;;
    *)  cmake_args+=("-DTT_UNITY_BUILDS=OFF") ;;
esac

# Create the build directory and point the top-level "build" symlink at it.
# The expansions are quoted so that a build path containing whitespace or glob
# characters reaches mkdir/ln as a single, literal argument instead of being
# word-split or pathname-expanded by the shell.
mkdir -p "$build_dir"
ln -nsf "$build_dir" build
Expand Down
1 change: 1 addition & 0 deletions cmake/helper_functions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ function(CREATE_EAGER_TEST_EXE TESTLIST)
set(TEST_TARGET ${TEST_NAME})
endif()
add_executable(${TEST_TARGET} ${TEST_SRC_PATH})
TT_ENABLE_UNITY_BUILD(${TEST_TARGET})

target_link_libraries(
${TEST_TARGET}
Expand Down
12 changes: 12 additions & 0 deletions cmake/project_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,16 @@ option(BUILD_PROGRAMMING_EXAMPLES "Enables build of tt_metal programming example
option(TT_METAL_BUILD_TESTS "Enables build of tt_metal tests" OFF)
option(TTNN_BUILD_TESTS "Enables build of ttnn tests" OFF)
option(ENABLE_CCACHE "Build with compiler cache" FALSE)
option(TT_UNITY_BUILDS "Build with Unity builds" ON)
###########################################################################################

# Automatically turn Unity builds off in configurations where this project does
# not use them. Each check is independent, so when both conditions hold both
# status messages are printed. The set() calls create a normal variable that
# takes precedence over the cached option value for the rest of this scope.
if(TT_UNITY_BUILDS)
    # Case 1: a compile-commands export was requested.
    if(CMAKE_EXPORT_COMPILE_COMMANDS)
        message(STATUS "Disabling Unity builds because CMAKE_EXPORT_COMPILE_COMMANDS is ON")
        set(TT_UNITY_BUILDS OFF)
    endif()

    # Case 2: the running CMake is older than 3.20.
    if(CMAKE_VERSION VERSION_LESS 3.20)
        message(STATUS "CMake 3.20 or newer is required for Unity builds, disabling")
        set(TT_UNITY_BUILDS OFF)
    endif()
endif()
12 changes: 12 additions & 0 deletions cmake/unity.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# TT_ENABLE_UNITY_BUILD(<target>)
#
# Opts <target> into CMake Unity (jumbo) builds when TT_UNITY_BUILDS is true;
# otherwise does nothing.
#
# Properties set on the target:
#   UNITY_BUILD            ON
#   UNITY_BUILD_UNIQUE_ID  "CMAKE_UNIQUE_NAMESPACE" - the name of a preprocessor
#                          symbol that CMake defines to a different value for
#                          each source file placed in a unity batch.
#
# Example:
#   add_executable(my_tests a.cpp b.cpp)
#   TT_ENABLE_UNITY_BUILD(my_tests)
function(TT_ENABLE_UNITY_BUILD TARGET)
    # Unity builds are switched off globally: leave the target untouched.
    if(NOT TT_UNITY_BUILDS)
        return()
    endif()

    set_property(TARGET ${TARGET} PROPERTY UNITY_BUILD ON)
    set_property(TARGET ${TARGET} PROPERTY UNITY_BUILD_UNIQUE_ID "CMAKE_UNIQUE_NAMESPACE")
endfunction()
47 changes: 0 additions & 47 deletions tests/tt_metal/test_utils/df/bfloat16.hpp

This file was deleted.

1 change: 0 additions & 1 deletion tests/tt_metal/test_utils/df/df.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@
// SPDX-License-Identifier: Apache-2.0

#pragma once
#include "tt_metal/test_utils/df/bfloat16.hpp"
#include "tt_metal/test_utils/df/float32.hpp"
1 change: 1 addition & 0 deletions tests/tt_metal/tt_metal/unit_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ add_executable(
${UNIT_TESTS_SRC}
$<TARGET_OBJECTS:unit_tests_common_o>
)
TT_ENABLE_UNITY_BUILD(unit_tests)
add_executable(unit_tests_galaxy ${CMAKE_CURRENT_SOURCE_DIR}/multichip/galaxy_cluster_api.cpp)

target_link_libraries(
Expand Down
4 changes: 2 additions & 2 deletions tests/tt_metal/tt_metal/unit_tests/buffer/test_banked.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,8 @@ bool reader_datacopy_writer(Device* device, const BankedConfig& cfg) {
////////////////////////////////////////////////////////////////////////////
// Stimulus Generation
////////////////////////////////////////////////////////////////////////////
std::vector<uint32_t> input_packed = tt::test_utils::generate_packed_uniform_random_vector<uint32_t, tt::test_utils::df::bfloat16>(
-1.0f, 1.0f, cfg.size_bytes / tt::test_utils::df::bfloat16::SIZEOF, std::chrono::system_clock::now().time_since_epoch().count());
std::vector<uint32_t> input_packed = tt::test_utils::generate_packed_uniform_random_vector<uint32_t, bfloat16>(
-1.0f, 1.0f, cfg.size_bytes / bfloat16::SIZEOF, std::chrono::system_clock::now().time_since_epoch().count());

////////////////////////////////////////////////////////////////////////////
// Compile and Execute Application
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "gtest/gtest.h"
#include "tt_metal/host_api.hpp"
#include "tt_metal/test_utils/env_vars.hpp"
Expand Down
34 changes: 17 additions & 17 deletions tests/tt_metal/tt_metal/unit_tests/compute/test_broadcast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ struct BroadcastConfig {
MathFidelity math_fidelity = MathFidelity::HiFi4;
};

void mask_src_b_for_broadcast(std::vector<tt::test_utils::df::bfloat16>& tile, const std::vector<uint32_t> &shape, BroadcastDim dim) {
void mask_src_b_for_broadcast(std::vector<bfloat16>& tile, const std::vector<uint32_t> &shape, BroadcastDim dim) {
int num_rows = shape.at(0);
int num_cols = shape.at(1);

Expand All @@ -83,14 +83,14 @@ void mask_src_b_for_broadcast(std::vector<tt::test_utils::df::bfloat16>& tile, c
}
}

std::vector<tt::test_utils::df::bfloat16> gold_broadcast(std::vector<tt::test_utils::df::bfloat16>& src_a, std::vector<tt::test_utils::df::bfloat16>& src_b, const std::vector<uint32_t> &shape, EltwiseOp op, BroadcastDim dim, MathFidelity math_fidelity = MathFidelity::HiFi4) {
std::vector<bfloat16> gold_broadcast(std::vector<bfloat16>& src_a, std::vector<bfloat16>& src_b, const std::vector<uint32_t> &shape, EltwiseOp op, BroadcastDim dim, MathFidelity math_fidelity = MathFidelity::HiFi4) {
int num_rows = shape.at(0);
int num_cols = shape.at(1);

uint16_t srca_fid_mask = 0xFFFF;
uint16_t srcb_fid_mask = 0xFFFF;

std::vector<tt::test_utils::df::bfloat16> golden(num_cols * num_rows);
std::vector<bfloat16> golden(num_cols * num_rows);
auto arch = get_arch_from_string(get_umd_arch_name());

switch (math_fidelity) {
Expand All @@ -103,7 +103,7 @@ std::vector<tt::test_utils::df::bfloat16> gold_broadcast(std::vector<tt::test_ut

for (int i = 0; i < num_rows; i++) {
for (int j = 0; j < num_cols; j++) {
tt::test_utils::df::bfloat16 broadcast_value;
bfloat16 broadcast_value;
switch (dim)
{
case BroadcastDim::ROW: { broadcast_value = src_b[j]; break; }
Expand All @@ -118,8 +118,8 @@ std::vector<tt::test_utils::df::bfloat16> gold_broadcast(std::vector<tt::test_ut
case EltwiseOp::SUB: { golden[i * num_cols + j] = src_a[i * num_cols + j].to_float() - broadcast_value.to_float(); break; }
case EltwiseOp::MUL: {
golden[i * num_cols + j] =
tt::test_utils::df::bfloat16(std::bit_cast<uint32_t>(src_a[i * num_cols + j].to_packed() & srca_fid_mask)).to_float() *
tt::test_utils::df::bfloat16(std::bit_cast<uint32_t>(broadcast_value.to_packed() & srcb_fid_mask)).to_float();
bfloat16(std::bit_cast<uint32_t>(src_a[i * num_cols + j].to_packed() & srca_fid_mask)).to_float() *
bfloat16(std::bit_cast<uint32_t>(broadcast_value.to_packed() & srcb_fid_mask)).to_float();
break;
}
default: { TT_THROW("Unsupported EltwiseOp={}", op); break; }
Expand All @@ -142,7 +142,7 @@ void run_single_core_broadcast(tt_metal::Device* device, const BroadcastConfig&
constexpr uint32_t tile_width = 32;
constexpr uint32_t tile_height = 32;

constexpr uint32_t single_tile_size = tile_width * tile_height * tt::test_utils::df::bfloat16::SIZEOF;
constexpr uint32_t single_tile_size = tile_width * tile_height * bfloat16::SIZEOF;

tt_metal::InterleavedBufferConfig dram_config{
.device=device,
Expand Down Expand Up @@ -244,25 +244,25 @@ void run_single_core_broadcast(tt_metal::Device* device, const BroadcastConfig&
(uint32_t)1,
});

std::vector<tt::test_utils::df::bfloat16> input0 = generate_uniform_random_vector<tt::test_utils::df::bfloat16>(
std::vector<bfloat16> input0 = generate_uniform_random_vector<bfloat16>(
-1.0f,
1.0f,
single_tile_size / tt::test_utils::df::bfloat16::SIZEOF,
single_tile_size / bfloat16::SIZEOF,
std::chrono::system_clock::now().time_since_epoch().count());

std::vector<tt::test_utils::df::bfloat16> input1 = generate_uniform_random_vector<tt::test_utils::df::bfloat16>(
std::vector<bfloat16> input1 = generate_uniform_random_vector<bfloat16>(
-1.0f,
1.0f,
single_tile_size / tt::test_utils::df::bfloat16::SIZEOF,
single_tile_size / bfloat16::SIZEOF,
std::chrono::system_clock::now().time_since_epoch().count());

mask_src_b_for_broadcast(input1, {tile_width, tile_height}, test_config.broadcast_dim);

std::vector<tt::test_utils::df::bfloat16> golden = gold_broadcast(input0, input1, {tile_width, tile_height}, test_config.eltwise_op, test_config.broadcast_dim, test_config.math_fidelity);
std::vector<bfloat16> golden = gold_broadcast(input0, input1, {tile_width, tile_height}, test_config.eltwise_op, test_config.broadcast_dim, test_config.math_fidelity);

auto packed_input0 = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(input0);
auto packed_input1 = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(input1);
auto packed_golden = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(golden);
auto packed_input0 = pack_vector<uint32_t, bfloat16>(input0);
auto packed_input1 = pack_vector<uint32_t, bfloat16>(input1);
auto packed_golden = pack_vector<uint32_t, bfloat16>(golden);
unit_tests::compute::GoldenConfig config = {
.num_tiles_r_dim = tile_width/32,
.num_tiles_c_dim = tile_height/32
Expand All @@ -279,10 +279,10 @@ void run_single_core_broadcast(tt_metal::Device* device, const BroadcastConfig&
tt_metal::detail::ReadFromBuffer(dst_dram_buffer, dest_buffer_data);
auto dest_buffer_data_untilized = unit_tests::compute::gold_standard_untilize(dest_buffer_data, config);

bool result = is_close_packed_vectors<tt::test_utils::df::bfloat16, uint32_t>(
bool result = is_close_packed_vectors<bfloat16, uint32_t>(
dest_buffer_data_untilized,
packed_golden,
[&](const tt::test_utils::df::bfloat16& a, const tt::test_utils::df::bfloat16& b) {
[&](const bfloat16& a, const bfloat16& b) {
return is_close(a, b, 0.0155);
});
ASSERT_TRUE(result);
Expand Down
20 changes: 10 additions & 10 deletions tests/tt_metal/tt_metal/unit_tests/compute/test_cumsum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ struct CumsumConfig {
bool rowwise;
};

std::vector<tt::test_utils::df::bfloat16> gold_cumsum(std::vector<tt::test_utils::df::bfloat16>& src, const std::vector<uint32_t> &shape, bool rowwise) {
std::vector<bfloat16> gold_cumsum(std::vector<bfloat16>& src, const std::vector<uint32_t> &shape, bool rowwise) {
int N = shape.at(0);
int W = shape.at(1);
int H = shape.at(2);

std::vector<tt::test_utils::df::bfloat16> golden(N * W * H);
std::vector<bfloat16> golden(N * W * H);

int dim_a = rowwise ? H : W;
int dim_b = rowwise ? W : H;
Expand Down Expand Up @@ -57,7 +57,7 @@ void run_single_core_cumsum(tt_metal::Device* device, const CumsumConfig& test_c
constexpr uint32_t tile_width = 32;
constexpr uint32_t tile_height = 32;

constexpr uint32_t single_tile_size = tile_width * tile_height * tt::test_utils::df::bfloat16::SIZEOF;
constexpr uint32_t single_tile_size = tile_width * tile_height * bfloat16::SIZEOF;

uint32_t W = test_config.Wt * tile_width;
uint32_t H = test_config.Ht * tile_height;
Expand Down Expand Up @@ -147,16 +147,16 @@ void run_single_core_cumsum(tt_metal::Device* device, const CumsumConfig& test_c
(uint32_t)test_config.Ht * test_config.Wt // Used for transposing kernel
});

std::vector<tt::test_utils::df::bfloat16> input = generate_uniform_random_vector<tt::test_utils::df::bfloat16>(
std::vector<bfloat16> input = generate_uniform_random_vector<bfloat16>(
-1.0f,
1.0f,
dram_buffer_size / tt::test_utils::df::bfloat16::SIZEOF,
dram_buffer_size / bfloat16::SIZEOF,
std::chrono::system_clock::now().time_since_epoch().count());

std::vector<tt::test_utils::df::bfloat16> golden = gold_cumsum(input, {test_config.N, W, H}, test_config.rowwise);
auto golden_packed = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(golden);
std::vector<bfloat16> golden = gold_cumsum(input, {test_config.N, W, H}, test_config.rowwise);
auto golden_packed = pack_vector<uint32_t, bfloat16>(golden);

auto input_packed = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(input);
auto input_packed = pack_vector<uint32_t, bfloat16>(input);
auto input_packed_tilized = unit_tests::compute::gold_standard_tilize(input_packed, {test_config.N * test_config.Ht, test_config.Wt});

tt_metal::detail::WriteToBuffer(src_dram_buffer, input_packed_tilized);
Expand All @@ -169,10 +169,10 @@ void run_single_core_cumsum(tt_metal::Device* device, const CumsumConfig& test_c

log_info(tt::LogTest, "Running test for N = {}, Wt = {}, Ht = {}", test_config.N, test_config.Wt, test_config.Ht);

bool result = is_close_packed_vectors<tt::test_utils::df::bfloat16, uint32_t>(
bool result = is_close_packed_vectors<bfloat16, uint32_t>(
output_packed,
golden_packed,
[&](const tt::test_utils::df::bfloat16& a, const tt::test_utils::df::bfloat16& b) {
[&](const bfloat16& a, const bfloat16& b) {
return is_close(a, b, 0.01f);
});
ASSERT_TRUE(result);
Expand Down
Loading

0 comments on commit 948fafb

Please sign in to comment.