From e55907c78a1ce1b0c055032f98e76a32e477e4f8 Mon Sep 17 00:00:00 2001 From: Simon Farre Date: Wed, 3 Jul 2024 10:56:36 -0400 Subject: [PATCH 1/2] Initial AVX512 Support CMake Changes: Added the AVX512F_SUPPORT flag to CMake. It is set by checking whether the build system supports AVX512F. If it doesn't, none of the AVX512 tests are built (and therefore none are run). Added the target descriptions. Assumed that PKRU support also implies AVX512 support, as I could find no information that said otherwise. Added the gdb_avx512 test, which sets 3 ZMM registers and reads them back in a gdb session of a replay to verify the contents. We place mask registers K0-K7 after ZMM31H internally, because this simplifies the logic that parses the XSAVE area. The target description we provide takes this into account (and also places them last), so they come after the ZMM registers in the 'g' packet. GDB doesn't care, so long as we do what we say in the target description. Changed the test harness util.sh to also export the TESTNAME variable, as we may (I do at least) want to query from the Python script what mode we are running in (64-bit/32-bit). See gdb_avx512.py for further info. 
--- CMakeLists.txt | 35 +++++++++- src/ExtraRegisters.cc | 61 ++++++++++++++-- src/GdbServer.cc | 64 ++++++++++++----- src/GdbServer.h | 5 ++ src/GdbServerConnection.cc | 14 +++- src/GdbServerConnection.h | 10 +-- src/GdbServerRegister.h | 90 ++++++++++++++++++++++++ src/Registers.h | 2 +- src/test/gdb_avx512.c | 54 ++++++++++++++ src/test/gdb_avx512.py | 47 +++++++++++++ src/test/gdb_avx512.run | 2 + src/test/util.sh | 4 ++ src/util.h | 1 + third-party/gdb/32bit-avx512.xml | 22 ++++++ third-party/gdb/64bit-avx512.xml | 97 ++++++++++++++++++++++++++ third-party/gdb/amd64-avx512-linux.xml | 20 ++++++ third-party/gdb/amd64-pkeys-linux.xml | 1 + third-party/gdb/i386-avx512-linux.xml | 19 +++++ third-party/gdb/i386-pkeys-linux.xml | 1 + 19 files changed, 516 insertions(+), 33 deletions(-) create mode 100644 src/test/gdb_avx512.c create mode 100644 src/test/gdb_avx512.py create mode 100644 src/test/gdb_avx512.run create mode 100644 third-party/gdb/32bit-avx512.xml create mode 100644 third-party/gdb/64bit-avx512.xml create mode 100644 third-party/gdb/amd64-avx512-linux.xml create mode 100644 third-party/gdb/i386-avx512-linux.xml diff --git a/CMakeLists.txt b/CMakeLists.txt index 0327d605641..5c471fe9510 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,6 +93,8 @@ if (SUPPORTS_MACRO_PREFIX_MAP) set(FLAGS_COMMON "${FLAGS_COMMON} -fmacro-prefix-map=${CMAKE_SOURCE_DIR}/=") endif() +try_compile(AVX512F_SUPPORT ${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/src/test/gdb_avx512.c CMAKE_FLAGS -DCOMPILE_DEFINITIONS=-mavx512f) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS_COMMON} -Wstrict-prototypes -std=gnu11") # Define __STDC_LIMIT_MACROS so |#include | works as expected. # Define __STDC_FORMAT_MACROS so |#include | works as expected. 
@@ -758,11 +760,13 @@ set_source_files_properties(src/exec_stub.c set(RR_GDB_RESOURCES 32bit-avx.xml + 32bit-avx512.xml 32bit-core.xml 32bit-linux.xml 32bit-sse.xml 32bit-pkeys.xml 64bit-avx.xml + 64bit-avx512.xml 64bit-core.xml 64bit-linux.xml 64bit-seg.xml @@ -770,9 +774,11 @@ set(RR_GDB_RESOURCES 64bit-pkeys.xml amd64-pkeys-linux.xml amd64-avx-linux.xml + amd64-avx512-linux.xml amd64-linux.xml i386-pkeys-linux.xml i386-avx-linux.xml + i386-avx512-linux.xml i386-linux.xml aarch64-core.xml aarch64-fpu.xml @@ -1744,6 +1750,10 @@ set(TESTS_WITHOUT_PROGRAM when ) +if(AVX512F_SUPPORT) + set(TESTS_WITHOUT_PROGRAM ${TESTS_WITHOUT_PROGRAM} gdb_avx512) +endif() + if(BUILD_TESTS) # Part of the installable testsuite (test files). if(INSTALL_TESTSUITE) @@ -1818,6 +1828,14 @@ if(BUILD_TESTS) PROPERTIES COMPILE_FLAGS "${RR_TEST_FLAGS} -g -O3") add_dependencies(watchpoint_unaligned2 Generated) + if(AVX512F_SUPPORT) + add_executable(gdb_avx512 src/test/gdb_avx512.c) + post_build_executable(gdb_avx512) + set_target_properties(gdb_avx512 + PROPERTIES COMPILE_FLAGS "${RR_TEST_FLAGS} -g3 -mavx512f") + add_dependencies(gdb_avx512 Generated) + endif() + # Test disabled because it requires libuvc to be built and installed, and a # working USB camera # add_executable(usb src/test/usb.c) @@ -1921,7 +1939,7 @@ if(BUILD_TESTS) bash source_dir/src/test/${test}.run ${testname} -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-no-syscallbuf) endforeach(test) - + # Run 32-bit tests on 64-bit builds. # We copy the test files into '32' subdirectories in the output # directory, so we can set different compile options on them. 
@@ -1942,6 +1960,11 @@ if(BUILD_TESTS) PROPERTIES COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS}") endforeach(test) + if(AVX512F_SUPPORT) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/gdb_avx512.c" "${CMAKE_CURRENT_BINARY_DIR}/32/gdb_avx512.c" COPYONLY) + set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/gdb_avx512.c" PROPERTIES COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g3 -mavx512f") + endif() + foreach(test ${BASIC_CPP_TESTS}) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/${test}.cc" "${CMAKE_CURRENT_BINARY_DIR}/32/${test}.cc" @@ -1999,6 +2022,16 @@ if(BUILD_TESTS) COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g -O3") add_dependencies(watchpoint_unaligned2_32 Generated) + if(AVX512F_SUPPORT) + add_executable(gdb_avx512_32 "${CMAKE_CURRENT_BINARY_DIR}/32/gdb_avx512.c") + post_build_executable(gdb_avx512_32) + set_target_properties(gdb_avx512_32 + PROPERTIES + LINK_FLAGS "-m32" + COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g3 -mavx512f") + add_dependencies(gdb_avx512_32 Generated) + endif() + add_library(test_lib_32 "${CMAKE_CURRENT_BINARY_DIR}/32/test_lib.c" ) diff --git a/src/ExtraRegisters.cc b/src/ExtraRegisters.cc index a40122eaf71..36ecd2b7779 100644 --- a/src/ExtraRegisters.cc +++ b/src/ExtraRegisters.cc @@ -66,8 +66,11 @@ static bool reg_in_range(GdbServerRegister regno, GdbServerRegister low, GdbServ return true; } -static const int AVX_FEATURE_BIT = 2; -static const int PKRU_FEATURE_BIT = 9; +static constexpr int AVX_FEATURE_BIT = 2; +static constexpr int AVX_OPMASK_FEATURE_BIT = 5; +static constexpr int AVX_ZMM_HI256_FEATURE_BIT = 6; +static constexpr int AVX_ZMM_HI16_FEATURE_BIT = 7; +static constexpr int PKRU_FEATURE_BIT = 9; static const uint64_t PKRU_FEATURE_MASK = 1 << PKRU_FEATURE_BIT; @@ -77,6 +80,48 @@ static const size_t xsave_header_offset = 512; static const size_t xsave_header_size = 64; static const size_t xsave_header_end = xsave_header_offset + xsave_header_size; // This is always at 576 since AVX is always the first optional feature, // if present. 
static const size_t AVX_xsave_offset = 576; +struct RegisterConfig { + int8_t feature; + GdbServerRegister base; + int8_t size; + int stride; + + int register_offset(GdbServerRegister reg, int base_offset) const noexcept { + const auto& layout = xsave_native_layout(); + return layout.feature_layouts[feature].offset + base_offset + (reg - base) * stride; + } +}; + +static constexpr std::array RegisterConfigLookupTable{ + { { AVX_FEATURE_BIT, DREG_64_YMM0H, 16, 16 }, + { AVX_ZMM_HI16_FEATURE_BIT, DREG_64_XMM16, 16, 64 }, + { AVX_ZMM_HI16_FEATURE_BIT, DREG_64_YMM16H, 16, 64 }, + { AVX_ZMM_HI256_FEATURE_BIT, DREG_64_ZMM0H, 32, 32 }, + { AVX_ZMM_HI16_FEATURE_BIT, DREG_64_ZMM16H, 32, 64 }, + { AVX_OPMASK_FEATURE_BIT, DREG_64_K0, 8, 8 } } +}; + +static constexpr auto YMM16_31 = 0b10; +static constexpr auto ZMM16_31 = 0b100; + +// Every range of registers (except K0-7) are 16 registers long. We use this fact to build +// a lookup table, for the AVX2 and AVX512 registers. +static bool reg_is_avx2_or_512(GdbServerRegister reg, RegData& out) noexcept { + if(reg < DREG_64_YMM0H || reg > DREG_64_K7) { + return false; + } + + const auto selector = (reg - DREG_64_YMM0H) >> 4; + DEBUG_ASSERT(selector >= 0 && selector <= 5 && "GdbServerRegister enum values has been changed."); + const auto cfg = RegisterConfigLookupTable[selector]; + out.xsave_feature_bit = cfg.feature; + out.size = cfg.size; + + // only YMM16-31 and ZMM16-31 have a base offset (16 and 32 respectively) + const auto base_offset = cfg.size * (selector == YMM16_31) | cfg.size * (selector == ZMM16_31); + out.offset = cfg.register_offset(reg, base_offset); + return true; +} // Return the size and data location of register |regno|. // If we can't read the register, returns -1 in 'offset'. 
@@ -95,6 +140,14 @@ static RegData xsave_register_data(SupportedArch arch, GdbServerRegister regno) regno = (GdbServerRegister)(regno - DREG_YMM0H + DREG_64_YMM0H); break; } + if(regno >= DREG_ZMM0H && regno <= DREG_ZMM7H) { + regno = (GdbServerRegister)(regno - DREG_ZMM0H + DREG_64_ZMM0H); + break; + } + if(regno >= DREG_K0 && regno <= DREG_K7) { + regno = (GdbServerRegister)(regno - DREG_K0 + DREG_64_K0); + break; + } if (regno == DREG_MXCSR) { regno = DREG_64_MXCSR; } else if (regno == DREG_PKRU) { @@ -123,9 +176,7 @@ static RegData xsave_register_data(SupportedArch arch, GdbServerRegister regno) return result; } - if (reg_in_range(regno, DREG_64_YMM0H, DREG_64_YMM15H, AVX_xsave_offset, 16, - 16, &result)) { - result.xsave_feature_bit = AVX_FEATURE_BIT; + if(reg_is_avx2_or_512(regno, result)) { return result; } diff --git a/src/GdbServer.cc b/src/GdbServer.cc index f13046da943..67b01409bfe 100644 --- a/src/GdbServer.cc +++ b/src/GdbServer.cc @@ -186,26 +186,9 @@ static void maybe_singlestep_for_event(Task* t, GdbRequest* req) { void GdbServer::dispatch_regs_request(const Registers& regs, const ExtraRegisters& extra_regs) { - GdbServerRegister end; - // Send values for all the registers we sent XML register descriptions for. - // Those descriptions are controlled by GdbServerConnection::cpu_features(). - bool have_PKU = dbg->cpu_features() & GdbServerConnection::CPU_PKU; - bool have_AVX = dbg->cpu_features() & GdbServerConnection::CPU_AVX; - switch (regs.arch()) { - case x86: - end = have_PKU ? DREG_PKRU : (have_AVX ? DREG_YMM7H : DREG_ORIG_EAX); - break; - case x86_64: - end = have_PKU ? DREG_64_PKRU : (have_AVX ? 
DREG_64_YMM15H : DREG_GS_BASE); - break; - case aarch64: - end = DREG_FPCR; - break; - default: - FATAL() << "Unknown architecture"; - return; - } + const GdbServerRegister end = arch_reg_end(regs.arch()); vector rs; + rs.reserve(size_t(end) + 1); for (GdbServerRegister r = GdbServerRegister(0); r <= end; r = GdbServerRegister(r + 1)) { rs.push_back(get_reg(regs, extra_regs, r)); } @@ -2306,6 +2289,49 @@ void GdbServer::read_back_debugger_mem(DiversionSession& session) { } } +GdbServerRegister GdbServer::arch_reg_end(SupportedArch arch) noexcept { + if(target_regs_end != GdbServerRegister(0)) { + return target_regs_end; + } + + // Send values for all the registers we sent XML register descriptions for. + // Those descriptions are controlled by GdbServerConnection::cpu_features(). + bool have_PKU = dbg->cpu_features() & GdbServerConnection::CPU_PKU; + bool have_AVX = dbg->cpu_features() & GdbServerConnection::CPU_AVX; + bool have_AVX512 = dbg->cpu_features() & GdbServerConnection::CPU_AVX512; + switch (arch) { + case x86: + if(have_PKU) { + target_regs_end = DREG_PKRU; + } else if(have_AVX512) { + target_regs_end = DREG_K7; + } else if(have_AVX) { + target_regs_end = DREG_YMM7H; + } else { + target_regs_end = DREG_ORIG_EAX; + } + break; + case x86_64: + if(have_PKU) { + target_regs_end = DREG_64_PKRU; + } else if(have_AVX512) { + target_regs_end = DREG_64_K7; + } else if(have_AVX) { + target_regs_end = DREG_64_YMM15H; + } else { + target_regs_end = DREG_GS_BASE; + } + break; + case aarch64: + target_regs_end = DREG_FPCR; + break; + default: + FATAL() << "Unknown architecture"; + return target_regs_end; + } + return target_regs_end; +} + bool GdbServer::debugger_mem_region(ThreadGroupUid tguid, remote_ptr addr, int* prot, MemoryRange* mem_range) { auto it = debugger_mem.find(tguid); diff --git a/src/GdbServer.h b/src/GdbServer.h index d639c2787b0..251a944ef49 100644 --- a/src/GdbServer.h +++ b/src/GdbServer.h @@ -223,6 +223,9 @@ class GdbServer { // Read back the contents of all 
debugger memory regions from the session. void read_back_debugger_mem(DiversionSession& session); + // Get the last GdbServerRegister for "this" arch. If it hasn't been determined yet, determine and cache it. + GdbServerRegister arch_reg_end(SupportedArch arch) noexcept; + // dbg is never null. std::unique_ptr dbg; // The ThreadGroupUid of the task being debugged. @@ -315,6 +318,8 @@ class GdbServer { ExtraRegisters extra_regs; }; std::unordered_map saved_register_states; + + GdbServerRegister target_regs_end = GdbServerRegister(0); }; } // namespace rr diff --git a/src/GdbServerConnection.cc b/src/GdbServerConnection.cc index ca26e932e09..804b4b559a8 100644 --- a/src/GdbServerConnection.cc +++ b/src/GdbServerConnection.cc @@ -83,10 +83,14 @@ static uint32_t get_cpu_features(SupportedArch arch) { auto cpuid_data = cpuid(CPUID_GETEXTENDEDFEATURES, 0); if ((cpuid_data.ecx & PKU_FEATURE_FLAG) == PKU_FEATURE_FLAG) { // PKU (Skylake) implies AVX (Sandy Bridge). - cpu_features |= GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_PKU; + cpu_features |= GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512 | GdbServerConnection::CPU_PKU; break; } + if((cpuid_data.ebx & AVX_512_FOUNDATION_FLAG) == AVX_512_FOUNDATION_FLAG) { + cpu_features |= GdbServerConnection::CPU_AVX512 | GdbServerConnection::CPU_AVX; + } + cpuid_data = cpuid(CPUID_GETFEATURES, 0); // We're assuming here that AVX support on the system making the recording // is the same as the AVX support during replay. 
But if that's not true, @@ -512,11 +516,15 @@ static const char* target_description_name(uint32_t cpu_features) { return "amd64-linux.xml"; case GdbServerConnection::CPU_AVX: return "i386-avx-linux.xml"; + case GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512: + return "i386-avx512-linux.xml"; case GdbServerConnection::CPU_X86_64 | GdbServerConnection::CPU_AVX: return "amd64-avx-linux.xml"; - case GdbServerConnection::CPU_PKU | GdbServerConnection::CPU_AVX: + case GdbServerConnection::CPU_X86_64 | GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512: + return "amd64-avx512-linux.xml"; + case GdbServerConnection::CPU_PKU | GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512: return "i386-pkeys-linux.xml"; - case GdbServerConnection::CPU_X86_64 | GdbServerConnection::CPU_PKU | GdbServerConnection::CPU_AVX: + case GdbServerConnection::CPU_X86_64 | GdbServerConnection::CPU_PKU | GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512: return "amd64-pkeys-linux.xml"; case GdbServerConnection::CPU_AARCH64: return "aarch64-core.xml"; diff --git a/src/GdbServerConnection.h b/src/GdbServerConnection.h index 94a05f9a1c9..5cd51dae6a3 100644 --- a/src/GdbServerConnection.h +++ b/src/GdbServerConnection.h @@ -745,11 +745,13 @@ class GdbServerConnection { const Features& features() { return features_; } enum { - CPU_X86_64 = 0x1, - CPU_AVX = 0x2, - CPU_AARCH64 = 0x4, - CPU_PKU = 0x8 + CPU_X86_64 = 1 << 0, + CPU_AVX = 1 << 1, + CPU_AARCH64 = 1 << 2, + CPU_PKU = 1 << 3, + CPU_AVX512 = 1 << 4 }; + void set_cpu_features(uint32_t features) { cpu_features_ = features; } uint32_t cpu_features() const { return cpu_features_; } diff --git a/src/GdbServerRegister.h b/src/GdbServerRegister.h index ac793152b3e..0234c39349f 100644 --- a/src/GdbServerRegister.h +++ b/src/GdbServerRegister.h @@ -63,6 +63,22 @@ enum GdbServerRegister { DREG_YMM5H, DREG_YMM6H, DREG_YMM7H, + DREG_ZMM0H, + DREG_ZMM1H, + DREG_ZMM2H, + DREG_ZMM3H, + DREG_ZMM4H, + DREG_ZMM5H, 
+ DREG_ZMM6H, + DREG_ZMM7H, + DREG_K0, + DREG_K1, + DREG_K2, + DREG_K3, + DREG_K4, + DREG_K5, + DREG_K6, + DREG_K7, DREG_PKRU, DREG_NUM_LINUX_I386, // Last register we can find in user_regs_struct @@ -153,6 +169,80 @@ enum GdbServerRegister { DREG_64_YMM13H, DREG_64_YMM14H, DREG_64_YMM15H, + DREG_64_XMM16, + DREG_64_XMM17, + DREG_64_XMM18, + DREG_64_XMM19, + DREG_64_XMM20, + DREG_64_XMM21, + DREG_64_XMM22, + DREG_64_XMM23, + DREG_64_XMM24, + DREG_64_XMM25, + DREG_64_XMM26, + DREG_64_XMM27, + DREG_64_XMM28, + DREG_64_XMM29, + DREG_64_XMM30, + DREG_64_XMM31, + DREG_64_YMM16H, + DREG_64_YMM17H, + DREG_64_YMM18H, + DREG_64_YMM19H, + DREG_64_YMM20H, + DREG_64_YMM21H, + DREG_64_YMM22H, + DREG_64_YMM23H, + DREG_64_YMM24H, + DREG_64_YMM25H, + DREG_64_YMM26H, + DREG_64_YMM27H, + DREG_64_YMM28H, + DREG_64_YMM29H, + DREG_64_YMM30H, + DREG_64_YMM31H, + DREG_64_ZMM0H, + DREG_64_ZMM1H, + DREG_64_ZMM2H, + DREG_64_ZMM3H, + DREG_64_ZMM4H, + DREG_64_ZMM5H, + DREG_64_ZMM6H, + DREG_64_ZMM7H, + DREG_64_ZMM8H, + DREG_64_ZMM9H, + DREG_64_ZMM10H, + DREG_64_ZMM11H, + DREG_64_ZMM12H, + DREG_64_ZMM13H, + DREG_64_ZMM14H, + DREG_64_ZMM15H, + DREG_64_ZMM16H, + DREG_64_ZMM17H, + DREG_64_ZMM18H, + DREG_64_ZMM19H, + DREG_64_ZMM20H, + DREG_64_ZMM21H, + DREG_64_ZMM22H, + DREG_64_ZMM23H, + DREG_64_ZMM24H, + DREG_64_ZMM25H, + DREG_64_ZMM26H, + DREG_64_ZMM27H, + DREG_64_ZMM28H, + DREG_64_ZMM29H, + DREG_64_ZMM30H, + DREG_64_ZMM31H, + // We've moved K0..K7 to here, because it simplifies offset calculation for XMM16/YMM16/HiZMM offsets + // target description 64bit-avx512.xml also reflects this + DREG_64_K0, + DREG_64_K1, + DREG_64_K2, + DREG_64_K3, + DREG_64_K4, + DREG_64_K5, + DREG_64_K6, + DREG_64_K7, DREG_64_PKRU, DREG_NUM_LINUX_X86_64, // Last register we can find in user_regs_struct (except for orig_rax). 
diff --git a/src/Registers.h b/src/Registers.h index e66af7ad2bf..a43be4ac341 100644 --- a/src/Registers.h +++ b/src/Registers.h @@ -53,7 +53,7 @@ const uintptr_t AARCH64_DBG_SPSR_11 = 1 << 11; */ class Registers { public: - enum { MAX_SIZE = 16 }; + enum { MAX_SIZE = 32 }; Registers(SupportedArch a = x86) : arch_(a) { memset(&u, 0, sizeof(u)); diff --git a/src/test/gdb_avx512.c b/src/test/gdb_avx512.c new file mode 100644 index 00000000000..81a0af92cc2 --- /dev/null +++ b/src/test/gdb_avx512.c @@ -0,0 +1,54 @@ +#include + +#if defined(__AVX512F__) + +static int broadcast_to_three_zmm(void) { + asm volatile ( + "vpbroadcastd %0, %%zmm0 \n\t" + : + : "r"(0x1a1a1a1a) + : "zmm0" + ); +#if !defined(__ILP32__) + asm volatile ( + "vpbroadcastd %0, %%zmm16 \n\t" + : + : "r"(0x5b5b5b5b) + : "zmm16" + ); + asm volatile ( + "vpbroadcastd %0, %%zmm30 \n\t" + : + : "r"(0xffffffff) + : "zmm30" + ); +#else // 32-bit mode only has 8 vector registers (zmm0-zmm7) + asm volatile ( + "vpbroadcastd %0, %%zmm4 \n\t" + : + : "r"(0x5b5b5b5b) + : "zmm4" + ); + asm volatile ( + "vpbroadcastd %0, %%zmm7 \n\t" + : + : "r"(0xffffffff) + : "zmm7" + ); +#endif + return 0; +} + +#else +#error "AVX512 is required" +#endif + +int +main(void) +{ +#ifdef __AVX512F__ + int a = broadcast_to_three_zmm(); + return a; +#endif + return 1; +} diff --git a/src/test/gdb_avx512.py b/src/test/gdb_avx512.py new file mode 100644 index 00000000000..9be954b5ad9 --- /dev/null +++ b/src/test/gdb_avx512.py @@ -0,0 +1,47 @@ +from util import * +import sys +import os + +test = os.getenv("TESTNAME") + +send_gdb('break 39') +expect_gdb('Breakpoint 1') +send_gdb('c') +expect_gdb('Breakpoint 1') + +send_gdb("print/x $xmm0.uint128") +expect_gdb(r"0x1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a") + +send_gdb("print/x $ymm0.v2_int128") +expect_gdb(r"0x1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a,\s+0x1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a") + +send_gdb("print/x $zmm0.v4_int128") 
+expect_gdb(r"0x1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a,\s+0x1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a,\s+0x1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a,\s+0x1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a") + +regs_per_mode = [] + +if test[-3:] == "_32": + regs_per_mode = [4, 7] +else: + regs_per_mode = [16, 30] + +send_gdb(f"print/x $xmm{regs_per_mode[0]}.uint128") +expect_gdb(r"0x5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b") + +send_gdb(f"print/x $ymm{regs_per_mode[0]}.v2_int128") +expect_gdb(r"0x5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b,\s+0x5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b") + +send_gdb(f"print/x $zmm{regs_per_mode[0]}.v4_int128") +expect_gdb(r"0x5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b,\s+0x5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b,\s+0x5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b,\s+0x5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b5b") + +send_gdb(f"print/x $xmm{regs_per_mode[1]}.uint128") +expect_gdb(r"0xffffffffffffffffffffffffffffffff") + +send_gdb(f"print/x $ymm{regs_per_mode[1]}.v2_int128") +expect_gdb(r"0xffffffffffffffffffffffffffffffff,\s+0xffffffffffffffffffffffffffffffff") + +send_gdb(f"print/x $zmm{regs_per_mode[1]}.v4_int128") +expect_gdb(r"0xffffffffffffffffffffffffffffffff,\s+0xffffffffffffffffffffffffffffffff,\s+0xffffffffffffffffffffffffffffffff,\s+0xffffffffffffffffffffffffffffffff") + +send_gdb('c') +ok() diff --git a/src/test/gdb_avx512.run b/src/test/gdb_avx512.run new file mode 100644 index 00000000000..0e9b81df833 --- /dev/null +++ b/src/test/gdb_avx512.run @@ -0,0 +1,2 @@ +source `dirname $0`/util.sh +debug_test_gdb_only diff --git a/src/test/util.sh b/src/test/util.sh index 9d3d62ff1e9..d1739754b63 100644 --- a/src/test/util.sh +++ b/src/test/util.sh @@ -121,6 +121,10 @@ if [[ $TESTNAME =~ ([A-Za-z0-9_]+)_32$ ]]; then else TESTNAME_NO_BITNESS=$TESTNAME fi + +# We may want to retrieve this from python +export TESTNAME=$TESTNAME + LIB_ARG=$2 OBJDIR=$3 if [[ "$OBJDIR" == "" ]]; then diff --git a/src/util.h b/src/util.h index 9a065d37aa4..aa75953b8c6 100644 --- a/src/util.h +++ b/src/util.h @@ -216,6 +216,7 @@ enum cpuid_requests { 
CPUID_AMD_PLATFORM_QOS = 0x80000020 }; +constexpr int AVX_512_FOUNDATION_FLAG = 1 << 16; const int XSAVE_FEATURE_FLAG = 1 << 26; const int OSXSAVE_FEATURE_FLAG = 1 << 27; const int AVX_FEATURE_FLAG = 1 << 28; diff --git a/third-party/gdb/32bit-avx512.xml b/third-party/gdb/32bit-avx512.xml new file mode 100644 index 00000000000..dc897275c0d --- /dev/null +++ b/third-party/gdb/32bit-avx512.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/third-party/gdb/64bit-avx512.xml b/third-party/gdb/64bit-avx512.xml new file mode 100644 index 00000000000..04575abea01 --- /dev/null +++ b/third-party/gdb/64bit-avx512.xml @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/third-party/gdb/amd64-avx512-linux.xml b/third-party/gdb/amd64-avx512-linux.xml new file mode 100644 index 00000000000..c1a9fd63c85 --- /dev/null +++ b/third-party/gdb/amd64-avx512-linux.xml @@ -0,0 +1,20 @@ + + + + + + + + i386:x86-64 + GNU/Linux + + + + + + + diff --git a/third-party/gdb/amd64-pkeys-linux.xml b/third-party/gdb/amd64-pkeys-linux.xml index 1fa5bde1161..0fac98360e8 100644 --- a/third-party/gdb/amd64-pkeys-linux.xml +++ b/third-party/gdb/amd64-pkeys-linux.xml @@ -16,5 +16,6 @@ + diff --git a/third-party/gdb/i386-avx512-linux.xml b/third-party/gdb/i386-avx512-linux.xml new file mode 100644 index 00000000000..2fb1d6b6da2 --- /dev/null +++ b/third-party/gdb/i386-avx512-linux.xml @@ -0,0 +1,19 @@ + + + + + + + + i386 + GNU/Linux + + + + + + diff --git a/third-party/gdb/i386-pkeys-linux.xml b/third-party/gdb/i386-pkeys-linux.xml index 47f7b2f0093..7544998218d 100644 --- a/third-party/gdb/i386-pkeys-linux.xml +++ b/third-party/gdb/i386-pkeys-linux.xml @@ -15,5 +15,6 @@ + From 23fcaef6e97a15775db643210c9fda42fe8ab23d Mon Sep 17 00:00:00 2001 From: Simon Farre Date: Wed, 3 Jul 2024 17:11:46 +0200 Subject: 
[PATCH 2/2] Bug fix: the AVX512F_SUPPORT check always returned true because `try_compile` was passed -mavx512f instead of -march=native; also run the check in the build directory instead of the source tree --- CMakeLists.txt | 2 +- src/ExtraRegisters.cc | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c471fe9510..601d39330ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,7 +93,7 @@ if (SUPPORTS_MACRO_PREFIX_MAP) set(FLAGS_COMMON "${FLAGS_COMMON} -fmacro-prefix-map=${CMAKE_SOURCE_DIR}/=") endif() -try_compile(AVX512F_SUPPORT ${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/src/test/gdb_avx512.c CMAKE_FLAGS -DCOMPILE_DEFINITIONS=-mavx512f) +try_compile(AVX512F_SUPPORT ${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/src/test/gdb_avx512.c CMAKE_FLAGS -DCOMPILE_DEFINITIONS=-march=native) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS_COMMON} -Wstrict-prototypes -std=gnu11") # Define __STDC_LIMIT_MACROS so |#include | works as expected. diff --git a/src/ExtraRegisters.cc b/src/ExtraRegisters.cc index 36ecd2b7779..1efdd0a0a3b 100644 --- a/src/ExtraRegisters.cc +++ b/src/ExtraRegisters.cc @@ -77,9 +77,6 @@ static const uint64_t PKRU_FEATURE_MASK = 1 << PKRU_FEATURE_BIT; static const size_t xsave_header_offset = 512; static const size_t xsave_header_size = 64; static const size_t xsave_header_end = xsave_header_offset + xsave_header_size; -// This is always at 576 since AVX is always the first optional feature, -// if present. -static const size_t AVX_xsave_offset = 576; struct RegisterConfig { int8_t feature; GdbServerRegister base;