Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVX512 Support #3776

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ if (SUPPORTS_MACRO_PREFIX_MAP)
set(FLAGS_COMMON "${FLAGS_COMMON} -fmacro-prefix-map=${CMAKE_SOURCE_DIR}/=")
endif()

# Probe whether src/test/gdb_avx512.c compiles when -march=native is passed to
# the compiler; the outcome is stored in AVX512F_SUPPORT, which gates the
# gdb_avx512 test targets.
# The scratch directory is the build tree: using the source tree here would
# make every configure run write try-compile artifacts into the checkout.
try_compile(AVX512F_SUPPORT
  "${CMAKE_BINARY_DIR}"
  "${CMAKE_SOURCE_DIR}/src/test/gdb_avx512.c"
  CMAKE_FLAGS -DCOMPILE_DEFINITIONS=-march=native)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS_COMMON} -Wstrict-prototypes -std=gnu11")
# Define __STDC_LIMIT_MACROS so |#include <stdint.h>| works as expected.
# Define __STDC_FORMAT_MACROS so |#include <inttypes.h>| works as expected.
Expand Down Expand Up @@ -758,21 +760,25 @@ set_source_files_properties(src/exec_stub.c

set(RR_GDB_RESOURCES
32bit-avx.xml
32bit-avx512.xml
32bit-core.xml
32bit-linux.xml
32bit-sse.xml
32bit-pkeys.xml
64bit-avx.xml
64bit-avx512.xml
64bit-core.xml
64bit-linux.xml
64bit-seg.xml
64bit-sse.xml
64bit-pkeys.xml
amd64-pkeys-linux.xml
amd64-avx-linux.xml
amd64-avx512-linux.xml
amd64-linux.xml
i386-pkeys-linux.xml
i386-avx-linux.xml
i386-avx512-linux.xml
i386-linux.xml
aarch64-core.xml
aarch64-fpu.xml
Expand Down Expand Up @@ -1744,6 +1750,10 @@ set(TESTS_WITHOUT_PROGRAM
when
)

# gdb_avx512 joins TESTS_WITHOUT_PROGRAM only when the AVX512 compile probe
# succeeded.
if(AVX512F_SUPPORT)
  list(APPEND TESTS_WITHOUT_PROGRAM gdb_avx512)
endif()

if(BUILD_TESTS)
# Part of the installable testsuite (test files).
if(INSTALL_TESTSUITE)
Expand Down Expand Up @@ -1818,6 +1828,14 @@ if(BUILD_TESTS)
PROPERTIES COMPILE_FLAGS "${RR_TEST_FLAGS} -g -O3")
add_dependencies(watchpoint_unaligned2 Generated)

# gdb_avx512 test executable; only defined when the AVX512 compile probe
# succeeded. Built with -g3 and -mavx512f on top of the common test flags.
if(AVX512F_SUPPORT)
  add_executable(gdb_avx512 src/test/gdb_avx512.c)
  post_build_executable(gdb_avx512)
  set_property(TARGET gdb_avx512
    PROPERTY COMPILE_FLAGS "${RR_TEST_FLAGS} -g3 -mavx512f")
  add_dependencies(gdb_avx512 Generated)
endif()

# Test disabled because it requires libuvc to be built and installed, and a
# working USB camera
# add_executable(usb src/test/usb.c)
Expand Down Expand Up @@ -1921,7 +1939,7 @@ if(BUILD_TESTS)
bash source_dir/src/test/${test}.run ${testname} -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT})
configure_test(${test}-no-syscallbuf)
endforeach(test)

# Run 32-bit tests on 64-bit builds.
# We copy the test files into '32' subdirectories in the output
# directory, so we can set different compile options on them.
Expand All @@ -1942,6 +1960,11 @@ if(BUILD_TESTS)
PROPERTIES COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS}")
endforeach(test)

if(AVX512F_SUPPORT)
  # Stage a copy of the test source under 32/ so it can carry its own
  # 32-bit compile options.
  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/gdb_avx512.c"
                 "${CMAKE_CURRENT_BINARY_DIR}/32/gdb_avx512.c"
                 COPYONLY)
  # The file is named explicitly: this statement sits outside any
  # foreach(test ...) loop, so ${test} does not refer to gdb_avx512 here and
  # the property would otherwise be attached to the wrong path.
  set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/gdb_avx512.c"
    PROPERTIES COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g3 -mavx512f")
endif()

foreach(test ${BASIC_CPP_TESTS})
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/${test}.cc"
"${CMAKE_CURRENT_BINARY_DIR}/32/${test}.cc"
Expand Down Expand Up @@ -1999,6 +2022,16 @@ if(BUILD_TESTS)
COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g -O3")
add_dependencies(watchpoint_unaligned2_32 Generated)

# 32-bit build of the gdb_avx512 test, compiled from the copy staged under
# 32/ in the build tree. Only defined when the AVX512 compile probe succeeded.
if(AVX512F_SUPPORT)
  add_executable(gdb_avx512_32 "${CMAKE_CURRENT_BINARY_DIR}/32/gdb_avx512.c")
  post_build_executable(gdb_avx512_32)
  set_property(TARGET gdb_avx512_32
    PROPERTY LINK_FLAGS "-m32")
  set_property(TARGET gdb_avx512_32
    PROPERTY COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g3 -mavx512f")
  add_dependencies(gdb_avx512_32 Generated)
endif()

add_library(test_lib_32
"${CMAKE_CURRENT_BINARY_DIR}/32/test_lib.c"
)
Expand Down
64 changes: 56 additions & 8 deletions src/ExtraRegisters.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,59 @@ static bool reg_in_range(GdbServerRegister regno, GdbServerRegister low, GdbServ
return true;
}

static const int AVX_FEATURE_BIT = 2;
static const int PKRU_FEATURE_BIT = 9;
static constexpr int AVX_FEATURE_BIT = 2;
static constexpr int AVX_OPMASK_FEATURE_BIT = 5;
static constexpr int AVX_ZMM_HI256_FEATURE_BIT = 6;
static constexpr int AVX_ZMM_HI16_FEATURE_BIT = 7;
static constexpr int PKRU_FEATURE_BIT = 9;

static const uint64_t PKRU_FEATURE_MASK = 1 << PKRU_FEATURE_BIT;

static const size_t xsave_header_offset = 512;
static const size_t xsave_header_size = 64;
static const size_t xsave_header_end = xsave_header_offset + xsave_header_size;
// This is always at 576 since AVX is always the first optional feature,
// if present.
static const size_t AVX_xsave_offset = 576;
// Describes one run of consecutive gdb registers that is stored inside a
// single XSAVE feature area.
struct RegisterConfig {
  // Index into xsave_native_layout().feature_layouts for this run.
  int8_t feature;
  // First GdbServerRegister of the run.
  GdbServerRegister base;
  // Size reported for each register of the run.
  int8_t size;
  // Distance between two consecutive registers of the run inside the area.
  int stride;

  // Computes where |reg| lives: the feature area's offset in the native XSAVE
  // layout, plus |base_offset|, plus |stride| times the register's index
  // within the run.
  // NOTE(review): feature_layouts[feature] is indexed without a bounds check;
  // confirm the native layout always has an entry for every feature used.
  int register_offset(GdbServerRegister reg, int base_offset) const noexcept {
    const auto& native = xsave_native_layout();
    const auto index_in_run = reg - base;
    return native.feature_layouts[feature].offset + base_offset + index_in_run * stride;
  }
};

// Lookup table indexed by (regno - DREG_64_YMM0H) >> 4, as done in
// reg_is_avx2_or_512. Columns follow RegisterConfig's field order:
// feature, base, size, stride.
static constexpr std::array<RegisterConfig, 6> RegisterConfigLookupTable{ {
  { AVX_FEATURE_BIT,           DREG_64_YMM0H,  16, 16 },  // [0]
  { AVX_ZMM_HI16_FEATURE_BIT,  DREG_64_XMM16,  16, 64 },  // [1]
  { AVX_ZMM_HI16_FEATURE_BIT,  DREG_64_YMM16H, 16, 64 },  // [2]
  { AVX_ZMM_HI256_FEATURE_BIT, DREG_64_ZMM0H,  32, 32 },  // [3]
  { AVX_ZMM_HI16_FEATURE_BIT,  DREG_64_ZMM16H, 32, 64 },  // [4]
  { AVX_OPMASK_FEATURE_BIT,    DREG_64_K0,     8,  8 },   // [5]
} };

// Table indices of the two entries that need a non-zero base offset.
static constexpr auto YMM16_31 = 2;
static constexpr auto ZMM16_31 = 4;

// Handles the registers numbered DREG_64_YMM0H through DREG_64_K7.
// If |reg| is in that range, fills |out| with its XSAVE feature bit, size and
// offset and returns true; otherwise returns false and leaves |out| untouched.
//
// The range is treated as slices of 16 register numbers each (the last slice,
// K0-K7, is shorter), and the slice number selects the entry of
// RegisterConfigLookupTable that describes the register.
static bool reg_is_avx2_or_512(GdbServerRegister reg, RegData& out) noexcept {
  const bool handled_here = reg >= DREG_64_YMM0H && reg <= DREG_64_K7;
  if (!handled_here) {
    return false;
  }

  // |reg| is at or above DREG_64_YMM0H here, so the difference is non-negative.
  const auto selector = (reg - DREG_64_YMM0H) / 16;
  DEBUG_ASSERT(selector >= 0 && selector <= 5 && "GdbServerRegister enum values has been changed.");
  const RegisterConfig& entry = RegisterConfigLookupTable[selector];

  // Only the YMM16-31 and ZMM16-31 slices start at a non-zero offset, and for
  // both that offset equals the entry's register size (16 and 32 respectively).
  int base_offset = 0;
  if (selector == YMM16_31 || selector == ZMM16_31) {
    base_offset = entry.size;
  }

  out.xsave_feature_bit = entry.feature;
  out.size = entry.size;
  out.offset = entry.register_offset(reg, base_offset);
  return true;
}

// Return the size and data location of register |regno|.
// If we can't read the register, returns -1 in 'offset'.
Expand All @@ -95,6 +137,14 @@ static RegData xsave_register_data(SupportedArch arch, GdbServerRegister regno)
regno = (GdbServerRegister)(regno - DREG_YMM0H + DREG_64_YMM0H);
break;
}
if(regno >= DREG_ZMM0H && regno <= DREG_ZMM7H) {
regno = (GdbServerRegister)(regno - DREG_ZMM0H + DREG_64_ZMM0H);
break;
}
if(regno >= DREG_K0 && regno <= DREG_K7) {
regno = (GdbServerRegister)(regno - DREG_K0 + DREG_64_K0);
break;
}
if (regno == DREG_MXCSR) {
regno = DREG_64_MXCSR;
} else if (regno == DREG_PKRU) {
Expand Down Expand Up @@ -123,9 +173,7 @@ static RegData xsave_register_data(SupportedArch arch, GdbServerRegister regno)
return result;
}

if (reg_in_range(regno, DREG_64_YMM0H, DREG_64_YMM15H, AVX_xsave_offset, 16,
16, &result)) {
result.xsave_feature_bit = AVX_FEATURE_BIT;
if(reg_is_avx2_or_512(regno, result)) {
return result;
}

Expand Down
64 changes: 45 additions & 19 deletions src/GdbServer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -186,26 +186,9 @@ static void maybe_singlestep_for_event(Task* t, GdbRequest* req) {

void GdbServer::dispatch_regs_request(const Registers& regs,
const ExtraRegisters& extra_regs) {
GdbServerRegister end;
// Send values for all the registers we sent XML register descriptions for.
// Those descriptions are controlled by GdbServerConnection::cpu_features().
bool have_PKU = dbg->cpu_features() & GdbServerConnection::CPU_PKU;
bool have_AVX = dbg->cpu_features() & GdbServerConnection::CPU_AVX;
switch (regs.arch()) {
case x86:
end = have_PKU ? DREG_PKRU : (have_AVX ? DREG_YMM7H : DREG_ORIG_EAX);
break;
case x86_64:
end = have_PKU ? DREG_64_PKRU : (have_AVX ? DREG_64_YMM15H : DREG_GS_BASE);
break;
case aarch64:
end = DREG_FPCR;
break;
default:
FATAL() << "Unknown architecture";
return;
}
const GdbServerRegister end = arch_reg_end(regs.arch());
vector<GdbServerRegisterValue> rs;
rs.reserve(end);
for (GdbServerRegister r = GdbServerRegister(0); r <= end; r = GdbServerRegister(r + 1)) {
rs.push_back(get_reg(regs, extra_regs, r));
}
Expand Down Expand Up @@ -2306,6 +2289,49 @@ void GdbServer::read_back_debugger_mem(DiversionSession& session) {
}
}

// Returns the last GdbServerRegister whose value should be sent for |arch|,
// and records it in target_regs_end.
//
// The value is recomputed on every call instead of being returned from
// target_regs_end once set: the answer depends on |arch|, so a value cached
// for one architecture must not be handed out for another.
//
// On an unknown architecture this reports FATAL() and returns the current
// target_regs_end unchanged.
GdbServerRegister GdbServer::arch_reg_end(SupportedArch arch) noexcept {
  // Send values for all the registers we sent XML register descriptions for.
  // Those descriptions are controlled by GdbServerConnection::cpu_features().
  const auto features = dbg->cpu_features();
  const bool have_PKU = features & GdbServerConnection::CPU_PKU;
  const bool have_AVX = features & GdbServerConnection::CPU_AVX;
  const bool have_AVX512 = features & GdbServerConnection::CPU_AVX512;

  GdbServerRegister end = GdbServerRegister(0);
  switch (arch) {
    case x86:
      // Precedence: PKU, then AVX512, then AVX, then the base register set.
      if (have_PKU) {
        end = DREG_PKRU;
      } else if (have_AVX512) {
        end = DREG_K7;
      } else if (have_AVX) {
        end = DREG_YMM7H;
      } else {
        end = DREG_ORIG_EAX;
      }
      break;
    case x86_64:
      // Same precedence as the x86 case, with the 64-bit register numbers.
      if (have_PKU) {
        end = DREG_64_PKRU;
      } else if (have_AVX512) {
        end = DREG_64_K7;
      } else if (have_AVX) {
        end = DREG_64_YMM15H;
      } else {
        end = DREG_GS_BASE;
      }
      break;
    case aarch64:
      end = DREG_FPCR;
      break;
    default:
      FATAL() << "Unknown architecture";
      return target_regs_end;
  }

  target_regs_end = end;
  return end;
}

bool GdbServer::debugger_mem_region(ThreadGroupUid tguid, remote_ptr<void> addr,
int* prot, MemoryRange* mem_range) {
auto it = debugger_mem.find(tguid);
Expand Down
5 changes: 5 additions & 0 deletions src/GdbServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ class GdbServer {
// Read back the contents of all debugger memory regions from the session.
void read_back_debugger_mem(DiversionSession& session);

// Get the last GdbServerRegister to report for |arch|. The value is derived
// from dbg->cpu_features() and remembered in target_regs_end.
GdbServerRegister arch_reg_end(SupportedArch arch) noexcept;

// dbg is never null.
std::unique_ptr<GdbServerConnection> dbg;
// The ThreadGroupUid of the task being debugged.
Expand Down Expand Up @@ -315,6 +318,8 @@ class GdbServer {
ExtraRegisters extra_regs;
};
std::unordered_map<int, SavedRegisters> saved_register_states;

GdbServerRegister target_regs_end = GdbServerRegister(0);
};

} // namespace rr
Expand Down
14 changes: 11 additions & 3 deletions src/GdbServerConnection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,14 @@ static uint32_t get_cpu_features(SupportedArch arch) {
auto cpuid_data = cpuid(CPUID_GETEXTENDEDFEATURES, 0);
if ((cpuid_data.ecx & PKU_FEATURE_FLAG) == PKU_FEATURE_FLAG) {
// PKU (Skylake) implies AVX (Sandy Bridge).
cpu_features |= GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_PKU;
cpu_features |= GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512 | GdbServerConnection::CPU_PKU;
break;
}

if((cpuid_data.ebx & AVX_512_FOUNDATION_FLAG) == AVX_512_FOUNDATION_FLAG) {
cpu_features |= GdbServerConnection::CPU_AVX512 | GdbServerConnection::CPU_AVX;
}

cpuid_data = cpuid(CPUID_GETFEATURES, 0);
// We're assuming here that AVX support on the system making the recording
// is the same as the AVX support during replay. But if that's not true,
Expand Down Expand Up @@ -512,11 +516,15 @@ static const char* target_description_name(uint32_t cpu_features) {
return "amd64-linux.xml";
case GdbServerConnection::CPU_AVX:
return "i386-avx-linux.xml";
case GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512:
return "i386-avx512-linux.xml";
case GdbServerConnection::CPU_X86_64 | GdbServerConnection::CPU_AVX:
return "amd64-avx-linux.xml";
case GdbServerConnection::CPU_PKU | GdbServerConnection::CPU_AVX:
case GdbServerConnection::CPU_X86_64 | GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512:
return "amd64-avx512-linux.xml";
case GdbServerConnection::CPU_PKU | GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512:
return "i386-pkeys-linux.xml";
case GdbServerConnection::CPU_X86_64 | GdbServerConnection::CPU_PKU | GdbServerConnection::CPU_AVX:
case GdbServerConnection::CPU_X86_64 | GdbServerConnection::CPU_PKU | GdbServerConnection::CPU_AVX | GdbServerConnection::CPU_AVX512:
return "amd64-pkeys-linux.xml";
case GdbServerConnection::CPU_AARCH64:
return "aarch64-core.xml";
Expand Down
10 changes: 6 additions & 4 deletions src/GdbServerConnection.h
Original file line number Diff line number Diff line change
Expand Up @@ -745,11 +745,13 @@ class GdbServerConnection {
const Features& features() { return features_; }

enum {
CPU_X86_64 = 0x1,
CPU_AVX = 0x2,
CPU_AARCH64 = 0x4,
CPU_PKU = 0x8
CPU_X86_64 = 1 << 0,
CPU_AVX = 1 << 1,
CPU_AARCH64 = 1 << 2,
CPU_PKU = 1 << 3,
CPU_AVX512 = 1 << 4
};

void set_cpu_features(uint32_t features) { cpu_features_ = features; }
uint32_t cpu_features() const { return cpu_features_; }

Expand Down
Loading