From f1856729533d5a52358c54afa8d08b211027e322 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 16 Oct 2024 15:41:39 -0700 Subject: [PATCH] Record and replay architected timer accesses on arm64 This works using the recently added support for prctl(PR_SET_TSC) on arm64 which is due to be released in kernel version 6.12. Fixes #3740 --- CMakeLists.txt | 26 +++++++++++- src/DiversionSession.cc | 36 ++++++++++++---- src/DiversionSession.h | 4 +- src/Task.cc | 16 ++++++- src/record_signal.cc | 16 ++++++- src/test/arm/arch_timer.c | 62 ++++++++++++++++++++++++++++ src/test/arm/arch_timer.run | 6 +++ src/test/arm/diversion_arch_timer.py | 12 ++++++ src/test/arm/util.h | 1 + src/test/arm/util.py | 1 + src/test/arm/util.sh | 2 + src/test/{x86 => }/prctl_tsc.c | 6 ++- src/test/prctl_tsc.run | 5 +++ src/test/prctl_tsc_supported.c | 6 +++ src/test/util.h | 10 +++++ src/util.cc | 22 ++++++++++ src/util.h | 26 ++++++++++++ 17 files changed, 241 insertions(+), 16 deletions(-) create mode 100644 src/test/arm/arch_timer.c create mode 100644 src/test/arm/arch_timer.run create mode 100644 src/test/arm/diversion_arch_timer.py create mode 100644 src/test/arm/util.h create mode 120000 src/test/arm/util.py create mode 100644 src/test/arm/util.sh rename src/test/{x86 => }/prctl_tsc.c (91%) create mode 100644 src/test/prctl_tsc.run create mode 100644 src/test/prctl_tsc_supported.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d555a8f2be..26da2457d29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1232,7 +1232,6 @@ set(BASIC_TESTS prctl_name prctl_short_name prctl_speculation_ctrl - x86/prctl_tsc privileged_net_ioctl proc_fds proc_mem @@ -1461,6 +1460,7 @@ set(BASIC_CPP_TESTS set(TESTS_WITH_PROGRAM abort_nonmain alternate_thread_diversion + arm/arch_timer args async_kill_with_syscallbuf async_kill_with_syscallbuf2 @@ -1574,6 +1574,7 @@ set(TESTS_WITH_PROGRAM pack patch_page_end x86/patch_40_80_f6_81 + prctl_tsc priority ptrace_remote_unmap x86/rdtsc_loop @@ -1817,6 +1818,9 @@ if(BUILD_TESTS) if (NOT x86ish AND ${test} MATCHES "^x86/.*") continue() endif() + if (NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64" AND ${test} MATCHES "^arm/.*") + continue() + endif() get_filename_component(testname ${test} NAME) add_executable(${testname} src/test/${test}.c) @@ -1854,6 +1858,9 @@ if(BUILD_TESTS) PROPERTIES COMPILE_FLAGS "${RR_TEST_FLAGS} -g -O3") add_dependencies(watchpoint_unaligned2 Generated) + add_executable(prctl_tsc_supported src/test/prctl_tsc_supported.c) + post_build_executable(prctl_tsc_supported) + # Test disabled because it requires libuvc to be built and installed, and a # working USB camera # add_executable(usb src/test/usb.c) @@ -1925,7 +1932,7 @@ if(BUILD_TESTS) set(CTEST_TEST_TIMEOUT 1000) function(configure_test test) - set_tests_properties(${test} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") + set_tests_properties(${test} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED" SKIP_RETURN_CODE 77) endfunction(configure_test) if(INSTALL_TESTSUITE) @@ -1947,6 +1954,9 @@ if(BUILD_TESTS) if (NOT x86ish AND ${test} MATCHES "^x86/.*") continue() endif() + if (NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64" AND ${test} MATCHES "^arm/.*") + continue() + endif() get_filename_component(testname ${test} NAME) add_test(${test} bash source_dir/src/test/basic_test.run ${testname} "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) @@ -1960,6 +1970,9 @@ if(BUILD_TESTS) if (NOT x86ish AND ${test} MATCHES "^x86/.*") continue() endif() + if (NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64" AND ${test} MATCHES "^arm/.*") + continue() + endif() get_filename_component(testname ${test} NAME) add_test(${test} bash source_dir/src/test/${test}.run ${testname} "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) @@ -2006,6 +2019,9 @@ if(BUILD_TESTS) endforeach(file) foreach(test ${BASIC_TESTS} ${BASIC_CPP_TESTS} ${TESTS_WITH_PROGRAM}) + if (NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64" AND ${test} MATCHES "^arm/.*") + continue() + endif() get_filename_component(testname ${test} NAME) if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/32/${test}.c") add_executable(${testname}_32 "${CMAKE_CURRENT_BINARY_DIR}/32/${test}.c") @@ -2084,6 +2100,9 @@ if(BUILD_TESTS) endif(INSTALL_TESTSUITE) foreach(test ${BASIC_TESTS} ${BASIC_CPP_TESTS} ${OTHER_TESTS}) + if (NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64" AND ${test} MATCHES "^arm/.*") + continue() + endif() get_filename_component(testname ${test} NAME) add_test(${test}-32 bash source_dir/src/test/basic_test.run ${testname}_32 "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) @@ -2094,6 +2113,9 @@ if(BUILD_TESTS) endforeach(test) foreach(test ${TESTS_WITH_PROGRAM} ${TESTS_WITHOUT_PROGRAM}) + if (NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64" AND ${test} MATCHES "^arm/.*") + continue() + endif() get_filename_component(testname ${test} NAME) add_test(${test}-32 bash source_dir/src/test/${test}.run ${testname}_32 "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) diff --git a/src/DiversionSession.cc b/src/DiversionSession.cc index 1bf3ac0fbbf..4ca2f4843cf 100644 --- a/src/DiversionSession.cc +++ b/src/DiversionSession.cc @@ -15,7 +15,7 @@ using namespace std; namespace rr { DiversionSession::DiversionSession(int cpu_binding) : - emu_fs(EmuFs::create()), fake_rdstc(uint64_t(1) << 60), cpu_binding_(cpu_binding) {} + emu_fs(EmuFs::create()), fake_timer_counter(uint64_t(1) << 60), cpu_binding_(cpu_binding) {} DiversionSession::~DiversionSession() { // We won't permanently leak any OS resources by not ensuring @@ -50,10 +50,10 @@ static void execute_syscall(Task* t) { remote.regs().set_syscall_result(t->regs().syscall_result()); } -uint64_t DiversionSession::next_rdtsc_value() { - uint64_t rdtsc_value = fake_rdstc; - fake_rdstc += 1 << 20; // 1M cycles - return rdtsc_value; +uint64_t DiversionSession::next_timer_counter() { + uint64_t value = fake_timer_counter; + fake_timer_counter += 1 << 20; // 1M cycles + return value; } template @@ -70,7 +70,7 @@ static void process_syscall_arch(Task* t, int syscallno) { } if (syscallno == t->session().syscall_number_for_rrcall_rdtsc()) { - uint64_t rdtsc_value = static_cast(&t->session())->next_rdtsc_value(); + uint64_t rdtsc_value = static_cast(&t->session())->next_timer_counter(); LOG(debug) << "Faking rrcall_rdtsc syscall with value " << rdtsc_value; remote_ptr out_param(t->regs().arg1()); t->write_mem(out_param, rdtsc_value); @@ -230,7 +230,7 @@ DiversionSession::DiversionResult DiversionSession::diversion_step( auto special_instruction = special_instruction_at(t, t->ip()); if (special_instruction.opcode == SpecialInstOpcode::X86_RDTSC) { size_t len = special_instruction_len(special_instruction.opcode); - uint64_t rdtsc_value = next_rdtsc_value(); + uint64_t rdtsc_value = next_timer_counter(); LOG(debug) << "Faking RDTSC instruction with value " << rdtsc_value; Registers r = t->regs(); r.set_ip(r.ip() + len); @@ -239,6 +239,28 @@ DiversionSession::DiversionResult DiversionSession::diversion_step( t->set_regs(r); result.break_status = BreakStatus(); continue; + } else if (special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTVCT_EL0 || + special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTVCTSS_EL0) { + size_t len = special_instruction_len(special_instruction.opcode); + uint64_t cntvct_value = next_timer_counter(); + Registers r = t->regs(); + r.set_ip(r.ip() + len); + if (special_instruction.regno != 31) { + r.set_x(special_instruction.regno, cntvct_value); + } + t->set_regs(r); + result.break_status = BreakStatus(); + continue; + } else if (special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTFRQ_EL0) { + size_t len = special_instruction_len(special_instruction.opcode); + Registers r = t->regs(); + r.set_ip(r.ip() + len); + if (special_instruction.regno != 31) { + r.set_x(special_instruction.regno, cntfrq()); + } + t->set_regs(r); + result.break_status = BreakStatus(); + continue; } } LOG(debug) << "Diversion break at ip=" << (void*)t->ip().register_value() diff --git a/src/DiversionSession.h b/src/DiversionSession.h index 9bce655f663..b315cdf95fc 100644 --- a/src/DiversionSession.h +++ b/src/DiversionSession.h @@ -59,13 +59,13 @@ class DiversionSession final : public Session { void set_tracee_fd_number(int fd_number) { tracee_socket_fd_number = fd_number; } void on_create(Task *t) override { this->Session::on_create(t); } - uint64_t next_rdtsc_value(); + uint64_t next_timer_counter(); private: friend class ReplaySession; std::shared_ptr emu_fs; - uint64_t fake_rdstc; + uint64_t fake_timer_counter; int cpu_binding_; }; diff --git a/src/Task.cc b/src/Task.cc index fb14eb7cff3..be09352e459 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -128,8 +128,8 @@ void Task::detach() { } void Task::reenable_cpuid_tsc() { + AutoRemoteSyscalls remote(this); if (is_x86ish(arch())) { - AutoRemoteSyscalls remote(this); if (session().has_cpuid_faulting()) { remote.infallible_syscall(syscall_number_for_arch_prctl(arch()), ARCH_SET_CPUID, 1); @@ -137,6 +137,12 @@ void Task::reenable_cpuid_tsc() { remote.infallible_syscall(syscall_number_for_prctl(arch()), PR_SET_TSC, PR_TSC_ENABLE); } + if (arch() == aarch64) { + // Not infallible because the prctl is only available in 6.12+. + // We already warned about this in post_exec_syscall(). + remote.syscall(syscall_number_for_prctl(arch()), + PR_SET_TSC, PR_TSC_ENABLE); + } } void Task::wait_exit() { @@ -1114,6 +1120,14 @@ void Task::post_exec_syscall(const std::string& original_exe_file) { remote.infallible_syscall(syscall_number_for_arch_prctl(arch()), ARCH_SET_CPUID, 0); } + if (arch() == aarch64) { + if (remote.syscall(syscall_number_for_prctl(remote.task()->arch()), + PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0) != 0) { + LOG(warn) << "Missing kernel support for PR_SET_TSC; architected timer " + "accesses will not be replayed deterministically. It is " + "recommended to upgrade to kernel version 6.12"; + } + } } bool Task::execed() const { return tg->execed; } diff --git a/src/record_signal.cc b/src/record_signal.cc index 3591484315e..81f34950c50 100644 --- a/src/record_signal.cc +++ b/src/record_signal.cc @@ -80,6 +80,9 @@ static bool try_handle_trapped_instruction(RecordTask* t, siginfo_t* si) { auto special_instruction = special_instruction_at(t, t->ip()); switch (special_instruction.opcode) { + case SpecialInstOpcode::ARM_MRS_CNTFRQ_EL0: + case SpecialInstOpcode::ARM_MRS_CNTVCT_EL0: + case SpecialInstOpcode::ARM_MRS_CNTVCTSS_EL0: case SpecialInstOpcode::X86_RDTSC: case SpecialInstOpcode::X86_RDTSCP: if (t->tsc_mode == PR_TSC_SIGSEGV) { @@ -99,8 +102,17 @@ static bool try_handle_trapped_instruction(RecordTask* t, siginfo_t* si) { ASSERT(t, len > 0); Registers r = t->regs(); - if (special_instruction.opcode == SpecialInstOpcode::X86_RDTSC || - special_instruction.opcode == SpecialInstOpcode::X86_RDTSCP) { + if (special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTVCT_EL0 || + special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTVCTSS_EL0) { + if (special_instruction.regno != 31) { + r.set_x(special_instruction.regno, cntvct()); + } + } else if (special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTFRQ_EL0) { + if (special_instruction.regno != 31) { + r.set_x(special_instruction.regno, cntfrq()); + } + } else if (special_instruction.opcode == SpecialInstOpcode::X86_RDTSC || + special_instruction.opcode == SpecialInstOpcode::X86_RDTSCP) { if (special_instruction.opcode == SpecialInstOpcode::X86_RDTSC && t->vm()->monkeypatcher().try_patch_trapping_instruction(t, len, true)) { Event ev = Event::patch_syscall(); diff --git a/src/test/arm/arch_timer.c b/src/test/arm/arch_timer.c new file mode 100644 index 00000000000..6ccb7239b60 --- /dev/null +++ b/src/test/arm/arch_timer.c @@ -0,0 +1,62 @@ +#include "util.h" + +#include +#include +#include + +long cntfrq(void) { + long c; + __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(c)); + return c; +} + +long cntvct(void) { + long c; + __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(c)); + return c; +} + +long cntvctss(void) { + long c; + if (getauxval(AT_HWCAP2) & HWCAP2_ECV) { + __asm__ __volatile__(".arch armv8.6-a\nmrs %0, cntvctss_el0" : "=r"(c)); + } else { + __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(c)); + } + return c; +} + +long initial_cntfrq; +long initial_cntvct; + +void arch_timer_nops(void) { + __asm__ __volatile__("mrs xzr, cntfrq_el0"); + __asm__ __volatile__("mrs xzr, cntvct_el0"); + if (getauxval(AT_HWCAP2) & HWCAP2_ECV) { + __asm__ __volatile__("mrs xzr, cntvctss_el0"); + } +} + +void diversion_check(void) { + arch_timer_nops(); + test_assert(initial_cntfrq == cntfrq()); + test_assert(initial_cntvct < cntvct()); + test_assert(initial_cntvct < cntvctss()); + atomic_puts("diversion_check passed"); +} + +void breakpoint(void) {} + +int main(void) { + initial_cntfrq = cntfrq(); + initial_cntvct = cntvct(); + breakpoint(); + + atomic_printf("%ld\n", cntvct()); + atomic_printf("%ld\n", cntvctss()); + atomic_printf("%ld\n", cntfrq()); + + arch_timer_nops(); + + atomic_puts("EXIT-SUCCESS"); +} diff --git a/src/test/arm/arch_timer.run b/src/test/arm/arch_timer.run new file mode 100644 index 00000000000..209e9dd047e --- /dev/null +++ b/src/test/arm/arch_timer.run @@ -0,0 +1,6 @@ +source `dirname $0`/util.sh +if ! prctl_tsc_supported; then + exit 77 +fi +compare_test EXIT-SUCCESS +debug_gdb_only arm/diversion_arch_timer diff --git a/src/test/arm/diversion_arch_timer.py b/src/test/arm/diversion_arch_timer.py new file mode 100644 index 00000000000..48a335c8559 --- /dev/null +++ b/src/test/arm/diversion_arch_timer.py @@ -0,0 +1,12 @@ +from util import * + +send_gdb('break breakpoint') +expect_gdb('Breakpoint 1') + +send_gdb('c') +expect_gdb('Breakpoint 1') + +send_gdb('call diversion_check()') +expect_gdb('diversion_check passed') + +ok() diff --git a/src/test/arm/util.h b/src/test/arm/util.h new file mode 100644 index 00000000000..a53d4ee1e0b --- /dev/null +++ b/src/test/arm/util.h @@ -0,0 +1 @@ +#include "../util.h" diff --git a/src/test/arm/util.py b/src/test/arm/util.py new file mode 120000 index 00000000000..7f16d68497e --- /dev/null +++ b/src/test/arm/util.py @@ -0,0 +1 @@ +../util.py \ No newline at end of file diff --git a/src/test/arm/util.sh b/src/test/arm/util.sh new file mode 100644 index 00000000000..2034898c7b5 --- /dev/null +++ b/src/test/arm/util.sh @@ -0,0 +1,2 @@ +TEST_PREFIX=arm/ +source `dirname $0`/../util.sh diff --git a/src/test/x86/prctl_tsc.c b/src/test/prctl_tsc.c similarity index 91% rename from src/test/x86/prctl_tsc.c rename to src/test/prctl_tsc.c index 5d1d87501cc..70880927b7e 100644 --- a/src/test/x86/prctl_tsc.c +++ b/src/test/prctl_tsc.c @@ -9,6 +9,8 @@ static void skip_handler(__attribute__((unused)) int sig, ctx->uc_mcontext.gregs[REG_EIP] += 2; #elif defined(__x86_64__) ctx->uc_mcontext.gregs[REG_RIP] += 2; +#elif defined(__aarch64__) + ctx->uc_mcontext.pc += 4; #else #error unknown architecture #endif @@ -38,7 +40,7 @@ int main(void) { test_assert(0 == prctl(PR_GET_TSC, &status)); test_assert(PR_TSC_SIGSEGV == status); signal(SIGSEGV, exit_handler); - rdtsc(); + trigger_timer_counter_trap(); return 77; } @@ -48,6 +50,6 @@ int main(void) { signal(SIGSEGV, print_handler); test_assert(0 == prctl(PR_GET_TSC, &status)); test_assert(PR_TSC_SIGSEGV == status); - rdtsc(); + trigger_timer_counter_trap(); return 1; } diff --git a/src/test/prctl_tsc.run b/src/test/prctl_tsc.run new file mode 100644 index 00000000000..d3df9791bff --- /dev/null +++ b/src/test/prctl_tsc.run @@ -0,0 +1,5 @@ +source `dirname $0`/util.sh +if ! prctl_tsc_supported; then + exit 77 +fi +compare_test EXIT-SUCCESS diff --git a/src/test/prctl_tsc_supported.c b/src/test/prctl_tsc_supported.c new file mode 100644 index 00000000000..894de4083c1 --- /dev/null +++ b/src/test/prctl_tsc_supported.c @@ -0,0 +1,6 @@ +#include +#include + +int main(void) { + return prctl(PR_SET_TSC, PR_TSC_ENABLE) == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/test/util.h b/src/test/util.h index 5fc24a66a5c..be06626ed10 100644 --- a/src/test/util.h +++ b/src/test/util.h @@ -220,6 +220,16 @@ inline static void check_data(void* buf, size_t len) { inline static uint64_t rdtsc(void) { return __rdtsc(); } #endif +inline static void trigger_timer_counter_trap(void) { +#if defined(__i386__) || defined(__x86_64) + rdtsc(); +#elif defined(__aarch64__) + __asm__ __volatile__("mrs xzr, cntvct_el0"); +#else +#error "Unknown architecture" +#endif +} + /** * Perform some syscall that writes an event, i.e. is not syscall-buffered. */ diff --git a/src/util.cc b/src/util.cc index 1ddcc709217..05acebbd8f6 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1980,6 +1980,24 @@ SpecialInst special_instruction_at(Task* t, remote_code_ptr ip) { !memcmp(insn, pushf16_insn, sizeof(pushf16_insn))) { return {SpecialInstOpcode::X86_PUSHF16}; } + } else if (t->arch() == aarch64) { + uint8_t insn[4]; + ssize_t ret = + t->read_bytes_fallible(ip.to_data_ptr(), sizeof(insn), insn); + if (ret < 0) { + return {SpecialInstOpcode::NONE}; + } + uint32_t insn_word = + insn[0] | (insn[1] << 8) | (insn[2] << 16) | (insn[3] << 24); + if ((insn_word & 0xffffffe0) == 0xd53be000) { + return {SpecialInstOpcode::ARM_MRS_CNTFRQ_EL0, insn_word & 31}; + } + if ((insn_word & 0xffffffe0) == 0xd53be040) { + return {SpecialInstOpcode::ARM_MRS_CNTVCT_EL0, insn_word & 31}; + } + if ((insn_word & 0xffffffe0) == 0xd53be0c0) { + return {SpecialInstOpcode::ARM_MRS_CNTVCTSS_EL0, insn_word & 31}; + } } return {SpecialInstOpcode::NONE}; } @@ -1997,6 +2015,10 @@ size_t special_instruction_len(SpecialInstOpcode insn) { return sizeof(pushf_insn); } else if (insn == SpecialInstOpcode::X86_PUSHF16) { return sizeof(pushf16_insn); + } else if (insn == SpecialInstOpcode::ARM_MRS_CNTFRQ_EL0 || + insn == SpecialInstOpcode::ARM_MRS_CNTVCT_EL0 || + insn == SpecialInstOpcode::ARM_MRS_CNTVCTSS_EL0) { + return 4; } else { return 0; } diff --git a/src/util.h b/src/util.h index 85ac73cf643..4a4c1638c56 100644 --- a/src/util.h +++ b/src/util.h @@ -488,6 +488,9 @@ int get_num_cpus(); enum class SpecialInstOpcode { NONE, + ARM_MRS_CNTFRQ_EL0, + ARM_MRS_CNTVCT_EL0, + ARM_MRS_CNTVCTSS_EL0, X86_RDTSC, X86_RDTSCP, X86_CPUID, @@ -498,6 +501,7 @@ enum class SpecialInstOpcode { struct SpecialInst { SpecialInstOpcode opcode; + unsigned regno = 0; }; /* If |t->ip()| points at a decoded instruction, return the instruction */ @@ -650,6 +654,28 @@ inline unsigned long long rdtsc(void) { #endif } +inline unsigned long long cntfrq(void) { +#if defined(__aarch64__) + unsigned long long val; + asm volatile("mrs %0, CNTFRQ_EL0" : "=r" (val)); + return val; +#else + FATAL() << "Reached AArch64-only code path on non-AArch64 architecture"; + return 0; +#endif +} + +inline unsigned long long cntvct(void) { +#if defined(__aarch64__) + unsigned long long val; + asm volatile("mrs %0, CNTVCT_EL0" : "=r" (val)); + return val; +#else + FATAL() << "Reached AArch64-only code path on non-AArch64 architecture"; + return 0; +#endif +} + inline unsigned long long dczid_el0_block_size(void) { #if defined(__aarch64__) unsigned long long val;