diff --git a/clients/drcachesim/tests/offline-burst_aarch64_sys.templatex b/clients/drcachesim/tests/offline-burst_aarch64_sys.templatex index a8514659127..2fc59c82408 100644 --- a/clients/drcachesim/tests/offline-burst_aarch64_sys.templatex +++ b/clients/drcachesim/tests/offline-burst_aarch64_sys.templatex @@ -13,7 +13,7 @@ Total counts: .* total data loads .* total data stores 1 total icache flushes - 4 total dcache flushes + 3 total dcache flushes 1 total threads .* total timestamp \+ cpuid markers .* @@ -25,6 +25,6 @@ Thread .* counts: .* data loads .* data stores 1 icache flushes - 4 dcache flushes + 3 dcache flushes .* timestamp \+ cpuid markers .* diff --git a/clients/drcachesim/tests/offline-legacy-int-offs.templatex b/clients/drcachesim/tests/offline-legacy-int-offs.templatex index 95775cc3508..a022f00abb5 100644 --- a/clients/drcachesim/tests/offline-legacy-int-offs.templatex +++ b/clients/drcachesim/tests/offline-legacy-int-offs.templatex @@ -3,12 +3,12 @@ ERROR: failed to initialize analyzer: Directory setup failed: Failed sanity chec #elif defined(X86) && defined(X64) Basic counts tool results: Total counts: - 109205 total \(fetched\) instructions - 6768 total unique \(fetched\) instructions + 109201 total \(fetched\) instructions + 6765 total unique \(fetched\) instructions 93 total non-fetched instructions 11 total prefetches - 23974 total data loads - 5544 total data stores + 23972 total data loads + 5543 total data stores 0 total icache flushes 0 total dcache flushes 3 total threads @@ -25,14 +25,14 @@ Total counts: 0 total system call number markers 0 total blocking system call markers 12 total other markers - 8429 total encodings + 8426 total encodings Thread 552306 counts: - 101049 \(fetched\) instructions - 6393 unique \(fetched\) instructions + 101045 \(fetched\) instructions + 6390 unique \(fetched\) instructions 93 non-fetched instructions 11 prefetches - 21712 data loads - 4442 data stores + 21710 data loads + 4441 data stores 0 icache flushes 0 dcache flushes 110 timestamp \+ cpuid markers @@ -48,7 +48,7 @@ Thread 552306 counts: 0 system call number markers 0 blocking system call markers 4 other markers - 6393 encodings + 6390 encodings Thread 552323 counts: 4674 \(fetched\) instructions 1028 unique \(fetched\) instructions diff --git a/clients/drcachesim/tests/raw2trace_unit_tests.cpp b/clients/drcachesim/tests/raw2trace_unit_tests.cpp index 6466d90fd1e..14f68f4600c 100644 --- a/clients/drcachesim/tests/raw2trace_unit_tests.cpp +++ b/clients/drcachesim/tests/raw2trace_unit_tests.cpp @@ -436,9 +436,10 @@ test_branch_delays(void *drcontext) check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE) && check_entry(entries, idx, TRACE_TYPE_THREAD, -1) && check_entry(entries, idx, TRACE_TYPE_PID, -1) && - check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE) && check_entry(entries, idx, TRACE_TYPE_MARKER, - TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) && + TRACE_MARKER_TYPE_CACHE_LINE_SIZE) && // 5 type: 28 size: 10 val: 64 + check_entry(entries, idx, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) && // 10000000 check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && // Both branches should be delayed until after the timestamp+cpu markers: @@ -3156,6 +3157,124 @@ test_ifiltered(void *drcontext) #endif } +/* Tests asynchronous signal handling (i#7050). */ +bool +test_asynchronous_signal(void *drcontext) +{ + bool res = true; + { + std::cerr << "\n===============\nTesting instr removal by async signal\n"; + instrlist_t *ilist = instrlist_create(drcontext); + // raw2trace doesn't like offsets of 0 so we shift with a nop. + instr_t *nop = XINST_CREATE_nop(drcontext); + instr_t *move = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *move2 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG2), opnd_create_reg(REG1)); + instrlist_append(ilist, nop); + instrlist_append(ilist, move); + instrlist_append(ilist, move2); + size_t offs_nop = 0; + size_t offs_move1 = offs_nop + instr_length(drcontext, nop); + + std::vector raw; + raw.push_back(make_header()); + raw.push_back(make_tid()); + raw.push_back(make_pid()); + raw.push_back(make_line_size()); + raw.push_back(make_timestamp()); + raw.push_back(make_core()); + raw.push_back(make_block(offs_move1, 1)); + raw.push_back(make_marker(TRACE_MARKER_TYPE_KERNEL_EVENT, offs_move1)); + raw.push_back(make_exit()); + + std::vector stats; + std::vector entries; + if (!run_raw2trace(drcontext, raw, ilist, entries, &stats)) + return false; + int idx = 0; + res = check_entry(entries, idx, TRACE_TYPE_HEADER, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE) && + check_entry(entries, idx, TRACE_TYPE_THREAD, -1) && + check_entry(entries, idx, TRACE_TYPE_PID, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CACHE_LINE_SIZE) && + check_entry(entries, idx, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + // The move instruction is removed because of the asynchronous signal. + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_KERNEL_EVENT, + offs_move1) && + check_entry(entries, idx, TRACE_TYPE_THREAD_EXIT, -1) && + check_entry(entries, idx, TRACE_TYPE_FOOTER, -1); + } + { + std::cerr + << "\n===============\nTesting instr and memref removal by async signal\n"; + instrlist_t *ilist = instrlist_create(drcontext); + // raw2trace doesn't like offsets of 0 so we shift with a nop. + instr_t *nop = XINST_CREATE_nop(drcontext); + instr_t *move = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *store = XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(REG2, 0), + opnd_create_reg(REG1)); + instr_t *move2 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG2), opnd_create_reg(REG1)); + instrlist_append(ilist, nop); + instrlist_append(ilist, move); + instrlist_append(ilist, store); + instrlist_append(ilist, move2); + size_t offs_nop = 0; + size_t offs_move1 = offs_nop + instr_length(drcontext, nop); + size_t offs_store = offs_move1 + instr_length(drcontext, move); + size_t offs_move2 = offs_store + instr_length(drcontext, store); + + std::vector raw; + raw.push_back(make_header()); + raw.push_back(make_tid()); + raw.push_back(make_pid()); + raw.push_back(make_line_size()); + raw.push_back(make_timestamp()); + raw.push_back(make_core()); + raw.push_back(make_block(offs_move1, 2)); + raw.push_back(make_memref(42)); + raw.push_back(make_marker(TRACE_MARKER_TYPE_KERNEL_EVENT, offs_store)); + raw.push_back(make_block(offs_move2, 1)); + raw.push_back(make_exit()); + + std::vector stats; + std::vector entries; + if (!run_raw2trace(drcontext, raw, ilist, entries, &stats)) + return false; + int idx = 0; + res &= + (check_entry(entries, idx, TRACE_TYPE_HEADER, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE) && + check_entry(entries, idx, TRACE_TYPE_THREAD, -1) && + check_entry(entries, idx, TRACE_TYPE_PID, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CACHE_LINE_SIZE) && + check_entry(entries, idx, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + // The move instruction. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1, offs_move1) && + // The store instruction and the memref are removed. + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_KERNEL_EVENT, + offs_store) && + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1, offs_move2) && + check_entry(entries, idx, TRACE_TYPE_THREAD_EXIT, -1) && + check_entry(entries, idx, TRACE_TYPE_FOOTER, -1)); + } + return res; +} + int test_main(int argc, const char *argv[]) { @@ -3177,7 +3296,8 @@ test_main(int argc, const char *argv[]) !test_xfer_modoffs(drcontext) || !test_xfer_absolute(drcontext) || !test_branch_decoration(drcontext) || !test_stats_timestamp_instr_count(drcontext) || - !test_is_maybe_blocking_syscall(drcontext) || !test_ifiltered(drcontext)) + !test_is_maybe_blocking_syscall(drcontext) || !test_ifiltered(drcontext) || + !test_asynchronous_signal(drcontext)) return 1; return 0; } diff --git a/clients/drcachesim/tests/signal_invariants.c b/clients/drcachesim/tests/signal_invariants.c index a0044b0623f..f72371d7a4b 100644 --- a/clients/drcachesim/tests/signal_invariants.c +++ b/clients/drcachesim/tests/signal_invariants.c @@ -190,8 +190,10 @@ GLOBAL_LABEL(FUNCNAME:) #define FUNCNAME test_signal_midbb DECLARE_FUNC(FUNCNAME) GLOBAL_LABEL(FUNCNAME:) - /* prefetcht2's address is the instr count until a signal */ - prefetcht2 [3] + /* prefetcht2's address is the instr count until a signal not counting + * the preempted instr. + */ + prefetcht2 [2] nop nop ud2 @@ -205,8 +207,10 @@ GLOBAL_LABEL(FUNCNAME:) #define FUNCNAME test_signal_startbb DECLARE_FUNC(FUNCNAME) GLOBAL_LABEL(FUNCNAME:) - /* prefetcht2's address is the instr count until a signal */ - prefetcht2 [2] + /* prefetcht2's address is the instr count until a signal not counting + * the preempted instr. + */ + prefetcht2 [1] jmp new_bb new_bb: ud2 @@ -221,10 +225,14 @@ GLOBAL_LABEL(FUNCNAME:) * XXX i#3958: Today the 2nd movs memref is incorrectly included *before* * the fault. */ - /* prefetcht2's address is the instr count until a signal */ - prefetcht2 [5] - /* prefetcht1's address is the memref count until a signal */ - prefetcht1 [3] + /* prefetcht2's address is the instr count until a signal not counting + * the preempted instr. + */ + prefetcht2 [4] + /* prefetcht1's address is the memref count until a signal not counting + * the preempted memref. + */ + prefetcht1 [1] mov REG_XSI, HEX(42) mov REG_XDI, REG_XSP push REG_XAX diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index cd6fa455a4c..7908ba47904 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1816,6 +1816,59 @@ raw2trace_t::analyze_elidable_addresses(raw2trace_thread_data_t *tdata, uint64 m return true; } +bool +raw2trace_t::preempted_by_kernel_event(raw2trace_thread_data_t *tdata, uint64_t cur_pc, + uint64_t cur_offs) +{ + bool is_preempted = false; + std::deque pre_read_buffer; + + const offline_entry_t *next_entry = get_next_entry(tdata); + while (next_entry != NULL) { + pre_read_buffer.push_front(*next_entry); + if (next_entry->addr.type == OFFLINE_TYPE_MEMREF || + next_entry->addr.type == OFFLINE_TYPE_MEMREF_HIGH) { + next_entry = get_next_entry(tdata); + continue; + } + + if (next_entry->extended.type != OFFLINE_TYPE_EXTENDED) { + break; + } + if (next_entry->extended.ext == OFFLINE_EXT_TYPE_MEMINFO) { + next_entry = get_next_entry(tdata); + continue; + } + if (next_entry->extended.ext == OFFLINE_EXT_TYPE_MARKER && + next_entry->extended.valueB == TRACE_MARKER_TYPE_KERNEL_EVENT) { + uintptr_t marker_val = 0; + if (!get_marker_value(tdata, &next_entry, &marker_val)) { + return false; + } + // A signal/exception marker in the next entry could be at any point + // among non-memref instrs, or it could be after this bb. + // We check the stored PC. + int version = get_version(tdata); + if (version < OFFLINE_FILE_VERSION_KERNEL_INT_PC) { + // We have only the offs, so we can't handle differing modules for + // the source and target for legacy traces. + if (marker_val == cur_offs) + is_preempted = true; + } else { + if (marker_val == cur_pc) + is_preempted = true; + } + } + break; + } + // Move the entries back to the pre_read queues. + while (!pre_read_buffer.empty()) { + tdata->pre_read.push_front(pre_read_buffer.front()); + pre_read_buffer.pop_front(); + } + return is_preempted; +} + bool raw2trace_t::process_memref(raw2trace_thread_data_t *tdata, trace_entry_t **buf_in, const instr_summary_t *instr, @@ -1825,12 +1878,13 @@ raw2trace_t::process_memref(raw2trace_thread_data_t *tdata, trace_entry_t **buf_ DR_PARAM_OUT bool *reached_end_of_memrefs, DR_PARAM_OUT bool *interrupted) { - if (!append_memref(tdata, buf_in, instr, memref, write, reg_vals, - reached_end_of_memrefs)) - return false; - return handle_kernel_interrupt_and_markers(tdata, buf_in, cur_pc, cur_offs, - instr->length(), instrs_are_separate, - interrupted); + if (*interrupted) { + // Flush the memref entry. + get_next_entry(tdata); + return true; + } + return append_memref(tdata, buf_in, instr, memref, write, reg_vals, + reached_end_of_memrefs); } bool @@ -2023,38 +2077,44 @@ raw2trace_t::append_bb_entries(raw2trace_thread_data_t *tdata, } } } - if (!skip_icache && record_encoding_emitted(tdata, decode_pc)) { - if (!append_encoding(tdata, decode_pc, instr->length(), buf, buf_start)) - return false; - } - - // XXX i#1729: make bundles via lazy accum until hit memref/end, if - // we don't need encodings. - buf->type = instr->type(); - if (buf->type == TRACE_TYPE_INSTR_MAYBE_FETCH) { - // We want it to look like the original rep string, with just one instr - // fetch for the whole loop, instead of the drutil-expanded loop. - // We fix up the maybe-fetch here so our offline file doesn't have to - // rely on our own reader. - if (!was_prev_instr_rep_string(tdata)) { - set_prev_instr_rep_string(tdata, true); - buf->type = TRACE_TYPE_INSTR; - } else { - log(3, "Skipping instr fetch for " PFX "\n", (ptr_uint_t)decode_pc); - // We still include the instr to make it easier for core simulators - // (i#2051). - buf->type = TRACE_TYPE_INSTR_NO_FETCH; + // i#7050: remove instructions which were preempted by a kernel event. + // The trace is recording instruction retirement, premmpted instructions + // and the corresponding memrefs are removed. + bool preempted = preempted_by_kernel_event(tdata, cur_pc, cur_offs); + if (!preempted) { + if (!skip_icache && record_encoding_emitted(tdata, decode_pc)) { + if (!append_encoding(tdata, decode_pc, instr->length(), buf, buf_start)) + return false; } - } else - set_prev_instr_rep_string(tdata, false); - if (instr->is_syscall()) - set_last_pc_if_syscall(tdata, orig_pc); - else - set_last_pc_if_syscall(tdata, 0); - buf->size = (ushort)(skip_icache ? 0 : instr->length()); - buf->addr = (addr_t)orig_pc; - ++buf; - log(4, "Appended instr fetch for original %p\n", orig_pc); + + // XXX i#1729: make bundles via lazy accum until hit memref/end, if + // we don't need encodings. + buf->type = instr->type(); + if (buf->type == TRACE_TYPE_INSTR_MAYBE_FETCH) { + // We want it to look like the original rep string, with just one instr + // fetch for the whole loop, instead of the drutil-expanded loop. + // We fix up the maybe-fetch here so our offline file doesn't have to + // rely on our own reader. + if (!was_prev_instr_rep_string(tdata)) { + set_prev_instr_rep_string(tdata, true); + buf->type = TRACE_TYPE_INSTR; + } else { + log(3, "Skipping instr fetch for " PFX "\n", (ptr_uint_t)decode_pc); + // We still include the instr to make it easier for core simulators + // (i#2051). + buf->type = TRACE_TYPE_INSTR_NO_FETCH; + } + } else + set_prev_instr_rep_string(tdata, false); + if (instr->is_syscall()) + set_last_pc_if_syscall(tdata, orig_pc); + else + set_last_pc_if_syscall(tdata, 0); + buf->size = (ushort)(skip_icache ? 0 : instr->length()); + buf->addr = (addr_t)orig_pc; + ++buf; + log(4, "Appended instr fetch for original %p\n", orig_pc); + } decode_pc = pc; if (tdata->rseq_past_end_) { // In case handle_kernel_interrupt_and_markers() calls @@ -2064,20 +2124,7 @@ raw2trace_t::append_bb_entries(raw2trace_thread_data_t *tdata, return false; buf = buf_start; } - // Check for a signal *after* the instruction. The trace is recording - // instruction *fetches*, not instruction retirement, and we want to - // include a faulting instruction before its raised signal. - bool interrupted = false; - if (!handle_kernel_interrupt_and_markers(tdata, &buf, cur_pc, cur_offs, - instr->length(), instrs_are_separate, - &interrupted)) - return false; - if (interrupted) { - log(3, "Stopping bb at kernel interruption point %p\n", cur_pc); - } - // We need to interleave instrs with memrefs. - // There is no following memref for (instrs_are_separate && !skip_icache). - if (!interrupted && (!instrs_are_separate || skip_icache) && + if (!preempted && (!instrs_are_separate || skip_icache) && // Rule out OP_lea. (instr->reads_memory() || instr->writes_memory()) && // No following memref for instruction-only trace type. @@ -2104,32 +2151,52 @@ raw2trace_t::append_bb_entries(raw2trace_thread_data_t *tdata, // dest/src of the original scatter/gather instr for all. is_scatter ? instr->mem_dest_at(0) : instr->mem_src_at(0), is_scatter, reg_vals, cur_pc, cur_offs, instrs_are_separate, - &reached_end_of_memrefs, &interrupted)) + &reached_end_of_memrefs, &preempted)) return false; - if (interrupted) + if (preempted) break; } } else { for (uint j = 0; j < instr->num_mem_srcs(); j++) { if (!process_memref(tdata, &buf, instr, instr->mem_src_at(j), false, reg_vals, cur_pc, cur_offs, instrs_are_separate, - nullptr, &interrupted)) + nullptr, &preempted)) return false; - if (interrupted) + if (preempted) break; } // We break before subsequent memrefs on an interrupt, though with // today's tracer that will never happen (i#3958). - for (uint j = 0; !interrupted && j < instr->num_mem_dests(); j++) { + for (uint j = 0; !preempted && j < instr->num_mem_dests(); j++) { if (!process_memref(tdata, &buf, instr, instr->mem_dest_at(j), true, reg_vals, cur_pc, cur_offs, instrs_are_separate, - nullptr, &interrupted)) + nullptr, &preempted)) return false; - if (interrupted) + if (preempted) + break; + } + } + } else { + // Flush memref entries. + if (preempted) { + const offline_entry_t *next_entry = get_next_entry(tdata); + while (next_entry != nullptr) { + if (next_entry->addr.type != OFFLINE_TYPE_MEMREF && + next_entry->addr.type != OFFLINE_TYPE_MEMREF_HIGH) break; + next_entry = get_next_entry(tdata); } + if (next_entry != nullptr) + tdata->pre_read.push_front(*next_entry); } } + // Check for a signal *after* the instruction. + bool interrupted = false; + if (!handle_kernel_interrupt_and_markers(tdata, &buf, cur_pc, cur_offs, + instr->length(), instrs_are_separate, + &interrupted)) + return false; + cur_pc += instr->length(); cur_offs += instr->length(); if ((size_t)(buf - buf_start) >= WRITE_BUFFER_SIZE) { diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index a2f1b83b168..fa5f18f7871 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -1488,6 +1488,13 @@ class raw2trace_t { analyze_elidable_addresses(raw2trace_thread_data_t *tdata, uint64 modidx, uint64 modoffs, app_pc start_pc, uint instr_count); + // Returns true if a kernel interrupt happened at cur_pc. The check skips + // memrefs to search for a kernel event marker. If the kernel event marker has + // the value of cur_pc, it returns true. + bool + preempted_by_kernel_event(raw2trace_thread_data_t *tdata, uint64_t cur_pc, + uint64_t cur_offs); + bool process_memref(raw2trace_thread_data_t *tdata, trace_entry_t **buf_in, const instr_summary_t *instr, instr_summary_t::memref_summary_t memref,