From 71f7490e5bd244dc71ea439d0520e4a033235974 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Tue, 21 Nov 2023 18:25:02 -0500 Subject: [PATCH 1/5] i#6471 sched idle: Add idle time Adds a new STATUS_IDLE return code, and a corresponding TRACE_MARKER_TYPE_CORE_IDLE record. Changes the scheduler behavior to no longer return STATUS_EOF for an output when the ready queue is empty: instead STATUS_IDLE is returned until every single input is at EOF. This results in a more realistic schedule where other cores can pick up work later rather than disappearing from the system. Augments the schedule_stats tool to count idle replies and compute a % cpu usage metric. Adds a unit test for counting idles. Augments the scheduler_launcher to also compute %cpu usage. Updates all the scheduler tests for the new change. Adding idle time due to blocking syscalls will be done separately. Issue: #6471 --- clients/drcachesim/analyzer.cpp | 33 +++++++++- clients/drcachesim/analyzer.h | 3 + clients/drcachesim/common/options.cpp | 4 +- clients/drcachesim/common/trace_entry.h | 11 ++++ clients/drcachesim/reader/reader.h | 3 +- clients/drcachesim/scheduler/scheduler.cpp | 61 ++++++++++++++----- clients/drcachesim/scheduler/scheduler.h | 27 +++++++- .../tests/schedule_stats_nopreempt.templatex | 10 +++ .../drcachesim/tests/schedule_stats_test.cpp | 58 +++++++++++++++++- .../drcachesim/tests/scheduler_launcher.cpp | 31 +++++++++- .../drcachesim/tests/scheduler_unit_tests.cpp | 61 +++++++++++-------- clients/drcachesim/tools/basic_counts.cpp | 3 +- clients/drcachesim/tools/schedule_stats.cpp | 51 ++++++++++++---- clients/drcachesim/tools/schedule_stats.h | 3 + clients/drcachesim/tools/view.cpp | 1 + 15 files changed, 292 insertions(+), 68 deletions(-) diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp index 8b0452f3d91..ddd51ba291f 100644 --- a/clients/drcachesim/analyzer.cpp +++ b/clients/drcachesim/analyzer.cpp @@ -133,6 +133,17 @@ analyzer_t::create_wait_marker() 
return record; } +template <> +memref_t +analyzer_t::create_idle_marker() +{ + memref_t record = {}; // Zero the other fields. + record.marker.type = TRACE_TYPE_MARKER; + record.marker.marker_type = TRACE_MARKER_TYPE_CORE_IDLE; + record.marker.tid = INVALID_THREAD_ID; + return record; +} + /****************************************************************************** * Specializations for analyzer_tmpl_t, aka record_analyzer_t. */ @@ -182,6 +193,17 @@ record_analyzer_t::create_wait_marker() return record; } +template <> +trace_entry_t +record_analyzer_t::create_idle_marker() +{ + trace_entry_t record; + record.type = TRACE_TYPE_MARKER; + record.size = TRACE_MARKER_TYPE_CORE_IDLE; + record.addr = 0; // Marker value has no meaning so we zero it. + return record; +} + /******************************************************************** * Other analyzer_tmpl_t routines that do not need to be specialized. */ @@ -537,6 +559,11 @@ analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *w // We synthesize a record here. If we wanted this to count toward output // stream ordinals we would need to add a scheduler API to inject it. record = create_wait_marker(); + } else if (status == sched_type_t::STATUS_IDLE) { + // We let tools know about idle time so they can analyze cpu usage. + // We synthesize a record here. If we wanted this to count toward output + // stream ordinals we would need to add a scheduler API to inject it. 
+ record = create_idle_marker(); } else if (status != sched_type_t::STATUS_OK) { if (status == sched_type_t::STATUS_REGION_INVALID) { worker->error = @@ -596,8 +623,10 @@ analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *w } } if (shard_type_ == SHARD_BY_CORE) { - if (!process_shard_exit(worker, worker->index)) - return; + if (worker->shard_data.find(worker->index) != worker->shard_data.end()) { + if (!process_shard_exit(worker, worker->index)) + return; + } } for (const auto &keyval : worker->shard_data) { if (!keyval.second.exited) { diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h index 35a1df83a56..8ebc10547b2 100644 --- a/clients/drcachesim/analyzer.h +++ b/clients/drcachesim/analyzer.h @@ -252,6 +252,9 @@ template class analyzer_tmpl_t { RecordType create_wait_marker(); + RecordType + create_idle_marker(); + // Invoked when the given interval finishes during serial or parallel // analysis of the trace. For parallel analysis, the shard_id // parameter should be set to the shard_id for which the interval diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 71d7c5ac2f1..53fceb2c03d 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -809,7 +809,7 @@ droption_t op_core_sharded( "software threads. This option instead schedules those threads onto virtual cores " "and analyzes each core in parallel. Thus, each shard consists of pieces from " "many software threads. How the scheduling is performed is controlled by a set " - "of options with the prefix \"sched_\" along with -num_cores."); + "of options with the prefix \"sched_\" along with -cores."); droption_t op_core_serial( DROPTION_SCOPE_ALL, "core_serial", false, "Analyze per-core in serial.", @@ -817,7 +817,7 @@ droption_t op_core_serial( "However, the resulting schedule is acted upon by a single analysis thread" "which walks the N cores in lockstep in round robin fashion. 
" "How the scheduling is performed is controlled by a set " - "of options with the prefix \"sched_\" along with -num_cores."); + "of options with the prefix \"sched_\" along with -cores."); droption_t op_sched_quantum(DROPTION_SCOPE_ALL, "sched_quantum", 1 * 1000 * 1000, diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index c885ed1a72a..9cffd946533 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -583,6 +583,17 @@ typedef enum { */ TRACE_MARKER_TYPE_CORE_WAIT, + /** + * This marker is used for core-sharded analyses to indicate that the current + * core has no available inputs to run (all inputs are on other cores or are + * blocked waiting for kernel resources). A new marker is emitted each + * time the tool analysis framework requests a new record from the scheduler and + * is given a wait status. There are no units of time here but each repetition + * is roughly the time where a regular record could have been read and passed + * along. + */ + TRACE_MARKER_TYPE_CORE_IDLE, + // ... // These values are reserved for future built-in marker types. // ... diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h index ad2b64b7f82..98dcae855c2 100644 --- a/clients/drcachesim/reader/reader.h +++ b/clients/drcachesim/reader/reader.h @@ -193,7 +193,8 @@ class reader_t : public std::iterator, is_record_synthetic() const override { if (cur_ref_.marker.type == TRACE_TYPE_MARKER && - cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT) { + (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT || + cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CORE_IDLE)) { // These are synthetic records not part of the input and not // counting toward ordinals. 
return true; diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index 14fda834445..8786d8d5e0b 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -631,6 +631,8 @@ scheduler_tmpl_t::init( } } } + VPRINT(this, 1, "%zu inputs\n", inputs_.size()); + live_input_count_.store(inputs_.size(), std::memory_order_release); return set_initial_schedule(workload2inputs); } @@ -1313,7 +1315,7 @@ scheduler_tmpl_t::advance_region_of_interest( input.cur_region); if (input.cur_region >= static_cast(input.regions_of_interest.size())) { if (input.at_eof) - return sched_type_t::STATUS_EOF; + return eof_or_idle(output); else { // We let the user know we're done. if (options_.schedule_record_ostream != nullptr) { @@ -1329,7 +1331,7 @@ scheduler_tmpl_t::advance_region_of_interest( return status; } input.queue.push_back(create_thread_exit(input.tid)); - input.at_eof = true; + mark_input_eof(input); return sched_type_t::STATUS_SKIPPED; } } @@ -1408,7 +1410,7 @@ scheduler_tmpl_t::skip_instructions(output_ordinal_t out if (*input.reader == *input.reader_end) { // Raise error because the input region is out of bounds. VPRINT(this, 2, "skip_instructions: input=%d skip out of bounds\n", input.index); - input.at_eof = true; + mark_input_eof(input); return sched_type_t::STATUS_REGION_INVALID; } input.in_cur_region = true; @@ -1645,7 +1647,7 @@ scheduler_tmpl_t::pick_next_input_as_previously( { if (outputs_[output].record_index + 1 >= static_cast(outputs_[output].record.size())) - return sched_type_t::STATUS_EOF; + return eof_or_idle(output); const schedule_record_t &segment = outputs_[output].record[outputs_[output].record_index + 1]; index = segment.key.input; @@ -1719,7 +1721,7 @@ scheduler_tmpl_t::pick_next_input_as_previously( // queued candidate record, if any. 
clear_input_queue(inputs_[index]); inputs_[index].queue.push_back(create_thread_exit(inputs_[index].tid)); - inputs_[index].at_eof = true; + mark_input_eof(inputs_[index]); VPRINT(this, 2, "early end for input %d\n", index); // We're done with this entry but we need the queued record to be read, // so we do not move past the entry. @@ -1773,7 +1775,11 @@ scheduler_tmpl_t::pick_next_input(output_ordinal_t outpu const schedule_record_t &segment = outputs_[output].record[outputs_[output].record_index]; int input = segment.key.input; - VPRINT(this, res == sched_type_t::STATUS_WAIT ? 3 : 2, + VPRINT(this, + (res == sched_type_t::STATUS_IDLE || + res == sched_type_t::STATUS_WAIT) + ? 3 + : 2, "next_record[%d]: replay segment in=%d (@%" PRId64 ") type=%d start=%" PRId64 " end=%" PRId64 "\n", output, input, @@ -1819,10 +1825,10 @@ scheduler_tmpl_t::pick_next_input(output_ordinal_t outpu // We found a direct switch target above. } else if (ready_queue_empty()) { if (prev_index == INVALID_INPUT_ORDINAL) - return sched_type_t::STATUS_EOF; + return eof_or_idle(output); std::lock_guard lock(*inputs_[prev_index].lock); if (inputs_[prev_index].at_eof) - return sched_type_t::STATUS_EOF; + return eof_or_idle(output); else index = prev_index; // Go back to prior. 
} else { @@ -1836,7 +1842,7 @@ scheduler_tmpl_t::pick_next_input(output_ordinal_t outpu } input_info_t *queue_next = pop_from_ready_queue(output); if (queue_next == nullptr) - return sched_type_t::STATUS_EOF; + return eof_or_idle(output); index = queue_next->index; } } else if (options_.deps == DEPENDENCY_TIMESTAMPS) { @@ -1850,7 +1856,7 @@ scheduler_tmpl_t::pick_next_input(output_ordinal_t outpu } } if (index < 0) - return sched_type_t::STATUS_EOF; + return eof_or_idle(output); VPRINT(this, 2, "next_record[%d]: advancing to timestamp %" PRIu64 " == input #%d\n", @@ -1883,14 +1889,15 @@ scheduler_tmpl_t::pick_next_input(output_ordinal_t outpu std::lock_guard lock(*inputs_[index].lock); if (inputs_[index].at_eof || *inputs_[index].reader == *inputs_[index].reader_end) { - VPRINT(this, 2, "next_record[%d]: local index %d == input #%d at eof\n", - output, outputs_[output].input_indices_index, index); + VPRINT(this, 2, "next_record[%d]: input #%d at eof\n", output, index); if (options_.schedule_record_ostream != nullptr && prev_index != INVALID_INPUT_ORDINAL) close_schedule_segment(output, inputs_[prev_index]); - inputs_[index].at_eof = true; + if (!inputs_[index].at_eof) + mark_input_eof(inputs_[index]); index = INVALID_INPUT_ORDINAL; // Loop and pick next thread. + prev_index = INVALID_INPUT_ORDINAL; continue; } break; @@ -1911,7 +1918,7 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, // check for quantum end. outputs_[output].cur_time = cur_time; // Invalid values are checked below. if (!outputs_[output].active) - return sched_type_t::STATUS_WAIT; + return sched_type_t::STATUS_IDLE; if (outputs_[output].waiting) { VPRINT(this, 5, "next_record[%d]: need new input (cur=waiting)\n", output); sched_type_t::stream_status_t res = pick_next_input(output, true); @@ -1922,7 +1929,7 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, if (outputs_[output].cur_input < 0) { // This happens with more outputs than inputs. 
For non-empty outputs we // require cur_input to be set to >=0 during init(). - return sched_type_t::STATUS_EOF; + return eof_or_idle(output); } input = &inputs_[outputs_[output].cur_input]; auto lock = std::unique_lock(*input->lock); @@ -1970,6 +1977,8 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, input->needs_advance = true; } if (input->at_eof || *input->reader == *input->reader_end) { + if (!input->at_eof) + mark_input_eof(*input); lock.unlock(); VPRINT(this, 5, "next_record[%d]: need new input (cur=%d eof)\n", output, input->index); @@ -1998,6 +2007,7 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, if (outputs_[output].record_index >= static_cast(outputs_[output].record.size())) { // We're on the last record. + VPRINT(this, 4, "next_record[%d]: on last record\n", output); } else if (outputs_[output].record[outputs_[output].record_index].type == schedule_record_t::SKIP) { VPRINT(this, 5, "next_record[%d]: need new input after skip\n", output); @@ -2257,6 +2267,27 @@ scheduler_tmpl_t::stop_speculation(output_ordinal_t outp return sched_type_t::STATUS_OK; } +template +void +scheduler_tmpl_t::mark_input_eof(input_info_t &input) +{ + input.at_eof = true; + assert(live_input_count_.load(std::memory_order_acquire) > 0); + live_input_count_.fetch_add(-1, std::memory_order_release); +} + +template +typename scheduler_tmpl_t::stream_status_t +scheduler_tmpl_t::eof_or_idle(output_ordinal_t output) +{ + if (live_input_count_.load(std::memory_order_acquire) == 0) { + return sched_type_t::STATUS_EOF; + } else { + outputs_[output].waiting = true; + return sched_type_t::STATUS_IDLE; + } +} + template typename scheduler_tmpl_t::stream_status_t scheduler_tmpl_t::set_output_active(output_ordinal_t output, diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index 046c57e3085..e9f999117bb 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -45,6 +45,7 @@ #include 
#include +#include #include #include #include @@ -112,7 +113,9 @@ template class scheduler_tmpl_t { * (#DEPENDENCY_TIMESTAMPS) to avoid one stream getting ahead of another. For * replaying a schedule as it was traced with #MAP_TO_RECORDED_OUTPUT this can * indicate an idle period on a core where the traced workload was not currently - * scheduled. + * scheduled, but generally #STATUS_WAIT should be treated as artificial. + * Simulators are suggested to not advance simulated time for #STATUS_WAIT while + * they should advance time for #STATUS_IDLE. */ STATUS_WAIT, STATUS_INVALID, /**< Error condition. */ @@ -120,6 +123,15 @@ template class scheduler_tmpl_t { STATUS_NOT_IMPLEMENTED, /**< Feature not implemented. */ STATUS_SKIPPED, /**< Used for internal scheduler purposes. */ STATUS_RECORD_FAILED, /**< Failed to record schedule for future replay. */ + /** + * This code indicates that all inputs are blocked waiting for kernel resources + * (such as i/o). This is similar to #STATUS_WAIT, but #STATUS_WAIT indicates an + * artificial pause due to imposing the original ordering while #STATUS_IDLE + * indicates actual idle time in the application. Simulators are suggested + * to not advance simulated time for #STATUS_WAIT while they should advance + * time for #STATUS_IDLE. + */ + STATUS_IDLE, }; /** Identifies an input stream by its index. */ @@ -629,7 +641,7 @@ template class scheduler_tmpl_t { /** * Disables or re-enables this output stream. If "active" is false, this * stream becomes inactive and its currently assigned input is moved to the - * ready queue to be scheduled on other outputs. The #STATUS_WAIT code is + * ready queue to be scheduled on other outputs. The #STATUS_IDLE code is * returned to next_record() for inactive streams. If "active" is true, * this stream becomes active again. * This is only supported for #MAP_TO_ANY_OUTPUT. @@ -1076,7 +1088,7 @@ template class scheduler_tmpl_t { // sched_lock_. 
std::vector record; int record_index = 0; - bool waiting = false; + bool waiting = false; // Waiting or idling. bool active = true; // Used for time-based quanta. uint64_t cur_time = 0; @@ -1259,6 +1271,13 @@ template class scheduler_tmpl_t { stream_status_t set_output_active(output_ordinal_t output, bool active); + // Caller must hold the input's lock. + void + mark_input_eof(input_info_t &input); + + stream_status_t + eof_or_idle(output_ordinal_t output); + /////////////////////////////////////////////////////////////////////////// // Support for ready queues for who to schedule next: @@ -1325,6 +1344,8 @@ template class scheduler_tmpl_t { flexible_queue_t ready_priority_; // Global ready queue counter used to provide FIFO for same-priority inputs. uint64_t ready_counter_ = 0; + // Count of inputs not yet at eof. + std::atomic live_input_count_; // Map from workload,tid pair to input. struct workload_tid_t { workload_tid_t(int wl, memref_tid_t tid) diff --git a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex index 9f1ef413e23..f96422fd667 100644 --- a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex +++ b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex @@ -14,6 +14,8 @@ Total counts: 2 maybe-blocking system calls 0 direct switch requests 0 waits + 0 idles + 100.00% cpu busy Core #0 counts: . threads *[0-9]* instructions @@ -28,6 +30,8 @@ Core #0 counts: . maybe-blocking system calls 0 direct switch requests 0 waits + 0 idles + 100.00% cpu busy Core #1 counts: . threads *[0-9]* instructions @@ -42,6 +46,8 @@ Core #1 counts: . maybe-blocking system calls 0 direct switch requests 0 waits + 0 idles + 100.00% cpu busy Core #2 counts: . threads *[0-9]* instructions @@ -56,6 +62,8 @@ Core #2 counts: . maybe-blocking system calls 0 direct switch requests 0 waits + 0 idles + 100.00% cpu busy Core #3 counts: . threads *[0-9]* instructions @@ -70,6 +78,8 @@ Core #3 counts: . 
maybe-blocking system calls 0 direct switch requests 0 waits + 0 idles + 100.00% cpu busy Core #0 schedule: [A-H,]* Core #1 schedule: [A-H,]* Core #2 schedule: [A-H,]* diff --git a/clients/drcachesim/tests/schedule_stats_test.cpp b/clients/drcachesim/tests/schedule_stats_test.cpp index 897ffe46fbf..48177635375 100644 --- a/clients/drcachesim/tests/schedule_stats_test.cpp +++ b/clients/drcachesim/tests/schedule_stats_test.cpp @@ -54,6 +54,7 @@ namespace drmemtrace { using ::dynamorio::drmemtrace::default_memtrace_stream_t; using ::dynamorio::drmemtrace::memref_t; using ::dynamorio::drmemtrace::memref_tid_t; +using ::dynamorio::drmemtrace::TRACE_MARKER_TYPE_CORE_IDLE; using ::dynamorio::drmemtrace::TRACE_MARKER_TYPE_CORE_WAIT; using ::dynamorio::drmemtrace::TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH; using ::dynamorio::drmemtrace::TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL; @@ -198,10 +199,65 @@ test_basic_stats() return true; } +static bool +test_idle() +{ + static constexpr int64_t TID_A = 42; + static constexpr int64_t TID_B = 142; + static constexpr int64_t TID_C = 242; + std::unordered_map tid2ord; + tid2ord[TID_A] = 0; + tid2ord[TID_B] = 1; + tid2ord[TID_C] = 2; + std::vector> memrefs = { + { + gen_instr(TID_B), + gen_instr(TID_B), + gen_marker(TID_B, TRACE_MARKER_TYPE_CORE_IDLE, 0), + gen_marker(TID_B, TRACE_MARKER_TYPE_CORE_IDLE, 0), + gen_marker(TID_B, TRACE_MARKER_TYPE_CORE_IDLE, 0), + gen_instr(TID_B), + gen_instr(TID_B), + gen_instr(TID_B), + }, + { + gen_instr(TID_C), + // Involuntary switch. + gen_instr(TID_A), + // Involuntary switch. + gen_instr(TID_C), + gen_marker(TID_C, TRACE_MARKER_TYPE_CORE_IDLE, 0), + gen_marker(TID_C, TRACE_MARKER_TYPE_CORE_IDLE, 0), + gen_marker(TID_C, TRACE_MARKER_TYPE_CORE_IDLE, 0), + gen_instr(TID_C), + gen_instr(TID_C), + // Wait. + gen_marker(TID_C, TRACE_MARKER_TYPE_CORE_WAIT, 0), + gen_marker(TID_C, TRACE_MARKER_TYPE_CORE_WAIT, 0), + gen_marker(TID_C, TRACE_MARKER_TYPE_CORE_WAIT, 0), + // Involuntary switch. 
+ gen_instr(TID_A), + gen_instr(TID_A), + gen_instr(TID_A), + }, + }; + auto result = run_schedule_stats(memrefs, tid2ord); + assert(result.instrs == 13); + assert(result.total_switches == 3); + assert(result.voluntary_switches == 0); + assert(result.direct_switches == 0); + assert(result.syscalls == 0); + assert(result.maybe_blocking_syscalls == 0); + assert(result.direct_switch_requests == 0); + assert(result.waits == 3); + assert(result.idles == 6); + return true; +} + int test_main(int argc, const char *argv[]) { - if (test_basic_stats()) { + if (test_basic_stats() && test_idle()) { std::cerr << "schedule_stats_test passed\n"; return 0; } diff --git a/clients/drcachesim/tests/scheduler_launcher.cpp b/clients/drcachesim/tests/scheduler_launcher.cpp index bfebb7e5861..814a0b703fa 100644 --- a/clients/drcachesim/tests/scheduler_launcher.cpp +++ b/clients/drcachesim/tests/scheduler_launcher.cpp @@ -155,6 +155,11 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch memref_t record; uint64_t micros = op_sched_time.get_value() ? get_current_microseconds() : 0; uint64_t cur_segment_instrs = 0; + // XXX: Should we remove this tool as the schedule_stats analysis tool has + superseded it? We're basically duplicating new features in both. + bool prev_was_wait = false, prev_was_idle = false; + // Measure cpu usage by counting each next_record() as one cycle. + uint64_t cycles_total = 0, cycles_busy = 0; // Thread ids can be duplicated, so use the input ordinals to distinguish. 
scheduler_t::input_ordinal_t prev_input = scheduler_t::INVALID_INPUT_ORDINAL; for (scheduler_t::stream_status_t status = stream->next_record(record, micros); @@ -162,13 +167,28 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch status = stream->next_record(record, micros)) { if (op_sched_time.get_value()) micros = get_current_microseconds(); + ++cycles_total; if (status == scheduler_t::STATUS_WAIT) { - thread_sequence += '-'; + if (!prev_was_wait || cur_segment_instrs == op_print_every.get_value()) + thread_sequence += '-'; + ++cur_segment_instrs; + if (cur_segment_instrs == op_print_every.get_value()) + cur_segment_instrs = 0; + prev_was_wait = true; std::this_thread::yield(); continue; - } - if (status != scheduler_t::STATUS_OK) + } else if (status == scheduler_t::STATUS_IDLE) { + if (!prev_was_idle || cur_segment_instrs == op_print_every.get_value()) + thread_sequence += '_'; + ++cur_segment_instrs; + if (cur_segment_instrs == op_print_every.get_value()) + cur_segment_instrs = 0; + prev_was_idle = true; + std::this_thread::yield(); + continue; + } else if (status != scheduler_t::STATUS_OK) FATAL_ERROR("scheduler failed to advance: %d", status); + ++cycles_busy; if (op_verbose.get_value() >= 4) { std::ostringstream line; line << "Core #" << std::setw(2) << ordinal << " @" << std::setw(9) @@ -249,6 +269,11 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch } #endif } + float usage = 0; + if (cycles_total > 0) + usage = 100.f * cycles_busy / static_cast(cycles_total); + std::cerr << "Core #" << std::setw(2) << ordinal << " usage: " << std::setw(9) + << usage << "%\n"; } } // namespace diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index 117e6f73e98..2e111ccd20c 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -842,6 +842,10 @@ run_lockstep_simulation(scheduler_t 
&scheduler, int num_outputs, memref_tid_t ti sched_as_string[i] += '-'; continue; } + if (status == scheduler_t::STATUS_IDLE) { + sched_as_string[i] += '_'; + continue; + } assert(status == scheduler_t::STATUS_OK); if (type_is_instr(memref.instr.type)) { sched_as_string[i] += @@ -895,7 +899,7 @@ test_synthetic() // core alternates; with an odd number the 2nd core finishes early. // The dots are thread exits. static const char *const CORE0_SCHED_STRING = "AAACCCEEEGGGBBBDDDFFFAAA.CCC.EEE.GGG."; - static const char *const CORE1_SCHED_STRING = "BBBDDDFFFAAACCCEEEGGGBBB.DDD.FFF."; + static const char *const CORE1_SCHED_STRING = "BBBDDDFFFAAACCCEEEGGGBBB.DDD.FFF.____"; { // Test instruction quanta. std::vector sched_inputs; @@ -1048,7 +1052,7 @@ test_synthetic_time_quanta() check_next(cpu0, ++time, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_INSTR); check_next(cpu0, ++time, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_INSTR); check_next(cpu0, time, scheduler_t::STATUS_OK, TID_C, TRACE_TYPE_THREAD_EXIT); - check_next(cpu0, time, scheduler_t::STATUS_EOF); + check_next(cpu0, time, scheduler_t::STATUS_IDLE); check_next(cpu1, time, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_THREAD_EXIT); check_next(cpu1, time, scheduler_t::STATUS_EOF); if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) @@ -1079,7 +1083,7 @@ test_synthetic_time_quanta() for (int i = 0; i < NUM_OUTPUTS; i++) { std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; } - assert(sched_as_string[0] == "..A..CCC."); + assert(sched_as_string[0] == "..A..CCC._"); assert(sched_as_string[1] == "..BAA.BB."); } #endif @@ -1158,8 +1162,9 @@ test_synthetic_with_timestamps() // workloads we should start with {C,F,I,J} and then move on to {B,E,H} and finish // with {A,D,G}. We should interleave within each group -- except once we reach J // we should completely finish it. 
- assert(sched_as_string[0] == - ".CC.C.II.IC.CC.F.FF.I.II.FF.F..BB.B.HH.HE.EE.BB.B.HH.H..DD.DA.AA.G.GG.DD.D."); + assert( + sched_as_string[0] == + ".CC.C.II.IC.CC.F.FF.I.II.FF.F..BB.B.HH.HE.EE.BB.B.HH.H..DD.DA.AA.G.GG.DD.D._"); assert(sched_as_string[1] == ".FF.F.JJ.JJ.JJ.JJ.J.CC.C.II.I..EE.EB.BB.H.HH.EE.E..AA.A.GG.GD.DD.AA.A.GG.G."); } @@ -1241,8 +1246,9 @@ test_synthetic_with_priorities() // See the test_synthetic_with_timestamps() test which has our base sequence. // We've elevated B, E, and H to higher priorities so they go // first. J remains uninterrupted due to lower timestamps. - assert(sched_as_string[0] == - ".BB.B.HH.HE.EE.BB.B.HH.H..FF.F.JJ.JJ.JJ.JJ.J.CC.C.II.I..DD.DA.AA.G.GG.DD.D."); + assert( + sched_as_string[0] == + ".BB.B.HH.HE.EE.BB.B.HH.H..FF.F.JJ.JJ.JJ.JJ.J.CC.C.II.I..DD.DA.AA.G.GG.DD.D._"); assert(sched_as_string[1] == ".EE.EB.BB.H.HH.EE.E..CC.C.II.IC.CC.F.FF.I.II.FF.F..AA.A.GG.GD.DD.AA.A.GG.G."); } @@ -1308,11 +1314,11 @@ test_synthetic_with_bindings() std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; } // We have {A,B,C} on {2,4}, {D,E,F} on {0,1}, and {G,H,I} on {1,2,3}: - assert(sched_as_string[0] == ".DD.D.FF.FD.DD.F.FF.DD.D.FF.F."); - assert(sched_as_string[1] == ".EE.E.HH.HE.EE.I.II.EE.E."); + assert(sched_as_string[0] == ".DD.D.FF.FD.DD.F.FF.DD.D.FF.F._"); + assert(sched_as_string[1] == ".EE.E.HH.HE.EE.I.II.EE.E.______"); assert(sched_as_string[2] == ".AA.A.CC.CG.GG.C.CC.HH.H.CC.C."); - assert(sched_as_string[3] == ".GG.G.II.IH.HH.GG.G.II.I."); - assert(sched_as_string[4] == ".BB.BA.AA.B.BB.AA.A.BB.B."); + assert(sched_as_string[3] == ".GG.G.II.IH.HH.GG.G.II.I._____"); + assert(sched_as_string[4] == ".BB.BA.AA.B.BB.AA.A.BB.B._____"); } static void @@ -1379,11 +1385,11 @@ test_synthetic_with_bindings_weighted() std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; } // We have {A,B,C} on {2,4}, {D,E,F} on {0,1}, and {G,H,I} on {1,2,3}: - assert(sched_as_string[0] == 
".FF.FF.FF.FF.F..EE.EE.EE.EE.E."); - assert(sched_as_string[1] == ".II.II.II.II.I..DD.DD.DD.DD.D."); - assert(sched_as_string[2] == ".CC.CC.CC.CC.C..AA.AA.AA.AA.A."); + assert(sched_as_string[0] == ".FF.FF.FF.FF.F..EE.EE.EE.EE.E._"); + assert(sched_as_string[1] == ".II.II.II.II.I..DD.DD.DD.DD.D._"); + assert(sched_as_string[2] == ".CC.CC.CC.CC.C..AA.AA.AA.AA.A._"); assert(sched_as_string[3] == ".HH.HH.HH.HH.H..GG.GG.GG.GG.G."); - assert(sched_as_string[4] == ".BB.BB.BB.BB.B."); + assert(sched_as_string[4] == ".BB.BB.BB.BB.B._______________"); } static void @@ -1472,7 +1478,7 @@ test_synthetic_with_syscalls_multiple() // blocking syscall. assert(sched_as_string[0] == ".B..HH.H.B.H.HH..B.HH.H..B.E.B...II.I.JJ.JJ.JJ.JJ.J.CC.C.II.I..DD.DA.AA.G.GG." - "DD.D."); + "DD.D.___"); assert(sched_as_string[1] == ".EE..B..EE..B..EE..B..EE...CC.C.FF.FB..C.CC.F.FF.I.II.FF.F..AA.A.GG.GD.DD.AA." "A.GG.G."); @@ -1538,7 +1544,7 @@ test_synthetic_with_syscalls_single() std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; } assert(sched_as_string[0] == ".AA..AA.A.A.AA..A."); - assert(sched_as_string[1] == ""); + assert(sched_as_string[1] == "__________________"); } static bool @@ -1778,7 +1784,7 @@ simulate_core(scheduler_t::stream_t *stream) memref_t record; for (scheduler_t::stream_status_t status = stream->next_record(record); status != scheduler_t::STATUS_EOF; status = stream->next_record(record)) { - if (status == scheduler_t::STATUS_WAIT) { + if (status == scheduler_t::STATUS_WAIT || status == scheduler_t::STATUS_IDLE) { std::this_thread::yield(); continue; } @@ -1998,7 +2004,7 @@ test_replay() // We expect 3 letter sequences (our quantum) alternating every-other as each // core alternates; with an odd number the 2nd core finishes early. 
static const char *const CORE0_SCHED_STRING = "AAACCCEEEGGGBBBDDDFFFAAA.CCC.EEE.GGG."; - static const char *const CORE1_SCHED_STRING = "BBBDDDFFFAAACCCEEEGGGBBB.DDD.FFF."; + static const char *const CORE1_SCHED_STRING = "BBBDDDFFFAAACCCEEEGGGBBB.DDD.FFF.____"; static constexpr memref_tid_t TID_BASE = 100; std::vector inputs[NUM_INPUTS]; @@ -2090,7 +2096,7 @@ simulate_core_and_record_schedule(scheduler_t::stream_t *stream, memtrace_stream_t *prev_stream = nullptr; for (scheduler_t::stream_status_t status = stream->next_record(record); status != scheduler_t::STATUS_EOF; status = stream->next_record(record)) { - if (status == scheduler_t::STATUS_WAIT) { + if (status == scheduler_t::STATUS_WAIT || status == scheduler_t::STATUS_IDLE) { std::this_thread::yield(); continue; } @@ -2287,7 +2293,7 @@ test_replay_timestamps() // Create a record file with timestamps requiring waiting. // We cooperate with the test_scheduler_t class which constructs this schedule: - static const char *const CORE0_SCHED_STRING = ".AAA-------------------------CCC."; + static const char *const CORE0_SCHED_STRING = ".AAA-------------------------CCC.____"; static const char *const CORE1_SCHED_STRING = ".BBB.CCCCCC.DDDAAABBBDDDAAA.BBB.DDD."; std::string record_fname = "tmp_test_replay_timestamp.zip"; test_scheduler_t test_scheduler; @@ -2515,7 +2521,8 @@ test_replay_limit() memref_t memref; for (scheduler_t::stream_status_t status = stream->next_record(memref); status != scheduler_t::STATUS_EOF; status = stream->next_record(memref)) { - if (status == scheduler_t::STATUS_WAIT) { + if (status == scheduler_t::STATUS_WAIT || + status == scheduler_t::STATUS_IDLE) { std::this_thread::yield(); continue; } @@ -2696,7 +2703,7 @@ test_replay_as_traced() // Synthesize a cpu-schedule file. 
std::string cpu_fname = "tmp_test_cpu_as_traced.zip"; - static const char *const CORE0_SCHED_STRING = "EEE-AAA-CCCAAACCCBBB.DDD."; + static const char *const CORE0_SCHED_STRING = "EEE-AAA-CCCAAACCCBBB.DDD.___"; static const char *const CORE1_SCHED_STRING = "---EEE.BBBDDDBBBDDDAAA.CCC."; { std::vector sched0; @@ -3018,11 +3025,11 @@ test_inactive() // Make cpu1 inactive. status = stream1->set_active(false); assert(status == scheduler_t::STATUS_OK); - check_next(stream1, scheduler_t::STATUS_WAIT); + check_next(stream1, scheduler_t::STATUS_IDLE); // Test making cpu1 inactive while it's already inactive. status = stream1->set_active(false); assert(status == scheduler_t::STATUS_OK); - check_next(stream1, scheduler_t::STATUS_WAIT); + check_next(stream1, scheduler_t::STATUS_IDLE); // Advance cpu0 to its quantum end. check_next(stream0, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_INSTR); // Ensure cpu0 now picks up the input that was on cpu1. @@ -3034,7 +3041,7 @@ test_inactive() // Make cpu0 inactive and cpu1 active. status = stream0->set_active(false); assert(status == scheduler_t::STATUS_OK); - check_next(stream0, scheduler_t::STATUS_WAIT); + check_next(stream0, scheduler_t::STATUS_IDLE); status = stream1->set_active(true); assert(status == scheduler_t::STATUS_OK); // Now cpu1 should finish things. 
@@ -3070,7 +3077,7 @@ test_inactive() for (int i = 0; i < NUM_OUTPUTS; i++) { std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; } - assert(sched_as_string[0] == "..AABBA."); + assert(sched_as_string[0] == "..AABBA._"); assert(sched_as_string[1] == "..B---B."); } #endif // HAS_ZIP diff --git a/clients/drcachesim/tools/basic_counts.cpp b/clients/drcachesim/tools/basic_counts.cpp index 620d28a02d8..0fbf02a47c5 100644 --- a/clients/drcachesim/tools/basic_counts.cpp +++ b/clients/drcachesim/tools/basic_counts.cpp @@ -169,7 +169,8 @@ basic_counts_t::parallel_shard_memref(void *shard_data, const memref_t &memref) } else if (memref.marker.marker_type == TRACE_MARKER_TYPE_KERNEL_EVENT || memref.marker.marker_type == TRACE_MARKER_TYPE_KERNEL_XFER) { ++counters->xfer_markers; - } else if (memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT) { + } else if (memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT || + memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_IDLE) { // This is a synthetic record so do not increment any counts. 
} else { if (memref.marker.marker_type == TRACE_MARKER_TYPE_WINDOW_ID && diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp index ad12644a3b5..fdc250fb4b8 100644 --- a/clients/drcachesim/tools/schedule_stats.cpp +++ b/clients/drcachesim/tools/schedule_stats.cpp @@ -136,9 +136,10 @@ schedule_stats_t::parallel_shard_error(void *shard_data) bool schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref) { - static constexpr char THREAD_LETTER_START = 'A'; - static constexpr char THREAD_SEPARATOR = ','; + static constexpr char THREAD_LETTER_INITIAL_START = 'A'; + static constexpr char THREAD_LETTER_SUBSEQUENT_START = 'a'; static constexpr char WAIT_SYMBOL = '-'; + static constexpr char IDLE_SYMBOL = '_'; per_shard_t *shard = reinterpret_cast(shard_data); if (knob_verbose_ >= 4) { std::ostringstream line; @@ -163,13 +164,29 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT) { ++shard->counters.waits; if (!shard->prev_was_wait) { - shard->thread_sequence += '-'; + shard->thread_sequence += WAIT_SYMBOL; shard->cur_segment_instrs = 0; shard->prev_was_wait = true; } else { ++shard->cur_segment_instrs; if (shard->cur_segment_instrs == knob_print_every_) { shard->thread_sequence += WAIT_SYMBOL; + shard->cur_segment_instrs = 0; + } + } + return true; + } else if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_IDLE) { + ++shard->counters.idles; + if (!shard->prev_was_idle) { + shard->thread_sequence += IDLE_SYMBOL; + shard->cur_segment_instrs = 0; + shard->prev_was_idle = true; + } else { + ++shard->cur_segment_instrs; + if (shard->cur_segment_instrs == knob_print_every_) { + shard->thread_sequence += IDLE_SYMBOL; + shard->cur_segment_instrs = 0; } } return true; @@ -183,12 +200,9 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref 
++shard->counters.voluntary_switches; if (shard->direct_switch_target == memref.marker.tid) ++shard->counters.direct_switches; - // A comma separating each sequence makes it a little easier to - // read, and helps distinguish a switch from two threads with the - // same %26 letter. (We could remove this though to compact it.) - shard->thread_sequence += THREAD_SEPARATOR; } - shard->thread_sequence += THREAD_LETTER_START + static_cast(input % 26); + shard->thread_sequence += + THREAD_LETTER_INITIAL_START + static_cast(input % 26); shard->cur_segment_instrs = 0; if (knob_verbose_ >= 2) { std::ostringstream line; @@ -213,7 +227,8 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref ++shard->counters.instrs; ++shard->cur_segment_instrs; if (shard->cur_segment_instrs == knob_print_every_) { - shard->thread_sequence += THREAD_LETTER_START + static_cast(input % 26); + shard->thread_sequence += + THREAD_LETTER_SUBSEQUENT_START + static_cast(input % 26); shard->cur_segment_instrs = 0; } shard->direct_switch_target = INVALID_THREAD_ID; @@ -237,6 +252,7 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } else if (memref.exit.type == TRACE_TYPE_THREAD_EXIT) shard->saw_exit = true; shard->prev_was_wait = false; + shard->prev_was_idle = false; return true; } @@ -246,11 +262,15 @@ schedule_stats_t::print_counters(const counters_t &counters) std::cerr << std::setw(12) << counters.threads.size() << " threads\n"; std::cerr << std::setw(12) << counters.instrs << " instructions\n"; std::cerr << std::setw(12) << counters.total_switches << " total context switches\n"; - std::cerr << std::setw(12) << std::fixed << std::setprecision(7) - << (1000 * counters.total_switches / static_cast(counters.instrs)) + double cspki = 0.; + if (counters.instrs > 0) + cspki = 1000 * counters.total_switches / static_cast(counters.instrs); + std::cerr << std::setw(12) << std::fixed << std::setprecision(7) << cspki << " CSPKI (context switches 
per 1000 instructions)\n"; - std::cerr << std::setw(12) << std::fixed << std::setprecision(0) - << (counters.instrs / static_cast(counters.total_switches)) + double ipcs = 0.; + if (counters.total_switches > 0) + ipcs = counters.instrs / static_cast(counters.total_switches); + std::cerr << std::setw(12) << std::fixed << std::setprecision(0) << ipcs << " instructions per context switch\n"; std::cerr << std::setw(12) << std::fixed << std::setprecision(7) << counters.voluntary_switches << " voluntary context switches\n"; @@ -273,6 +293,11 @@ schedule_stats_t::print_counters(const counters_t &counters) std::cerr << std::setw(12) << counters.direct_switch_requests << " direct switch requests\n"; std::cerr << std::setw(12) << counters.waits << " waits\n"; + std::cerr << std::setw(12) << counters.idles << " idles\n"; + std::cerr << std::setw(12) << std::setprecision(2) + << 100 * + (counters.instrs / static_cast(counters.instrs + counters.idles)) + << "% cpu busy\n"; } bool diff --git a/clients/drcachesim/tools/schedule_stats.h b/clients/drcachesim/tools/schedule_stats.h index 073d754b6e8..9d15ccc0638 100644 --- a/clients/drcachesim/tools/schedule_stats.h +++ b/clients/drcachesim/tools/schedule_stats.h @@ -88,6 +88,7 @@ class schedule_stats_t : public analysis_tool_t { maybe_blocking_syscalls += rhs.maybe_blocking_syscalls; direct_switch_requests += rhs.direct_switch_requests; waits += rhs.waits; + idles += rhs.idles; for (const memref_tid_t tid : rhs.threads) { threads.insert(tid); } @@ -101,6 +102,7 @@ class schedule_stats_t : public analysis_tool_t { int64_t maybe_blocking_syscalls = 0; int64_t direct_switch_requests = 0; int64_t waits = 0; + int64_t idles = 0; std::unordered_set threads; }; counters_t @@ -121,6 +123,7 @@ class schedule_stats_t : public analysis_tool_t { std::string thread_sequence; uint64_t cur_segment_instrs = 0; bool prev_was_wait = false; + bool prev_was_idle = false; }; void diff --git a/clients/drcachesim/tools/view.cpp 
b/clients/drcachesim/tools/view.cpp index fcda9cb7682..2c77e04d682 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -432,6 +432,7 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) case TRACE_MARKER_TYPE_CORE_WAIT: std::cerr << "\n"; break; + case TRACE_MARKER_TYPE_CORE_IDLE: std::cerr << "\n"; break; default: std::cerr << "\n"; From 3358aee19713e51f233e26e340d9dcc4ec9b5c9b Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Wed, 22 Nov 2023 15:16:29 -0500 Subject: [PATCH 2/5] Fix Windows warning; fix nopreempt test templatex --- clients/drcachesim/scheduler/scheduler.cpp | 2 +- .../tests/schedule_stats_nopreempt.templatex | 58 +++++++++---------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index 8786d8d5e0b..1d58484ba64 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -632,7 +632,7 @@ scheduler_tmpl_t::init( } } VPRINT(this, 1, "%zu inputs\n", inputs_.size()); - live_input_count_.store(inputs_.size(), std::memory_order_release); + live_input_count_.store(static_cast(inputs_.size()), std::memory_order_release); return set_initial_schedule(workload2inputs); } diff --git a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex index f96422fd667..8eeada43d63 100644 --- a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex +++ b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex @@ -4,83 +4,83 @@ Total counts: 8 threads 638938 instructions 5 total context switches - 0.0078255 CSPKI \(context switches per 1000 instructions\) + 0\.0078255 CSPKI \(context switches per 1000 instructions\) 127788 instructions per context switch 5 voluntary context switches 0 direct context switches - 100.00% voluntary switches - 0.00% direct switches + 100\.00% voluntary switches 
+ 0\.00% direct switches 161 system calls 2 maybe-blocking system calls 0 direct switch requests 0 waits - 0 idles - 100.00% cpu busy + *[0-9]* idles + *[0-9\.]*% cpu busy Core #0 counts: . threads *[0-9]* instructions . total context switches - 0.0[0-9.]* CSPKI \(context switches per 1000 instructions\) + 0\.0[0-9\.]* CSPKI \(context switches per 1000 instructions\) *[0-9]* instructions per context switch . voluntary context switches 0 direct context switches - 100.00% voluntary switches - 0.00% direct switches + 100\.00% voluntary switches + 0\.00% direct switches *[0-9]* system calls . maybe-blocking system calls 0 direct switch requests 0 waits - 0 idles - 100.00% cpu busy + *[0-9]* idles + *[0-9\.]*% cpu busy Core #1 counts: . threads *[0-9]* instructions . total context switches - 0.0[0-9.]* CSPKI \(context switches per 1000 instructions\) + 0\.0[0-9\.]* CSPKI \(context switches per 1000 instructions\) *[0-9]* instructions per context switch . voluntary context switches 0 direct context switches - 100.00% voluntary switches - 0.00% direct switches + 100\.00% voluntary switches + 0\.00% direct switches *[0-9]* system calls . maybe-blocking system calls 0 direct switch requests 0 waits - 0 idles - 100.00% cpu busy + *[0-9]* idles + *[0-9\.]*% cpu busy Core #2 counts: . threads *[0-9]* instructions . total context switches - 0.0[0-9.]* CSPKI \(context switches per 1000 instructions\) + 0\.0[0-9\.]* CSPKI \(context switches per 1000 instructions\) *[0-9]* instructions per context switch . voluntary context switches 0 direct context switches - 100.00% voluntary switches - 0.00% direct switches + 100\.00% voluntary switches + 0\.00% direct switches *[0-9]* system calls . maybe-blocking system calls 0 direct switch requests 0 waits - 0 idles - 100.00% cpu busy + *[0-9]* idles + *[0-9\.]*% cpu busy Core #3 counts: . threads *[0-9]* instructions . 
total context switches - 0.0[0-9.]* CSPKI \(context switches per 1000 instructions\) + 0\.0[0-9\.]* CSPKI \(context switches per 1000 instructions\) *[0-9]* instructions per context switch . voluntary context switches 0 direct context switches - 100.00% voluntary switches - 0.00% direct switches + 100\.00% voluntary switches + 0\.00% direct switches *[0-9]* system calls . maybe-blocking system calls 0 direct switch requests 0 waits - 0 idles - 100.00% cpu busy -Core #0 schedule: [A-H,]* -Core #1 schedule: [A-H,]* -Core #2 schedule: [A-H,]* -Core #3 schedule: [A-H,]* + *[0-9]* idles + *[0-9\.]*% cpu busy +Core #0 schedule: [A-Ha-h_]* +Core #1 schedule: [A-Ha-h_]* +Core #2 schedule: [A-Ha-h_]* +Core #3 schedule: [A-Ha-h_]* From 790ffd94c76c9304268d3e5ca1e074af8e90910c Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Wed, 22 Nov 2023 15:46:59 -0500 Subject: [PATCH 3/5] Do return EOF for MAP_TO_CONSISTENT_OUTPUT to avoid workers confusing analysis tools --- clients/drcachesim/analyzer.cpp | 1 + clients/drcachesim/scheduler/scheduler.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp index ddd51ba291f..c158c00cd84 100644 --- a/clients/drcachesim/analyzer.cpp +++ b/clients/drcachesim/analyzer.cpp @@ -560,6 +560,7 @@ analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *w // stream ordinals we would need to add a scheduler API to inject it. record = create_wait_marker(); } else if (status == sched_type_t::STATUS_IDLE) { + assert(shard_type_ == SHARD_BY_CORE); // We let tools know about idle time so they can analyze cpu usage. // We synthesize a record here. If we wanted this to count toward output // stream ordinals we would need to add a scheduler API to inject it. 
diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index 1d58484ba64..a4109158902 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -2280,7 +2280,8 @@ template typename scheduler_tmpl_t::stream_status_t scheduler_tmpl_t::eof_or_idle(output_ordinal_t output) { - if (live_input_count_.load(std::memory_order_acquire) == 0) { + if (options_.mapping == MAP_TO_CONSISTENT_OUTPUT || + live_input_count_.load(std::memory_order_acquire) == 0) { return sched_type_t::STATUS_EOF; } else { outputs_[output].waiting = true; From 6c06c536bb0a28ebbeb9890dfbd31beaa9857775 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Wed, 22 Nov 2023 17:18:04 -0500 Subject: [PATCH 4/5] Add check for idle/wait to analysis_unit_tests --- clients/drcachesim/tests/analysis_unit_tests.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clients/drcachesim/tests/analysis_unit_tests.cpp b/clients/drcachesim/tests/analysis_unit_tests.cpp index a4fa2646801..c580d27efbb 100644 --- a/clients/drcachesim/tests/analysis_unit_tests.cpp +++ b/clients/drcachesim/tests/analysis_unit_tests.cpp @@ -156,6 +156,10 @@ test_queries() parallel_shard_memref(void *shard_data, const memref_t &memref) override { per_shard_t *shard = reinterpret_cast(shard_data); + if (memref.marker.type == TRACE_TYPE_MARKER && + (memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT || + memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_IDLE)) + return true; // These are our testing goals: these queries. // We have one thread for each of our NUM_INPUTS workloads. 
assert(shard->stream->get_output_cpuid() == shard->index); From e0b56294d04c7914d1eec9aaada32cd5f875550a Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Mon, 27 Nov 2023 21:48:10 -0500 Subject: [PATCH 5/5] Review requests: more robust prev-wait/idle; named constants; improved comments --- clients/drcachesim/common/trace_entry.h | 6 ++-- clients/drcachesim/scheduler/scheduler.cpp | 5 +++ clients/drcachesim/scheduler/scheduler.h | 10 +++--- .../drcachesim/tests/scheduler_launcher.cpp | 35 ++++++++++++------- clients/drcachesim/tools/schedule_stats.cpp | 15 ++++---- 5 files changed, 46 insertions(+), 25 deletions(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 9cffd946533..166c5f32055 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -588,9 +588,11 @@ typedef enum { * core has no available inputs to run (all inputs are on other cores or are * blocked waiting for kernel resources). A new marker is emitted each * time the tool analysis framework requests a new record from the scheduler and - * is given a wait status. There are no units of time here but each repetition + * is given an idle status. There are no units of time here but each repetition * is roughly the time where a regular record could have been read and passed - * along. + * along. This idle marker indicates that a core actually had no work to do, + * as opposed to #TRACE_MARKER_TYPE_CORE_WAIT which is an artifact of an + * imposed re-created schedule. 
*/ TRACE_MARKER_TYPE_CORE_IDLE, diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index a4109158902..e9494f44a80 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -1683,6 +1683,11 @@ scheduler_tmpl_t::pick_next_input_as_previously( // XXX i#5843: We may want to provide a kernel-mediated wait // feature so a multi-threaded simulator doesn't have to do a // spinning poll loop. + // XXX i#5843: For replaying a schedule as it was traced with + // MAP_TO_RECORDED_OUTPUT there may have been true idle periods during + // tracing where some other process than the traced workload was + // scheduled on a core. If we could identify those, we should return + // STATUS_IDLE rather than STATUS_WAIT. VPRINT(this, 3, "next_record[%d]: waiting for input %d instr #%" PRId64 "\n", output, index, segment.start_instruction); // Give up this input and go into a wait state. diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index e9f999117bb..bf5d1808921 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -110,12 +110,12 @@ template class scheduler_tmpl_t { * For dynamic scheduling with cross-stream dependencies, the scheduler may pause * a stream if it gets ahead of another stream it should have a dependence on. * This value is also used for schedules following the recorded timestamps - * (#DEPENDENCY_TIMESTAMPS) to avoid one stream getting ahead of another. For - * replaying a schedule as it was traced with #MAP_TO_RECORDED_OUTPUT this can - * indicate an idle period on a core where the traced workload was not currently - * scheduled, but generally #STATUS_WAIT should be treated as artificial. + * (#DEPENDENCY_TIMESTAMPS) to avoid one stream getting ahead of another. 
+ * #STATUS_WAIT should be treated as artificial, an artifact of enforcing a + * recorded schedule on concurrent differently-timed output streams. * Simulators are suggested to not advance simulated time for #STATUS_WAIT while - * they should advance time for #STATUS_IDLE. + * they should advance time for #STATUS_IDLE as the latter indicates a true + * lack of work. */ STATUS_WAIT, STATUS_INVALID, /**< Error condition. */ diff --git a/clients/drcachesim/tests/scheduler_launcher.cpp b/clients/drcachesim/tests/scheduler_launcher.cpp index 814a0b703fa..b0cac30df6e 100644 --- a/clients/drcachesim/tests/scheduler_launcher.cpp +++ b/clients/drcachesim/tests/scheduler_launcher.cpp @@ -152,11 +152,15 @@ void simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &scheduler, std::string &thread_sequence) { + // XXX: Could we share some code with the schedule_stats analysis tool? + // Some features are now duplicated in both. + static constexpr char THREAD_LETTER_INITIAL_START = 'A'; + static constexpr char THREAD_LETTER_SUBSEQUENT_START = 'a'; + static constexpr char WAIT_SYMBOL = '-'; + static constexpr char IDLE_SYMBOL = '_'; memref_t record; uint64_t micros = op_sched_time.get_value() ? get_current_microseconds() : 0; uint64_t cur_segment_instrs = 0; - // XXX: Should we remove this tool as the schedule_stats analysis tool has - // superceded it? We're basically duplicating new features in both. bool prev_was_wait = false, prev_was_idle = false; // Measure cpu usage by counting each next_record() as one cycle. uint64_t cycles_total = 0, cycles_busy = 0; @@ -168,9 +172,14 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch if (op_sched_time.get_value()) micros = get_current_microseconds(); ++cycles_total; + // Cache and reset here to ensure we reset on early return paths. 
+ bool was_wait = prev_was_wait; + bool was_idle = prev_was_idle; + prev_was_wait = false; + prev_was_idle = false; if (status == scheduler_t::STATUS_WAIT) { - if (!prev_was_wait || cur_segment_instrs == op_print_every.get_value()) - thread_sequence += '-'; + if (!was_wait || cur_segment_instrs == op_print_every.get_value()) + thread_sequence += WAIT_SYMBOL; ++cur_segment_instrs; if (cur_segment_instrs == op_print_every.get_value()) cur_segment_instrs = 0; @@ -178,8 +187,8 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch std::this_thread::yield(); continue; } else if (status == scheduler_t::STATUS_IDLE) { - if (!prev_was_idle || cur_segment_instrs == op_print_every.get_value()) - thread_sequence += '_'; + if (!was_idle || cur_segment_instrs == op_print_every.get_value()) + thread_sequence += IDLE_SYMBOL; ++cur_segment_instrs; if (cur_segment_instrs == op_print_every.get_value()) cur_segment_instrs = 0; @@ -215,9 +224,8 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch scheduler_t::input_ordinal_t input = stream->get_input_stream_ordinal(); if (input != prev_input) { // We convert to letters which only works well for <=26 inputs. 
- if (!thread_sequence.empty()) - thread_sequence += ','; - thread_sequence += 'A' + static_cast(input % 26); + thread_sequence += + THREAD_LETTER_INITIAL_START + static_cast(input % 26); cur_segment_instrs = 0; if (op_verbose.get_value() >= 2) { std::ostringstream line; @@ -248,7 +256,8 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch if (type_is_instr(record.instr.type)) { ++cur_segment_instrs; if (cur_segment_instrs == op_print_every.get_value()) { - thread_sequence += 'A' + static_cast(input % 26); + thread_sequence += + THREAD_LETTER_SUBSEQUENT_START + static_cast(input % 26); cur_segment_instrs = 0; } } @@ -272,8 +281,10 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch float usage = 0; if (cycles_total > 0) usage = 100.f * cycles_busy / static_cast(cycles_total); - std::cerr << "Core #" << std::setw(2) << ordinal << " usage: " << std::setw(9) - << usage << "%\n"; + std::ostringstream line; + line << "Core #" << std::setw(2) << ordinal << " usage: " << std::setw(9) << usage + << "%\n"; + std::cerr << line.str(); } } // namespace diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp index fdc250fb4b8..67bfe69b0e5 100644 --- a/clients/drcachesim/tools/schedule_stats.cpp +++ b/clients/drcachesim/tools/schedule_stats.cpp @@ -160,13 +160,18 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref line << "\n"; std::cerr << line.str(); } + // Cache and reset here to ensure we reset on early return paths. 
+ bool was_wait = shard->prev_was_wait; + bool was_idle = shard->prev_was_idle; + shard->prev_was_wait = false; + shard->prev_was_idle = false; if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT) { ++shard->counters.waits; - if (!shard->prev_was_wait) { + shard->prev_was_wait = true; + if (!was_wait) { shard->thread_sequence += WAIT_SYMBOL; shard->cur_segment_instrs = 0; - shard->prev_was_wait = true; } else { ++shard->cur_segment_instrs; if (shard->cur_segment_instrs == knob_print_every_) { @@ -178,10 +183,10 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } else if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_IDLE) { ++shard->counters.idles; - if (!shard->prev_was_idle) { + shard->prev_was_idle = true; + if (!was_idle) { shard->thread_sequence += IDLE_SYMBOL; shard->cur_segment_instrs = 0; - shard->prev_was_idle = true; } else { ++shard->cur_segment_instrs; if (shard->cur_segment_instrs == knob_print_every_) { @@ -251,8 +256,6 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } } else if (memref.exit.type == TRACE_TYPE_THREAD_EXIT) shard->saw_exit = true; - shard->prev_was_wait = false; - shard->prev_was_idle = false; return true; }