diff --git a/api/docs/release.dox b/api/docs/release.dox index 3d864c96cd6..5acf8fb5e9b 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -142,6 +142,11 @@ changes: refers to timestamps and direct switches, which is what most users should want. - Rename the macro INSTR_CREATE_mul_sve to INSTR_CREATE_mul_sve_imm to differentiate it from the other SVE MUL instructions. + - Added a new drmemtrace analyzer option \p -interval_instr_count that enables trace + analyzer interval results for every given count of instrs in each shard. This mode + does not support merging the shard interval snapshots to output the whole-trace + interval snapshots. Instead, the print_interval_results() API is called separately + for each shard with the interval state snapshots of that shard. Further non-compatibility-affecting changes include: - Added DWARF-5 support to the drsyms library by linking in 4 static libraries diff --git a/clients/drcachesim/analysis_tool.h b/clients/drcachesim/analysis_tool.h index fe5a6c097d6..0c4a03534da 100644 --- a/clients/drcachesim/analysis_tool.h +++ b/clients/drcachesim/analysis_tool.h @@ -231,10 +231,10 @@ template class analysis_tool_tmpl_t { // to the specified -interval_microseconds. uint64_t interval_end_timestamp = 0; - // Count of instructions: cumulative till this interval, and the incremental - // delta in this interval vs the previous one. May be useful for tools to - // compute PKI (per kilo instruction) metrics; obviates the need for each - // tool to duplicate this. + // Count of instructions: cumulative till this interval's end, and the + // incremental delta in this interval vs the previous one. May be useful for + // tools to compute PKI (per kilo instruction) metrics; obviates the need for + // each tool to duplicate this. uint64_t instr_count_cumulative = 0; uint64_t instr_count_delta = 0; diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp index 851890684b4..62f9c29bd45 100644 --- a/clients/drcachesim/analyzer.cpp +++ b/clients/drcachesim/analyzer.cpp @@ -115,6 +115,13 @@ analyzer_t::record_is_timestamp(const memref_t &record) record.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP; } +template <> +bool +analyzer_t::record_is_instr(const memref_t &record) +{ + return type_is_instr(record.instr.type); +} + template <> memref_t analyzer_t::create_wait_marker() @@ -175,6 +182,13 @@ record_analyzer_t::record_is_timestamp(const trace_entry_t &record) return record.type == TRACE_TYPE_MARKER && record.size == TRACE_MARKER_TYPE_TIMESTAMP; } +template <> +bool +record_analyzer_t::record_is_instr(const trace_entry_t &record) +{ + return type_is_instr(static_cast(record.type)); +} + template <> trace_entry_t record_analyzer_t::create_wait_marker() @@ -321,7 +335,7 @@ template analyzer_tmpl_t::analyzer_tmpl_t( const std::string &trace_path, analysis_tool_tmpl_t **tools, int num_tools, int worker_count, uint64_t skip_instrs, uint64_t interval_microseconds, - int verbosity) + uint64_t interval_instr_count, int verbosity) : success_(true) , num_tools_(num_tools) , tools_(tools) @@ -329,8 +343,14 @@ analyzer_tmpl_t::analyzer_tmpl_t( , worker_count_(worker_count) , skip_instrs_(skip_instrs) , interval_microseconds_(interval_microseconds) + , interval_instr_count_(interval_instr_count) , verbosity_(verbosity) { + if (interval_microseconds_ > 0 && interval_instr_count_ > 0) { + success_ = false; + error_string_ = "Cannot enable both kinds of interval analysis"; + return; + } // The scheduler will call reader_t::init() for each input file. We assume // that won't block (analyzer_multi_t separates out IPC readers). typename sched_type_t::scheduler_options_t sched_ops; @@ -385,10 +405,11 @@ analyzer_tmpl_t::get_current_microseconds() template uint64_t -analyzer_tmpl_t::compute_interval_id(uint64_t first_timestamp, - uint64_t latest_timestamp) +analyzer_tmpl_t::compute_timestamp_interval_id( + uint64_t first_timestamp, uint64_t latest_timestamp) { assert(first_timestamp <= latest_timestamp); + assert(interval_microseconds_ > 0); // We keep the interval end timestamps independent of the first timestamp of the // trace. For the parallel mode, where we need to merge intervals from different // shards that were active during the same final whole-trace interval, having aligned @@ -399,17 +420,34 @@ analyzer_tmpl_t::compute_interval_id(uint64_t first_time first_timestamp / interval_microseconds_ + 1; } +template +uint64_t +analyzer_tmpl_t::compute_instr_count_interval_id( + uint64_t cur_instr_count) +{ + assert(interval_instr_count_ > 0); + if (cur_instr_count == 0) + return 1; + // We want all memory access entries following an instr to stay in the same + // interval as the instr, so we increment interval_id at instr entries. Also, + // we want the last instr in each interval to have an ordinal that's a multiple + // of interval_instr_count_. + return (cur_instr_count - 1) / interval_instr_count_ + 1; +} + template uint64_t analyzer_tmpl_t::compute_interval_end_timestamp( uint64_t first_timestamp, uint64_t interval_id) { + assert(interval_microseconds_ > 0); assert(interval_id >= 1); uint64_t end_timestamp = (first_timestamp / interval_microseconds_ + interval_id) * interval_microseconds_; // Since the interval's end timestamp is exclusive, the end_timestamp would actually // fall under the next interval. - assert(compute_interval_id(first_timestamp, end_timestamp) == interval_id + 1); + assert(compute_timestamp_interval_id(first_timestamp, end_timestamp) == + interval_id + 1); return end_timestamp; } @@ -418,19 +456,33 @@ bool analyzer_tmpl_t::advance_interval_id( typename scheduler_tmpl_t::stream_t *stream, analyzer_shard_data_t *shard, uint64_t &prev_interval_index, - uint64_t &prev_interval_init_instr_count) + uint64_t &prev_interval_init_instr_count, bool at_instr_record) { - if (interval_microseconds_ == 0) { + uint64_t next_interval_index = 0; + if (interval_microseconds_ > 0) { + next_interval_index = compute_timestamp_interval_id(stream->get_first_timestamp(), + stream->get_last_timestamp()); + } else if (interval_instr_count_ > 0) { + // The interval callbacks are invoked just prior to the process_memref or + // parallel_shard_memref callback for the first instr of the new interval; This + // keeps the instr's memory accesses in the same interval as the instr. + next_interval_index = + compute_instr_count_interval_id(stream->get_instruction_ordinal()); + } else { return false; } - uint64_t next_interval_index = - compute_interval_id(stream->get_first_timestamp(), stream->get_last_timestamp()); if (next_interval_index != shard->cur_interval_index) { assert(next_interval_index > shard->cur_interval_index); prev_interval_index = shard->cur_interval_index; prev_interval_init_instr_count = shard->cur_interval_init_instr_count; shard->cur_interval_index = next_interval_index; - shard->cur_interval_init_instr_count = stream->get_instruction_ordinal(); + // If the next record to be presented to the tools is an instr record, we need to + // adjust for the fact that the record has already been read from the stream. + // Since we know that the next record is a part of the new interval and + // cur_interval_init_instr_count is supposed to be the count just prior to the + // new interval, we need to subtract one count for the instr. + shard->cur_interval_init_instr_count = + stream->get_instruction_ordinal() - (at_instr_record ? 1 : 0); return true; } return false; @@ -443,7 +495,7 @@ analyzer_tmpl_t::process_serial(analyzer_worker_data_t & std::vector user_worker_data(num_tools_); worker.shard_data[0].tool_data.resize(num_tools_); - if (interval_microseconds_ != 0) + if (interval_microseconds_ != 0 || interval_instr_count_ != 0) worker.shard_data[0].cur_interval_index = 1; for (int i = 0; i < num_tools_; ++i) { worker.error = tools_[i]->initialize_stream(worker.stream); @@ -474,21 +526,22 @@ analyzer_tmpl_t::process_serial(analyzer_worker_data_t & worker.error = "Failed to read from trace: " + worker.stream->get_stream_name(); } - } else if (interval_microseconds_ != 0) { + } else if (interval_microseconds_ != 0 || interval_instr_count_ != 0) { process_interval(worker.shard_data[0].cur_interval_index, worker.shard_data[0].cur_interval_init_instr_count, &worker, - /*parallel=*/false); + /*parallel=*/false, /*at_instr_record=*/false); } return; } uint64_t prev_interval_index; uint64_t prev_interval_init_instr_count; - if (record_is_timestamp(record) && + if ((record_is_timestamp(record) || record_is_instr(record)) && advance_interval_id(worker.stream, &worker.shard_data[0], prev_interval_index, - prev_interval_init_instr_count) && + prev_interval_init_instr_count, + record_is_instr(record)) && !process_interval(prev_interval_index, prev_interval_init_instr_count, - &worker, /*parallel=*/false)) { + &worker, /*parallel=*/false, record_is_instr(record))) { return; } for (int i = 0; i < num_tools_; ++i) { @@ -511,11 +564,11 @@ analyzer_tmpl_t::process_shard_exit( VPRINT(this, 1, "Worker %d finished trace shard %s\n", worker->index, worker->stream->get_stream_name().c_str()); worker->shard_data[shard_index].exited = true; - if (interval_microseconds_ != 0 && + if ((interval_microseconds_ != 0 || interval_instr_count_ != 0) && !process_interval(worker->shard_data[shard_index].cur_interval_index, worker->shard_data[shard_index].cur_interval_init_instr_count, worker, - /*parallel=*/true, shard_index)) + /*parallel=*/true, /*at_instr_record=*/false, shard_index)) return false; for (int i = 0; i < num_tools_; ++i) { if (!tools_[i]->parallel_shard_exit( @@ -578,7 +631,7 @@ analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *w VPRINT(this, 1, "Worker %d starting on trace shard %d stream is %p\n", worker->index, shard_index, worker->stream); worker->shard_data[shard_index].tool_data.resize(num_tools_); - if (interval_microseconds_ != 0) + if (interval_microseconds_ != 0 || interval_instr_count_ != 0) worker->shard_data[shard_index].cur_interval_index = 1; for (int i = 0; i < num_tools_; ++i) { worker->shard_data[shard_index].tool_data[i].shard_data = @@ -596,11 +649,12 @@ analyzer_tmpl_t::process_tasks(analyzer_worker_data_t *w } uint64_t prev_interval_index; uint64_t prev_interval_init_instr_count; - if (record_is_timestamp(record) && + if ((record_is_timestamp(record) || record_is_instr(record)) && advance_interval_id(worker->stream, &worker->shard_data[shard_index], - prev_interval_index, prev_interval_init_instr_count) && + prev_interval_index, prev_interval_init_instr_count, + record_is_instr(record)) && !process_interval(prev_interval_index, prev_interval_init_instr_count, worker, - /*parallel=*/true, shard_index)) { + /*parallel=*/true, record_is_instr(record), shard_index)) { return; } for (int i = 0; i < num_tools_; ++i) { @@ -654,7 +708,8 @@ analyzer_tmpl_t::combine_interval_snapshots( result = tools_[tool_idx]->combine_interval_snapshots(latest_shard_snapshots, interval_end_timestamp); if (result == nullptr) { - error_string_ = "combine_interval_snapshots unexpectedly returned nullptr"; + error_string_ = "combine_interval_snapshots unexpectedly returned nullptr: " + + tools_[tool_idx]->get_error_string(); return false; } result->instr_count_delta = 0; @@ -758,7 +813,7 @@ analyzer_tmpl_t::merge_shard_interval_results( cur_merged_interval->shard_id = analysis_tool_tmpl_t< RecordType>::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID; cur_merged_interval->interval_end_timestamp = earliest_interval_end_timestamp; - cur_merged_interval->interval_id = compute_interval_id( + cur_merged_interval->interval_id = compute_timestamp_interval_id( earliest_ever_interval_end_timestamp, earliest_interval_end_timestamp); merged_intervals.push_back(cur_merged_interval); } @@ -772,10 +827,74 @@ analyzer_tmpl_t::merge_shard_interval_results( return true; } +template +void +analyzer_tmpl_t::drain_interval_snapshot_queue_to_vector( + std::queue::interval_state_snapshot_t *> + &que, + std::vector::interval_state_snapshot_t *> + &vec) +{ + // If the worker's interval snapshot data were an std::vector we could just + // std::move here, but that would make the implementation of + // merge_shard_interval_results more complex, so there are trade-offs. + while (!que.empty()) { + vec.push_back(que.front()); + que.pop(); + } +} + +template +void +analyzer_tmpl_t::populate_unmerged_shard_interval_results() +{ + for (auto &worker : worker_data_) { + for (auto &shard_data : worker.shard_data) { + assert(static_cast(shard_data.second.tool_data.size()) == num_tools_); + for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { + key_tool_shard_t tool_shard_key = { tool_idx, + shard_data.second.shard_index }; + drain_interval_snapshot_queue_to_vector( + shard_data.second.tool_data[tool_idx].interval_snapshot_data, + per_shard_interval_snapshots_[tool_shard_key]); + } + } + } +} + +template +void +analyzer_tmpl_t::populate_serial_interval_results() +{ + assert(whole_trace_interval_snapshots_.empty()); + whole_trace_interval_snapshots_.resize(num_tools_); + assert(worker_data_.size() == 1); + assert(worker_data_[0].shard_data.size() == 1 && + worker_data_[0].shard_data.count(0) == 1); + assert(static_cast(worker_data_[0].shard_data[0].tool_data.size()) == + num_tools_); + for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { + drain_interval_snapshot_queue_to_vector( + worker_data_[0].shard_data[0].tool_data[tool_idx].interval_snapshot_data, + whole_trace_interval_snapshots_[tool_idx]); + } +} + template bool analyzer_tmpl_t::collect_and_maybe_merge_shard_interval_results() { + assert(interval_microseconds_ != 0 || interval_instr_count_ != 0); + if (!parallel_) { + populate_serial_interval_results(); + return true; + } + if (interval_instr_count_ > 0) { + // We do not merge interval state snapshots across shards. See comment by + // per_shard_interval_snapshots for more details. + populate_unmerged_shard_interval_results(); + return true; + } // all_intervals[tool_idx][shard_idx] contains a queue of the // interval_state_snapshot_t* that were output by that tool for that shard. std::vector::collect_and_maybe_merge_shard_interval_ all_intervals(num_tools_); for (const auto &worker : worker_data_) { for (const auto &shard_data : worker.shard_data) { + assert(static_cast(shard_data.second.tool_data.size()) == num_tools_); for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { all_intervals[tool_idx].emplace_back(std::move( shard_data.second.tool_data[tool_idx].interval_snapshot_data)); } } } - assert(merged_interval_snapshots_.empty()); - merged_interval_snapshots_.resize(num_tools_); + assert(whole_trace_interval_snapshots_.empty()); + whole_trace_interval_snapshots_.resize(num_tools_); for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { // We need to do this separately per tool because all tools may not // generate an interval_state_snapshot_t for the same intervals (even though // the framework notifies all tools of all intervals). if (!merge_shard_interval_results(all_intervals[tool_idx], - merged_interval_snapshots_[tool_idx], + whole_trace_interval_snapshots_[tool_idx], tool_idx)) { return false; } @@ -844,12 +964,20 @@ analyzer_tmpl_t::run() } } } - if (interval_microseconds_ != 0) { + if (interval_microseconds_ != 0 || interval_instr_count_ != 0) { return collect_and_maybe_merge_shard_interval_results(); } return true; } +static void +print_output_separator() +{ + + std::cerr << "\n==========================================================" + "=================\n"; +} + template bool analyzer_tmpl_t::print_stats() @@ -861,25 +989,58 @@ analyzer_tmpl_t::print_stats() error_string_ = tools_[i]->get_error_string(); return false; } - if (interval_microseconds_ != 0 && !merged_interval_snapshots_.empty()) { - // merged_interval_snapshots_ may be empty depending on the derived class's - // implementation of collect_and_maybe_merge_shard_interval_results. - if (!merged_interval_snapshots_[i].empty() && - !tools_[i]->print_interval_results(merged_interval_snapshots_[i])) { + if (i + 1 < num_tools_) { + // Separate tool output. + print_output_separator(); + } + } + // Now print interval results. + // Should not have both whole-trace or per-shard interval snapshots. + assert(whole_trace_interval_snapshots_.empty() || + per_shard_interval_snapshots_.empty()); + // We may have whole-trace intervals snaphots for instr count intervals in serial + // mode, and for timestamp (microsecond) intervals in both serial and parallel mode. + if (!whole_trace_interval_snapshots_.empty()) { + // Separate non-interval and interval outputs. + print_output_separator(); + std::cerr << "Printing whole-trace interval results:\n"; + for (int i = 0; i < num_tools_; ++i) { + // whole_trace_interval_snapshots_[i] may be empty if the corresponding tool + // did not produce any interval results. + if (!whole_trace_interval_snapshots_[i].empty() && + !tools_[i]->print_interval_results(whole_trace_interval_snapshots_[i])) { error_string_ = tools_[i]->get_error_string(); return false; } - for (auto snapshot : merged_interval_snapshots_[i]) { + for (auto snapshot : whole_trace_interval_snapshots_[i]) { if (!tools_[i]->release_interval_snapshot(snapshot)) { error_string_ = tools_[i]->get_error_string(); return false; } } + if (i + 1 < num_tools_) { + // Separate tool output. + print_output_separator(); + } } - if (i + 1 < num_tools_) { - // Separate tool output. - std::cerr << "\n==========================================================" - "=================\n"; + } else if (!per_shard_interval_snapshots_.empty()) { + // Separate non-interval and interval outputs. + print_output_separator(); + std::cerr << "Printing unmerged per-shard interval results:\n"; + for (auto &interval_snapshots : per_shard_interval_snapshots_) { + int tool_idx = interval_snapshots.first.tool_idx; + if (!interval_snapshots.second.empty() && + !tools_[tool_idx]->print_interval_results(interval_snapshots.second)) { + error_string_ = tools_[tool_idx]->get_error_string(); + return false; + } + for (auto snapshot : interval_snapshots.second) { + if (!tools_[tool_idx]->release_interval_snapshot(snapshot)) { + error_string_ = tools_[tool_idx]->get_error_string(); + return false; + } + } + print_output_separator(); } } return true; @@ -889,7 +1050,7 @@ template bool analyzer_tmpl_t::process_interval( uint64_t interval_id, uint64_t interval_init_instr_count, - analyzer_worker_data_t *worker, bool parallel, int shard_idx) + analyzer_worker_data_t *worker, bool parallel, bool at_instr_record, int shard_idx) { assert(parallel || shard_idx == 0); // Default to zero for the serial mode. for (int tool_idx = 0; tool_idx < num_tools_; ++tool_idx) { @@ -917,9 +1078,21 @@ analyzer_tmpl_t::process_interval( : analysis_tool_tmpl_t< RecordType>::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID; snapshot->interval_id = interval_id; - snapshot->interval_end_timestamp = compute_interval_end_timestamp( - worker->stream->get_first_timestamp(), interval_id); - snapshot->instr_count_cumulative = worker->stream->get_instruction_ordinal(); + if (interval_microseconds_ > 0) { + // For timestamp intervals, the interval_end_timestamp is the abstract + // non-inclusive end timestamp for the interval_id. This is to make it + // easier to line up the corresponding shard interval snapshots so that + // we can merge them to form the whole-trace interval snapshots. + snapshot->interval_end_timestamp = compute_interval_end_timestamp( + worker->stream->get_first_timestamp(), interval_id); + } else { + snapshot->interval_end_timestamp = worker->stream->get_last_timestamp(); + } + // instr_count_cumulative for the interval snapshot is supposed to be + // inclusive, so if the first record after the interval (that is, the record + // we're at right now) is an instr, it must be subtracted. + snapshot->instr_count_cumulative = + worker->stream->get_instruction_ordinal() - (at_instr_record ? 1 : 0); snapshot->instr_count_delta = snapshot->instr_count_cumulative - interval_init_instr_count; worker->shard_data[shard_idx].tool_data[tool_idx].interval_snapshot_data.push( diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h index 69696c6b91d..87857de7b7b 100644 --- a/clients/drcachesim/analyzer.h +++ b/clients/drcachesim/analyzer.h @@ -119,7 +119,8 @@ template class analyzer_tmpl_t { analyzer_tmpl_t(const std::string &trace_path, analysis_tool_tmpl_t **tools, int num_tools, int worker_count = 0, uint64_t skip_instrs = 0, - uint64_t interval_microseconds = 0, int verbosity = 0); + uint64_t interval_microseconds = 0, uint64_t interval_instr_count = 0, + int verbosity = 0); /** Launches the analysis process. */ virtual bool run(); @@ -167,6 +168,8 @@ template class analyzer_tmpl_t { } uint64_t cur_interval_index; + // Cumulative instr count as it was just before the start of the current + // interval. uint64_t cur_interval_init_instr_count; // Identifier for the shard (thread or core id). int64_t shard_id; @@ -250,6 +253,9 @@ template class analyzer_tmpl_t { bool record_is_timestamp(const RecordType &record); + bool + record_is_instr(const RecordType &record); + RecordType create_wait_marker(); @@ -262,15 +268,22 @@ template class analyzer_tmpl_t { // finished. For serial analysis, it should remain the default value. bool process_interval(uint64_t interval_id, uint64_t interval_init_instr_count, - analyzer_worker_data_t *worker, bool parallel, int shard_idx = 0); + analyzer_worker_data_t *worker, bool parallel, bool at_instr_record, + int shard_idx = 0); // Compute interval id for the given latest_timestamp, assuming the trace (or - // trace shard) starts at the given first_timestamp. + // trace shard) starts at the given first_timestamp. This is relevant when + // timestamp intervals are enabled using interval_microseconds_. uint64_t - compute_interval_id(uint64_t first_timestamp, uint64_t latest_timestamp); + compute_timestamp_interval_id(uint64_t first_timestamp, uint64_t latest_timestamp); - // Compute the interval end timestamp for the given interval_id, assuming the trace - // (or trace shard) starts at the given first_timestamp. + // Compute interval id at the given instr count. This is relevant when instr count + // intervals are enabled using interval_instr_count_. + uint64_t + compute_instr_count_interval_id(uint64_t cur_instr_count); + + // Compute the interval end timestamp (non-inclusive) for the given interval_id, + // assuming the trace (or trace shard) starts at the given first_timestamp. uint64_t compute_interval_end_timestamp(uint64_t first_timestamp, uint64_t interval_id); @@ -278,11 +291,13 @@ template class analyzer_tmpl_t { // on the most recent seen timestamp in the trace stream. Returns whether the // current interval id was updated, and if so also sets the previous interval index // in prev_interval_index. + // at_instr_record indicates that the next record that will be presented to + // the analysis tools is an instr record. bool advance_interval_id( typename scheduler_tmpl_t::stream_t *stream, analyzer_shard_data_t *shard, uint64_t &prev_interval_index, - uint64_t &prev_interval_init_instr_count); + uint64_t &prev_interval_init_instr_count, bool at_instr_record); // Collects interval results for all shards from the workers, and then optional // merges the shard-local intervals to form the whole-trace interval results using @@ -291,7 +306,7 @@ template class analyzer_tmpl_t { virtual bool collect_and_maybe_merge_shard_interval_results(); - // Computes and stores the interval results in merged_interval_snapshots_. For + // Computes and stores the interval results in whole_trace_interval_snapshots_. For // serial analysis where we already have only a single shard, this involves // simply copying interval_state_snapshot_t* from the input. For parallel // analysis, this involves merging results from multiple shards for intervals @@ -305,6 +320,16 @@ template class analyzer_tmpl_t { *> &merged_intervals, int tool_idx); + // Populates the per_shard_interval_snapshots_ field based on the interval snapshots + // stored in worker_data_. + void + populate_unmerged_shard_interval_results(); + + // Populates the whole_trace_interval_snapshots_ field based on the interval snapshots + // stored in the only entry of worker_data_. + void + populate_serial_interval_results(); + // Combines all interval snapshots in the given vector to create the interval // snapshot for the whole-trace interval ending at interval_end_timestamp and // stores it in 'result'. These snapshots are for the tool at tool_idx. Returns @@ -321,6 +346,13 @@ template class analyzer_tmpl_t { uint64_t get_current_microseconds(); + void + drain_interval_snapshot_queue_to_vector( + std::queue::interval_state_snapshot_t *> + &que, + std::vector< + typename analysis_tool_tmpl_t::interval_state_snapshot_t *> &vec); + bool success_; scheduler_tmpl_t scheduler_; std::string error_string_; @@ -329,21 +361,63 @@ template class analyzer_tmpl_t { std::vector worker_data_; int num_tools_; analysis_tool_tmpl_t **tools_; - // Stores the interval state snapshots for the whole trace, which for the parallel - // mode are the resulting interval state snapshots after merging from all shards - // in merge_shard_interval_results. - // merged_interval_snapshots_[tool_idx] is a vector of the interval snapshots - // (in order of the intervals) for that tool. - // This may not be set, depending on the derived class's implementation of - // collect_and_maybe_merge_shard_interval_results. + // Stores the interval state snapshots, merged across shards. These are + // produced when timestamp intervals are enabled using interval_microseconds_. + // + // whole_trace_interval_snapshots_[tool_idx] is a vector of the interval snapshots + // (in order of the intervals) for that tool. For the parallel mode, these + // interval state snapshots are produced after merging corresponding shard + // interval snapshots using merge_shard_interval_results. std::vector::interval_state_snapshot_t *>> - merged_interval_snapshots_; + whole_trace_interval_snapshots_; + + // Key that combines tool and shard idx for use with an std::unordered_map. + struct key_tool_shard_t { + int tool_idx; + int shard_idx; + bool + operator==(const key_tool_shard_t &rhs) const + { + return tool_idx == rhs.tool_idx && shard_idx == rhs.shard_idx; + } + }; + struct key_tool_shard_hash_t { + std::size_t + operator()(const key_tool_shard_t &t) const + { + return std::hash()(t.tool_idx ^ t.shard_idx); + } + }; + + // Stores the interval state snapshots, unmerged across shards. These are + // produced when instr count intervals are enabled using interval_instr_count_. + // + // per_shard_interval_snapshots_[(tool_idx, shard_idx)] is a vector + // of the interval snapshots for that tool and shard. Note that the snapshots for + // each shard are separate; they are not merged across shards. + // + // TODO i#6643: Figure out a useful way to merge instr count intervals across shards. + // One way is to merge the shard interval snapshots that correspond to the same + // [interval_instr_count_ * interval_id, interval_instr_count_ * (interval_id + 1)) + // shard-local instrs. But it is not clear whether this is useful. + // Another way is to merge the shard interval snapshots that correspond to the same + // [interval_instr_count_ * interval_id, interval_instr_count_ * (interval_id + 1)) + // whole-trace instrs. But that is much harder to compute. We'd need some way to + // identify the whole-trace interval boundaries in each shard's stream (since we + // process each shard separately); this would likely need a pre-processing pass. + std::unordered_map::interval_state_snapshot_t *>, + key_tool_shard_hash_t> + per_shard_interval_snapshots_; + bool parallel_; int worker_count_; const char *output_prefix_ = "[analyzer]"; uint64_t skip_instrs_ = 0; uint64_t interval_microseconds_ = 0; + uint64_t interval_instr_count_ = 0; int verbosity_ = 0; shard_type_t shard_type_ = SHARD_BY_THREAD; bool sched_by_time_ = false; diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index a4650803e53..eb1cbd21c2c 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -340,6 +340,7 @@ analyzer_multi_tmpl_t::analyzer_multi_tmpl_t() this->worker_count_ = op_jobs.get_value(); this->skip_instrs_ = op_skip_instrs.get_value(); this->interval_microseconds_ = op_interval_microseconds.get_value(); + this->interval_instr_count_ = op_interval_instr_count.get_value(); // Initial measurements show it's sometimes faster to keep the parallel model // of using single-file readers but use them sequentially, as opposed to // the every-file interleaving reader, but the user can specify -jobs 1, so diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 499cb860bc6..390db324ba6 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -521,7 +521,17 @@ droption_t op_interval_microseconds( "Enable periodic heartbeats for intervals of given microseconds in the trace.", "Desired length of each trace interval, defined in microseconds of trace time. " "Trace intervals are measured using the TRACE_MARKER_TYPE_TIMESTAMP marker values. " - "If set, analysis tools receive a callback at the end of each interval."); + "If set, analysis tools receive a callback at the end of each interval, and one " + "at the end of trace analysis to print the whole-trace interval results."); + +droption_t op_interval_instr_count( + DROPTION_SCOPE_FRONTEND, "interval_instr_count", 0, + "Enable periodic heartbeats for intervals of given per-shard instr count. ", + "Desired length of each trace interval, defined in instr count of each shard. " + "With -parallel, this does not support whole trace intervals, only per-shard " + "intervals. If set, analysis tools receive a callback at the end of each interval, " + "and separate callbacks per shard at the end of trace analysis to print each " + "shard's interval results."); droption_t op_only_thread(DROPTION_SCOPE_FRONTEND, "only_thread", 0, diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 6c29a1af9d0..dbd297f2966 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -158,6 +158,8 @@ extern dynamorio::droption::droption_t op_tracer_alt; extern dynamorio::droption::droption_t op_tracer_ops; extern dynamorio::droption::droption_t op_interval_microseconds; +extern dynamorio::droption::droption_t + op_interval_instr_count; extern dynamorio::droption::droption_t op_only_thread; extern dynamorio::droption::droption_t op_skip_instrs; extern dynamorio::droption::droption_t op_skip_refs; diff --git a/clients/drcachesim/tests/offline-interval-instr-count-output.templatex b/clients/drcachesim/tests/offline-interval-instr-count-output.templatex new file mode 100644 index 00000000000..e0a1268b7c7 --- /dev/null +++ b/clients/drcachesim/tests/offline-interval-instr-count-output.templatex @@ -0,0 +1,37 @@ +Hello, world! +Basic counts tool results: +Total counts: + [ 0-9]* total \(fetched\) instructions + [ 0-9]* total unique \(fetched\) instructions + [ 0-9]* total non-fetched instructions + [ 0-9]* total prefetches + [ 0-9]* total data loads + [ 0-9]* total data stores + [ 0-9]* total icache flushes + [ 0-9]* total dcache flushes + 1 total threads + [ 0-9]* total scheduling markers +.* +Thread [0-9]* counts: + [ 0-9]* \(fetched\) instructions + [ 0-9]* unique \(fetched\) instructions + [ 0-9]* non-fetched instructions + [ 0-9]* prefetches + [ 0-9]* data loads + [ 0-9]* data stores + [ 0-9]* icache flushes + [ 0-9]* dcache flushes + [ 0-9]* scheduling markers +.* +Printing unmerged per-shard interval results: +Counts per trace interval for TID.* +Interval #1 ending at timestamp [0-9]*: + 10000 interval delta \(fetched\) instructions + [ 0-9]* interval delta non-fetched instructions + [ 0-9]* interval delta prefetches + [ 0-9]* interval delta data loads + [ 0-9]* interval delta data stores + [ 0-9]* interval delta icache flushes + [ 0-9]* interval delta dcache flushes + [ 0-9]* interval delta scheduling markers +.* diff --git a/clients/drcachesim/tests/offline-interval-count-output.templatex b/clients/drcachesim/tests/offline-interval-microseconds-count-output.templatex similarity index 96% rename from clients/drcachesim/tests/offline-interval-count-output.templatex rename to clients/drcachesim/tests/offline-interval-microseconds-count-output.templatex index 08b84128398..bb1b946ce5d 100644 --- a/clients/drcachesim/tests/offline-interval-count-output.templatex +++ b/clients/drcachesim/tests/offline-interval-microseconds-count-output.templatex @@ -23,6 +23,7 @@ Thread [0-9]* counts: [ 0-9]* dcache flushes [ 0-9]* scheduling markers .* +Printing whole-trace interval results: Counts per trace interval for whole trace: Interval #1 ending at timestamp [0-9]*: [ 0-9]* interval delta \(fetched\) instructions diff --git a/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp b/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp index 384cd1ae440..533c5beeb00 100644 --- a/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp +++ b/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -140,8 +140,10 @@ class test_stream_t : public scheduler_t::stream_t { return instr_count_; assert(at_ >= 0 && at_ < refs_.size()); auto it = instr_counts_.find(refs_[at_].instr.tid); - assert(it != instr_counts_.end()); - return it->second; + if (it != instr_counts_.end()) { + return it->second; + } + return 0; } private: @@ -166,13 +168,15 @@ class test_stream_t : public scheduler_t::stream_t { class test_analyzer_t : public analyzer_t { public: test_analyzer_t(const std::vector &refs, analysis_tool_t **tools, - int num_tools, bool parallel, uint64_t interval_microseconds) + int num_tools, bool parallel, uint64_t interval_microseconds, + uint64_t interval_instr_count) : analyzer_t() { num_tools_ = num_tools; tools_ = tools; parallel_ = parallel; interval_microseconds_ = interval_microseconds; + interval_instr_count_ = interval_instr_count; verbosity_ = 1; worker_count_ = 1; test_stream_ = @@ -267,96 +271,105 @@ class dummy_analysis_tool_t : public analysis_tool_t { int generate_snapshot_count_; }; -// Test analysis_tool_t that records information about when the -// generate_shard_interval_snapshot and generate_interval_snapshot APIs were invoked. -class test_analysis_tool_t : public analysis_tool_t { -public: - // Describes the point in trace when an interval ends. This is same as the point - // when the generate_*interval_snapshot API is invoked. - struct interval_end_point_t { - memref_tid_t tid; - int seen_memrefs; // For parallel mode, this is the shard-local count. - uint64_t interval_id; +#define SERIAL_TID 0 - bool - operator==(const interval_end_point_t &rhs) const - { - return tid == rhs.tid && seen_memrefs == rhs.seen_memrefs && - interval_id == rhs.interval_id; - } - bool - operator<(const interval_end_point_t &rhs) const - { - if (tid != rhs.tid) - return tid < rhs.tid; - if (seen_memrefs != rhs.seen_memrefs) - return seen_memrefs < rhs.seen_memrefs; - return interval_id < rhs.interval_id; - } - }; +// Describes the point in trace when an interval ends. This is same as the point +// when the generate_*interval_snapshot API is invoked. +struct interval_end_point_t { + memref_tid_t tid; + int seen_memrefs; // For parallel mode, this is the shard-local count. + uint64_t interval_id; - // Describes the state recorded by test_analysis_tool_t at the end of each - // interval. - struct recorded_snapshot_t : public analysis_tool_t::interval_state_snapshot_t { - recorded_snapshot_t(uint64_t interval_id, uint64_t interval_end_timestamp, - uint64_t instr_count_cumulative, uint64_t instr_count_delta, - std::vector component_intervals) - // Actual tools do not need to supply values to construct the base - // interval_state_snapshot_t. This is only to make it easier to construct - // the expected snapshot objects in this test. - // Since the final verification happens only for the whole trace intervals, - // we simply use WHOLE_TRACE_SHARD_ID here and for tool_shard_id below. - : interval_state_snapshot_t(WHOLE_TRACE_SHARD_ID, interval_id, - interval_end_timestamp, instr_count_cumulative, - instr_count_delta) - , component_intervals(component_intervals) - , tool_shard_id(WHOLE_TRACE_SHARD_ID) - { - } - recorded_snapshot_t() - { - } + bool + operator==(const interval_end_point_t &rhs) const + { + return tid == rhs.tid && seen_memrefs == rhs.seen_memrefs && + interval_id == rhs.interval_id; + } + bool + operator<(const interval_end_point_t &rhs) const + { + if (tid != rhs.tid) + return tid < rhs.tid; + if (seen_memrefs != rhs.seen_memrefs) + return seen_memrefs < rhs.seen_memrefs; + return interval_id < rhs.interval_id; + } +}; - bool - operator==(const recorded_snapshot_t &rhs) const - { - return shard_id == rhs.shard_id && tool_shard_id == rhs.tool_shard_id && - interval_id == rhs.interval_id && - interval_end_timestamp == rhs.interval_end_timestamp && - instr_count_cumulative == rhs.instr_count_cumulative && - instr_count_delta == rhs.instr_count_delta && - component_intervals == rhs.component_intervals; - } - void - print() const - { - std::cerr << "(shard_id: " << shard_id << ", interval_id: " << interval_id - << ", tool_shard_id: " << tool_shard_id - << ", end_timestamp: " << interval_end_timestamp - << ", instr_count_cumulative: " << instr_count_cumulative - << ", instr_count_delta: " << instr_count_delta - << ", component_intervals: "; - for (const auto &s : component_intervals) { - std::cerr << "(tid:" << s.tid << ", seen_memrefs:" << s.seen_memrefs - << ", interval_id:" << s.interval_id << "),"; - } - std::cerr << ")\n"; +// Describes the state recorded by test_analysis_tool_t at the end of each +// interval. +struct recorded_snapshot_t : public analysis_tool_t::interval_state_snapshot_t { + recorded_snapshot_t(uint64_t shard_id, uint64_t interval_id, + uint64_t interval_end_timestamp, uint64_t instr_count_cumulative, + uint64_t instr_count_delta, + std::vector component_intervals) + // Actual tools do not need to supply values to construct the base + // interval_state_snapshot_t. This is only to make it easier to construct + // the expected snapshot objects in this test. + : interval_state_snapshot_t(shard_id, interval_id, interval_end_timestamp, + instr_count_cumulative, instr_count_delta) + , component_intervals(component_intervals) + , tool_shard_id(shard_id) + { + } + recorded_snapshot_t(uint64_t interval_id, uint64_t interval_end_timestamp, + uint64_t instr_count_cumulative, uint64_t instr_count_delta, + std::vector component_intervals) + : recorded_snapshot_t(WHOLE_TRACE_SHARD_ID, interval_id, interval_end_timestamp, + instr_count_cumulative, instr_count_delta, + component_intervals) + { + } + recorded_snapshot_t() + { + } + + bool + operator==(const recorded_snapshot_t &rhs) const + { + return shard_id == rhs.shard_id && tool_shard_id == rhs.tool_shard_id && + interval_id == rhs.interval_id && + interval_end_timestamp == rhs.interval_end_timestamp && + instr_count_cumulative == rhs.instr_count_cumulative && + instr_count_delta == rhs.instr_count_delta && + component_intervals == rhs.component_intervals; + } + void + print() const + { + std::cerr << "(shard_id: " << shard_id << ", interval_id: " << interval_id + << ", tool_shard_id: " << tool_shard_id + << ", end_timestamp: " << interval_end_timestamp + << ", instr_count_cumulative: " << instr_count_cumulative + << ", instr_count_delta: " << instr_count_delta + << ", component_intervals: "; + for (const auto &s : component_intervals) { + std::cerr << "(tid:" << s.tid << ", seen_memrefs:" << s.seen_memrefs + << ", interval_id:" << s.interval_id << "),"; } + std::cerr << ")\n"; + } - // Stores the list of intervals that were combined to produce this snapshot. - // In the serial case, this contains just a single value. In the parallel case, - // this contains a list of size equal to the count of shard interval snapshots - // that were combined to create this snapshot. - std::vector component_intervals; - // Stores the shard_id recorded by the test tool. Compared with the shard_id - // stored by the framework in the base struct. - int64_t tool_shard_id; - }; + // Stores the list of intervals that were combined to produce this snapshot. + // In the serial case, this contains just a single value. In the parallel case, + // this contains a list of size equal to the count of shard interval snapshots + // that were combined to create this snapshot. + std::vector component_intervals; + // Stores the shard_id recorded by the test tool. Compared with the shard_id + // stored by the framework in the base struct. + int64_t tool_shard_id; +}; +// Test analysis_tool_t that records information about when the +// generate_shard_interval_snapshot and generate_interval_snapshot APIs were invoked. +class test_analysis_tool_t : public analysis_tool_t { +public: // Constructs an analysis_tool_t that expects the given interval state snapshots to be // produced. - test_analysis_tool_t(const std::vector &expected_state_snapshots, - bool combine_only_active_shards) + test_analysis_tool_t( + const std::vector> &expected_state_snapshots, + bool combine_only_active_shards) : seen_memrefs_(0) , expected_state_snapshots_(expected_state_snapshots) , outstanding_snapshots_(0) @@ -376,7 +389,7 @@ class test_analysis_tool_t : public analysis_tool_t { snapshot->tool_shard_id = analysis_tool_t::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID; snapshot->component_intervals.push_back( - { /*tid=*/0, seen_memrefs_, interval_id }); + { SERIAL_TID, seen_memrefs_, interval_id }); ++outstanding_snapshots_; return snapshot; } @@ -393,6 +406,7 @@ class test_analysis_tool_t : public analysis_tool_t { void * parallel_shard_init(int shard_index, void *worker_data) override { + parallel_mode_ = true; auto per_shard = new per_shard_t; per_shard->magic_num = kMagicNum; per_shard->tid = kInvalidTid; @@ -440,6 +454,18 @@ class test_analysis_tool_t : public analysis_tool_t { latest_shard_snapshots, uint64_t interval_end_timestamp) override { + // If we expect multiple std::vector of interval snapshots (one for each shard), + // it means we're not merging the snapshots across shards, so there should not + // be any combine_interval_snapshot calls. + if (expected_state_snapshots_.size() != 1) { + error_string_ = "Did not expect any combine_interval_snapshots() calls"; + return nullptr; + } + if (!parallel_mode_) { + error_string_ = + "Did not expect any combine_interval_snapshots() calls in serial mode."; + return nullptr; + } recorded_snapshot_t *result = new recorded_snapshot_t(); result->tool_shard_id = analysis_tool_t::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID; @@ -485,21 +511,29 @@ class test_analysis_tool_t : public analysis_tool_t { print_interval_results( const std::vector &snapshots) override { + if (seen_print_interval_results_calls_ >= expected_state_snapshots_.size()) { + error_string_ = "Saw more print_interval_results() calls than expected"; + return false; + } std::vector recorded_state_snapshots; for (const auto &p : snapshots) { recorded_state_snapshots.push_back( reinterpret_cast(p)); } - if (!compare_results(recorded_state_snapshots, expected_state_snapshots_)) { + if (!compare_results( + recorded_state_snapshots, + expected_state_snapshots_[seen_print_interval_results_calls_])) { error_string_ = "Unexpected state snapshots"; std::cerr << "Expected:\n"; - for (const auto &snapshot : expected_state_snapshots_) + for (const auto &snapshot : + expected_state_snapshots_[seen_print_interval_results_calls_]) snapshot.print(); std::cerr << "Found:\n"; for (const auto &snapshot : recorded_state_snapshots) snapshot->print(); return false; } + ++seen_print_interval_results_calls_; return true; } bool @@ -515,12 +549,23 @@ class test_analysis_tool_t : public analysis_tool_t { { return outstanding_snapshots_; } + int + get_outstanding_print_interval_results_calls() const + { + return expected_state_snapshots_.size() - seen_print_interval_results_calls_; + } private: int seen_memrefs_; - std::vector expected_state_snapshots_; + // We expect to see one print_interval_results call per shard (we do not merge + // the shard interval snapshots for instr count intervals), or exactly one + // print_interval_results call for the whole-trace (we merge shard interval + // snapshots for timestamp intervals). + std::vector> expected_state_snapshots_; int outstanding_snapshots_; bool combine_only_active_shards_; + int seen_print_interval_results_calls_ = 0; + bool parallel_mode_ = false; // Data tracked per shard. struct per_shard_t { @@ -537,6 +582,7 @@ static bool test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) { constexpr uint64_t kIntervalMicroseconds = 100; + constexpr uint64_t kNoIntervalInstrCount = 0; std::vector refs = { // Trace for a single worker which has two constituent shards. (scheduler_t // does not guarantee that workers will process shards one after the other.) @@ -563,52 +609,51 @@ test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) gen_exit(52) // _ | 6 | 7 }; - std::vector expected_state_snapshots; + std::vector> expected_state_snapshots; if (!parallel) { // Each whole trace interval is made up of only one snapshot, the // serial snapshot. - expected_state_snapshots = { - // Format for interval_end_point_t: - test_analysis_tool_t::recorded_snapshot_t(1, 100, 1, 1, { { 0, 3, 1 } }), - test_analysis_tool_t::recorded_snapshot_t(2, 200, 3, 2, { { 0, 7, 2 } }), - test_analysis_tool_t::recorded_snapshot_t(3, 300, 6, 3, { { 0, 13, 3 } }), - test_analysis_tool_t::recorded_snapshot_t(5, 500, 7, 1, { { 0, 15, 5 } }), - test_analysis_tool_t::recorded_snapshot_t(6, 600, 7, 0, { { 0, 17, 6 } }), - test_analysis_tool_t::recorded_snapshot_t(7, 700, 8, 1, { { 0, 20, 7 } }), - }; + expected_state_snapshots = { { + // Format: + // > + recorded_snapshot_t(1, 100, 1, 1, { { SERIAL_TID, 3, 1 } }), + recorded_snapshot_t(2, 200, 3, 2, { { SERIAL_TID, 7, 2 } }), + recorded_snapshot_t(3, 300, 6, 3, { { SERIAL_TID, 13, 3 } }), + recorded_snapshot_t(5, 500, 7, 1, { { SERIAL_TID, 15, 5 } }), + recorded_snapshot_t(6, 600, 7, 0, { { SERIAL_TID, 17, 6 } }), + recorded_snapshot_t(7, 700, 8, 1, { { SERIAL_TID, 20, 7 } }), + } }; } else if (combine_only_active_shards) { // Each whole trace interval is made up of snapshots from each // shard that was active in that interval. expected_state_snapshots = { - // Format for interval_end_point_t: - test_analysis_tool_t::recorded_snapshot_t(1, 100, 1, 1, { { 51, 3, 1 } }), - // Narration: The whole-trace interval_id=1 with interval_end_timestamp=200 - // is made up of the following two shard-local interval snapshots: - // - from shard_id=51, the interval_id=1 that ends at the local_memref=5 - // - from shard_id=52, the interval_id=0 that ends at the local_memref=2 - test_analysis_tool_t::recorded_snapshot_t(2, 200, 3, 2, - { { 51, 5, 2 }, { 52, 2, 1 } }), - test_analysis_tool_t::recorded_snapshot_t(3, 300, 6, 3, - { { 51, 7, 3 }, { 52, 6, 2 } }), - test_analysis_tool_t::recorded_snapshot_t(5, 500, 7, 1, { { 52, 8, 4 } }), - test_analysis_tool_t::recorded_snapshot_t(6, 600, 7, 0, { { 51, 9, 6 } }), - test_analysis_tool_t::recorded_snapshot_t(7, 700, 8, 1, { { 52, 11, 6 } }) + { // Format: + // > + recorded_snapshot_t(1, 100, 1, 1, { { 51, 3, 1 } }), + // Narration: The whole-trace interval_id=2 with interval_end_timestamp=200 + // is made up of the following two shard-local interval snapshots: + // - from shard_id=51, the interval_id=2 that ends at the local_memref=5 + // - from shard_id=52, the interval_id=1 that ends at the local_memref=2 + recorded_snapshot_t(2, 200, 3, 2, { { 51, 5, 2 }, { 52, 2, 1 } }), + recorded_snapshot_t(3, 300, 6, 3, { { 51, 7, 3 }, { 52, 6, 2 } }), + recorded_snapshot_t(5, 500, 7, 1, { { 52, 8, 4 } }), + recorded_snapshot_t(6, 600, 7, 0, { { 51, 9, 6 } }), + recorded_snapshot_t(7, 700, 8, 1, { { 52, 11, 6 } }) } }; } else { // Each whole trace interval is made up of last snapshots from all trace shards. expected_state_snapshots = { - // Format for interval_end_point_t: - test_analysis_tool_t::recorded_snapshot_t(1, 100, 1, 1, { { 51, 3, 1 } }), - test_analysis_tool_t::recorded_snapshot_t(2, 200, 3, 2, - { { 51, 5, 2 }, { 52, 2, 1 } }), - test_analysis_tool_t::recorded_snapshot_t(3, 300, 6, 3, - { { 51, 7, 3 }, { 52, 6, 2 } }), - test_analysis_tool_t::recorded_snapshot_t(5, 500, 7, 1, - { { 51, 7, 3 }, { 52, 8, 4 } }), - test_analysis_tool_t::recorded_snapshot_t(6, 600, 7, 0, - { { 51, 9, 6 }, { 52, 8, 4 } }), - test_analysis_tool_t::recorded_snapshot_t(7, 700, 8, 1, - { { 51, 9, 6 }, { 52, 11, 6 } }) + { { // Format: + // > + recorded_snapshot_t(1, 100, 1, 1, { { 51, 3, 1 } }), + recorded_snapshot_t(2, 200, 3, 2, { { 51, 5, 2 }, { 52, 2, 1 } }), + recorded_snapshot_t(3, 300, 6, 3, { { 51, 7, 3 }, { 52, 6, 2 } }), + recorded_snapshot_t(5, 500, 7, 1, { { 51, 7, 3 }, { 52, 8, 4 } }), + recorded_snapshot_t(6, 600, 7, 0, { { 51, 9, 6 }, { 52, 8, 4 } }), + recorded_snapshot_t(7, 700, 8, 1, { { 51, 9, 6 }, { 52, 11, 6 } }) } } }; } std::vector tools; @@ -619,7 +664,7 @@ test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) std::unique_ptr(new dummy_analysis_tool_t()); tools.push_back(dummy_analysis_tool.get()); test_analyzer_t test_analyzer(refs, &tools[0], (int)tools.size(), parallel, - kIntervalMicroseconds); + kIntervalMicroseconds, kNoIntervalInstrCount); if (!test_analyzer) { FATAL_ERROR("failed to initialize test analyzer: %s", test_analyzer.get_error_string().c_str()); @@ -638,6 +683,13 @@ test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) << " left\n"; return false; } + if (test_analysis_tool.get()->get_outstanding_print_interval_results_calls() != 0) { + std::cerr + << "Missing " + << test_analysis_tool.get()->get_outstanding_print_interval_results_calls() + << " print_interval_result() calls\n"; + return false; + } int expected_generate_call_count = parallel ? 8 : 6; if (dummy_analysis_tool.get()->get_generate_snapshot_count() != expected_generate_call_count) { @@ -654,11 +706,120 @@ test_non_zero_interval(bool parallel, bool combine_only_active_shards = true) return true; } +static bool +test_non_zero_instr_interval(bool parallel) +{ + constexpr uint64_t kNoIntervalMicroseconds = 0; + constexpr uint64_t kIntervalInstrCount = 2; + std::vector refs = { + // Trace for a single worker which has two constituent shards. (scheduler_t + // does not guarantee that workers will process shards one after the other.) + // Expected active interval_id: tid_51_local | tid_52_local | whole_trace + gen_marker(51, TRACE_MARKER_TYPE_TIMESTAMP, 40), // 1 | _ | 1 + gen_instr(51, 10000), // 1 | _ | 1 + gen_data(51, true, 1234, 4), // 1 | _ | 1 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 151), // _ | 1 | 1 + gen_instr(52, 20000), // _ | 1 | 1 + gen_marker(51, TRACE_MARKER_TYPE_TIMESTAMP, 170), // 1 | _ | 1 + gen_instr(51, 10008), // 1 | _ | 2 + gen_marker(51, TRACE_MARKER_TYPE_TIMESTAMP, 201), // 1 | _ | 2 + gen_instr(51, 20004), // 2 | _ | 2 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 210), // _ | 1 | 2 + gen_instr(52, 20008), // _ | 1 | 3 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 270), // _ | 1 | 3 + gen_instr(52, 20008), // _ | 2 | 3 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 490), // _ | 2 | 3 + gen_instr(52, 20012), // _ | 2 | 4 + gen_marker(51, TRACE_MARKER_TYPE_TIMESTAMP, 590), // 2 | _ | 4 + gen_exit(51), // 2 | _ | 4 + gen_marker(52, TRACE_MARKER_TYPE_TIMESTAMP, 610), // _ | 2 | 4 + gen_instr(52, 20016), // _ | 3 | 4 + gen_exit(52) // _ | 3 | 4 + }; + + std::vector> expected_state_snapshots; + if (!parallel) { + // Each whole trace interval is made up of only one snapshot, the + // serial snapshot. + expected_state_snapshots = { + { // Format: + // > + recorded_snapshot_t(1, 170, 2, 2, { { SERIAL_TID, 6, 1 } }), + recorded_snapshot_t(2, 210, 4, 2, { { SERIAL_TID, 10, 2 } }), + recorded_snapshot_t(3, 490, 6, 2, { { SERIAL_TID, 14, 3 } }), + recorded_snapshot_t(4, 610, 8, 2, { { SERIAL_TID, 20, 4 } }) } + }; + } else { + // For instr count intervals, we do not merge the shard intervals to form the + // whole-trace intervals. Instead, there are multiple print_interval_result + // calls, one for the interval snapshots of each shard. The shard_id is + // included in the provided interval snapshots (see below). + expected_state_snapshots = { + // Format: + // > + { recorded_snapshot_t(51, 1, 201, 2, 2, { { 51, 6, 1 } }), + recorded_snapshot_t(51, 2, 590, 3, 1, { { 51, 9, 2 } }) }, + { recorded_snapshot_t(52, 1, 270, 2, 2, { { 52, 5, 1 } }), + recorded_snapshot_t(52, 2, 610, 4, 2, { { 52, 9, 2 } }), + recorded_snapshot_t(52, 3, 610, 5, 1, { { 52, 11, 3 } }) }, + }; + } + std::vector tools; + constexpr bool kNopCombineOnlyActiveShards = false; + auto test_analysis_tool = std::unique_ptr( + new test_analysis_tool_t(expected_state_snapshots, kNopCombineOnlyActiveShards)); + tools.push_back(test_analysis_tool.get()); + auto dummy_analysis_tool = + std::unique_ptr(new dummy_analysis_tool_t()); + tools.push_back(dummy_analysis_tool.get()); + test_analyzer_t test_analyzer(refs, &tools[0], (int)tools.size(), parallel, + kNoIntervalMicroseconds, kIntervalInstrCount); + if (!test_analyzer) { + FATAL_ERROR("failed to initialize test analyzer: %s", + test_analyzer.get_error_string().c_str()); + } + if (!test_analyzer.run()) { + FATAL_ERROR("failed to run test_analyzer: %s", + test_analyzer.get_error_string().c_str()); + } + if (!test_analyzer.print_stats()) { + FATAL_ERROR("failed to print stats: %s", + test_analyzer.get_error_string().c_str()); + } + if (test_analysis_tool.get()->get_outstanding_snapshot_count() != 0) { + std::cerr << "Failed to release all outstanding snapshots: " + << test_analysis_tool.get()->get_outstanding_snapshot_count() + << " left\n"; + return false; + } + if (test_analysis_tool.get()->get_outstanding_print_interval_results_calls() != 0) { + std::cerr + << "Missing " + << test_analysis_tool.get()->get_outstanding_print_interval_results_calls() + << " print_interval_result() calls\n"; + return false; + } + int expected_generate_call_count = parallel ? 5 : 4; + if (dummy_analysis_tool.get()->get_generate_snapshot_count() != + expected_generate_call_count) { + std::cerr << "Dummy analysis tool got " + << dummy_analysis_tool.get()->get_generate_snapshot_count() + << " interval API calls, but expected " << expected_generate_call_count + << "\n"; + return false; + } + fprintf(stderr, "test_non_zero_instr_interval done for parallel=%d\n", parallel); + return true; +} + int test_main(int argc, const char *argv[]) { if (!test_non_zero_interval(false) || !test_non_zero_interval(true, true) || - !test_non_zero_interval(true, false)) + !test_non_zero_interval(true, false) || !test_non_zero_instr_interval(false) || + !test_non_zero_instr_interval(true)) return 1; fprintf(stderr, "All done!\n"); return 0; diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 170ffeb787e..ab457b418fa 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4064,9 +4064,12 @@ if (BUILD_CLIENTS) endif () endif () - torunonly_drcacheoff(interval-count-output ${ci_shared_app} "" + torunonly_drcacheoff(interval-microseconds-count-output ${ci_shared_app} "" "@-simulator_type@basic_counts@-interval_microseconds@1M" "") + torunonly_drcacheoff(interval-instr-count-output ${ci_shared_app} "" + "@-simulator_type@basic_counts@-interval_instr_count@10000" "") + # As for the online test, we check that only 1 thread is in the final trace. torunonly_drcacheoff(max-global client.annotation-concurrency # Include function tracing to sanity test combining with delay and max.