Skip to content

Commit

Permalink
i#7028: Modify core-sharded cpu ids and timestamps (#7037)
Browse files Browse the repository at this point in the history
In core-sharded dynamically-scheduled mode, the as-traced cpu ids and
timestamps are interleaved in a way that makes them confusing and not
useful. We change the scheduler here to replace each cpu id value with
the shard id, and replace the timestamp with a synthetic value based on
the instruction and idle counts.

Adds a test.

Fixes #7028
  • Loading branch information
derekbruening authored Oct 11, 2024
1 parent 5f4a4c7 commit aeebb11
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 5 deletions.
75 changes: 75 additions & 0 deletions clients/drcachesim/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,17 @@ scheduler_tmpl_t<memref_t, reader_t>::record_type_is_non_marker_header(memref_t
return false;
}

// Overwrites the marker value of a memref_t marker record with "value".
// Returns true on success; returns false and leaves "record" untouched if it
// is not a marker record.
template <>
bool
scheduler_tmpl_t<memref_t, reader_t>::record_type_set_marker_value(memref_t &record,
                                                                   uintptr_t value)
{
    const bool is_marker = record.marker.type == TRACE_TYPE_MARKER;
    if (is_marker)
        record.marker.marker_value = value;
    return is_marker;
}

template <>
bool
scheduler_tmpl_t<memref_t, reader_t>::record_type_is_timestamp(memref_t record,
Expand Down Expand Up @@ -473,6 +484,17 @@ scheduler_tmpl_t<trace_entry_t, record_reader_t>::record_type_is_instr_boundary(
!record_reader_t::record_is_pre_instr(&prev_record);
}

// Overwrites the marker value of a trace_entry_t marker record with "value";
// for this record type the value is stored in the addr field.
// Returns true on success; returns false and leaves "record" untouched if it
// is not a marker record.
template <>
bool
scheduler_tmpl_t<trace_entry_t, record_reader_t>::record_type_set_marker_value(
    trace_entry_t &record, uintptr_t value)
{
    const bool is_marker = record.type == TRACE_TYPE_MARKER;
    if (is_marker)
        record.addr = value;
    return is_marker;
}

template <>
bool
scheduler_tmpl_t<trace_entry_t, record_reader_t>::record_type_is_timestamp(
Expand Down Expand Up @@ -4138,6 +4160,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t output,
}
VPRINT(this, 4, "next_record[%d]: from %d @%" PRId64 ": ", output, input->index,
cur_time);
update_next_record(output, record);
VDO(this, 4, print_record(record););

outputs_[output].last_record = record;
Expand All @@ -4146,6 +4169,58 @@ scheduler_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t output,
return sched_type_t::STATUS_OK;
}

// Rewrites the cpu id and timestamp markers of a record about to be returned
// on "output" when the schedule is dynamic.  All other records, and all records
// in other scheduling modes, are left unmodified.
template <typename RecordType, typename ReaderType>
void
scheduler_tmpl_t<RecordType, ReaderType>::update_next_record(output_ordinal_t output,
                                                             RecordType &record)
{
    // Only MAP_TO_ANY_OUTPUT and MAP_AS_PREVIOUSLY are candidates, and we do
    // not modify MAP_TO_RECORDED_OUTPUT (which is turned into
    // MAP_AS_PREVIOUSLY with an as-traced replay stream).
    const bool candidate_mapping = options_.mapping == MAP_TO_ANY_OUTPUT ||
        options_.mapping == MAP_AS_PREVIOUSLY;
    if (!candidate_mapping || options_.replay_as_traced_istream != nullptr)
        return;
    // For a dynamic schedule, the as-traced cpuids and timestamps no longer
    // apply and are just confusing (causing problems like interval analysis
    // failures), so we replace them.
    trace_marker_type_t marker_type;
    uintptr_t marker_value;
    if (!record_type_is_marker(record, marker_type, marker_value))
        return;
    if (marker_type == TRACE_MARKER_TYPE_CPU_ID) {
        // The cpu id becomes the shard index of this output.
#ifndef NDEBUG
        bool updated =
#endif
            record_type_set_marker_value(record, get_shard_index(output));
        assert(updated);
        return;
    }
    if (marker_type != TRACE_MARKER_TYPE_TIMESTAMP)
        return;
    auto &out = outputs_[output];
    if (out.base_timestamp == 0) {
        // Remember the first timestamp seen on this output as the base from
        // which all synthetic times are computed.
#ifndef NDEBUG
        bool found =
#endif
            record_type_is_timestamp(record, out.base_timestamp);
        assert(found);
        assert(out.base_timestamp != 0);
        VPRINT(this, 2, "output %d base timestamp = %zu\n", output, out.base_timestamp);
    }
    // Synthesize the time from the instruction and idle counts.
    const uint64_t instr_ord = out.stream->get_instruction_ordinal();
    const uint64_t idle_count = out.idle_count;
    const uint64_t elapsed_us = (instr_ord + idle_count) / INSTRS_PER_US;
    const uintptr_t new_time = static_cast<uintptr_t>(out.base_timestamp + elapsed_us);
    VPRINT(this, 4,
           "New time in output %d: %zu from base %zu and instrs %" PRIu64
           " idles %" PRIu64 "\n",
           output, new_time, out.base_timestamp, instr_ord, idle_count);
#ifndef NDEBUG
    bool updated =
#endif
        record_type_set_marker_value(record, new_time);
    assert(updated);
}

template <typename RecordType, typename ReaderType>
typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
scheduler_tmpl_t<RecordType, ReaderType>::unread_last_record(output_ordinal_t output,
Expand Down
11 changes: 11 additions & 0 deletions clients/drcachesim/scheduler/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1704,6 +1704,8 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
// When no simulation time is passed to us, we use the idle count plus
// instruction count to measure time.
uint64_t idle_count = 0;
// The first timestamp (pre-update_next_record()) seen on the first input.
uintptr_t base_timestamp = 0;
};

// Used for reading as-traced schedules.
Expand Down Expand Up @@ -1754,6 +1756,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
uint64_t input_count = 0;
};

// Used to convert the sum of an output's instruction and idle counts into
// elapsed microseconds when synthesizing timestamps.
// We assume a 2GHz clock and IPC=1, which yields 2000 instructions per
// microsecond.
static constexpr uint64_t INSTRS_PER_US = 2000;

// Called just once at initialization time to set the initial input-to-output
// mappings and state.
scheduler_status_t
Expand Down Expand Up @@ -1950,6 +1955,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
bool
record_type_is_timestamp(RecordType record, uintptr_t &value);

// Sets the marker value of "record" to "value".
// Returns true on success; returns false and leaves "record" unmodified if
// it is not a marker record.
bool
record_type_set_marker_value(RecordType &record, uintptr_t value);

bool
record_type_is_invalid(RecordType record);

Expand All @@ -1975,6 +1983,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
void
insert_switch_tid_pid(input_info_t &input);

// Called by next_record() on the record about to be returned for "output".
// For the MAP_TO_ANY_OUTPUT and MAP_AS_PREVIOUSLY mappings (unless an
// as-traced replay stream is set), replaces the value of each timestamp
// marker with a synthetic time computed from the output's first-seen
// timestamp plus its instruction and idle counts, and replaces the value of
// each cpu id marker with get_shard_index(output).
// All other records are left unmodified.
void
update_next_record(output_ordinal_t output, RecordType &record);

// Used for diagnostics: prints record fields to stderr.
void
print_record(const RecordType &record);
Expand Down
107 changes: 102 additions & 5 deletions clients/drcachesim/tests/scheduler_unit_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <cstddef>
#include <cstring>
#include <iostream>
#include <random>
#include <set>
#include <string>
#include <thread>
Expand Down Expand Up @@ -3294,7 +3295,8 @@ test_replay_skip()
case 1:
assert(memref.marker.type == TRACE_TYPE_MARKER);
assert(memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID);
assert(memref.marker.marker_value == 1);
// The value should be replaced by the shard id == 0.
assert(memref.marker.marker_value == 0);
break;
case 2:
assert(type_is_instr(memref.instr.type));
Expand All @@ -3308,12 +3310,14 @@ test_replay_skip()
case 4:
assert(memref.marker.type == TRACE_TYPE_MARKER);
assert(memref.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP);
assert(memref.marker.marker_value == 40);
// The value should be replaced by a synthetic value: the initial (10)
// won't have advanced to the next microsecond.
assert(memref.marker.marker_value == 10);
break;
case 5:
assert(memref.marker.type == TRACE_TYPE_MARKER);
assert(memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID);
assert(memref.marker.marker_value == 4);
assert(memref.marker.marker_value == 0);
break;
case 6:
assert(type_is_instr(memref.instr.type));
Expand All @@ -3322,12 +3326,12 @@ test_replay_skip()
case 7:
assert(memref.marker.type == TRACE_TYPE_MARKER);
assert(memref.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP);
assert(memref.marker.marker_value == 50);
assert(memref.marker.marker_value == 10);
break;
case 8:
assert(memref.marker.type == TRACE_TYPE_MARKER);
assert(memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID);
assert(memref.marker.marker_value == 5);
assert(memref.marker.marker_value == 0);
break;
case 9:
assert(type_is_instr(memref.instr.type));
Expand Down Expand Up @@ -6112,6 +6116,98 @@ test_exit_early()
}
}

static void
test_marker_updates()
{
std::cerr << "\n----------------\nTesting marker updates\n";
static constexpr int NUM_INPUTS = 5;
static constexpr int NUM_OUTPUTS = 3;
// We need at least enough instrs to cover INSTRS_PER_US==2000.
static constexpr int NUM_INSTRS = 10000;
static constexpr memref_tid_t TID_BASE = 100;
static constexpr uint64_t TIMESTAMP_BASE = 12340000;

std::vector<trace_entry_t> inputs[NUM_INPUTS];

std::minstd_rand rand_gen;
rand_gen.seed(static_cast<int>(reinterpret_cast<int64_t>(&inputs[0])));

for (int i = 0; i < NUM_INPUTS; i++) {
memref_tid_t tid = TID_BASE + i;
inputs[i].push_back(make_thread(tid));
inputs[i].push_back(make_pid(1));
inputs[i].push_back(make_version(TRACE_ENTRY_VERSION));
// Add a randomly-increasing-value timestamp.
uint64_t cur_timestamp = TIMESTAMP_BASE;
cur_timestamp += rand_gen();
inputs[i].push_back(make_timestamp(cur_timestamp));
// Add a cpuid with a random value.
inputs[i].push_back(make_marker(TRACE_MARKER_TYPE_CPU_ID, rand_gen()));
for (int j = 0; j < NUM_INSTRS; j++) {
inputs[i].push_back(make_instr(42 + j * 4));
// Add a randomly-increasing-value timestamp.
cur_timestamp += rand_gen();
inputs[i].push_back(make_timestamp(cur_timestamp));
// Add a cpuid with a random value.
inputs[i].push_back(make_marker(TRACE_MARKER_TYPE_CPU_ID, rand_gen()));
}
inputs[i].push_back(make_exit(tid));
}
std::vector<scheduler_t::input_workload_t> sched_inputs;
for (int i = 0; i < NUM_INPUTS; i++) {
std::vector<scheduler_t::input_reader_t> readers;
readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(inputs[i])),
std::unique_ptr<mock_reader_t>(new mock_reader_t()),
TID_BASE + i);
sched_inputs.emplace_back(std::move(readers));
}
scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT,
scheduler_t::DEPENDENCY_IGNORE,
scheduler_t::SCHEDULER_DEFAULTS,
/*verbosity=*/2);
scheduler_t scheduler;
if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) !=
scheduler_t::STATUS_SUCCESS)
assert(false);
std::vector<scheduler_t::stream_t *> outputs(NUM_OUTPUTS, nullptr);
std::vector<uintptr_t> last_timestamp(NUM_OUTPUTS, 0);
std::vector<bool> eof(NUM_OUTPUTS, false);
for (int i = 0; i < NUM_OUTPUTS; i++)
outputs[i] = scheduler.get_stream(i);
int num_eof = 0;
while (num_eof < NUM_OUTPUTS) {
for (int i = 0; i < NUM_OUTPUTS; i++) {
if (eof[i])
continue;
memref_t memref;
scheduler_t::stream_status_t status = outputs[i]->next_record(memref);
if (status == scheduler_t::STATUS_EOF) {
++num_eof;
eof[i] = true;
continue;
}
if (status == scheduler_t::STATUS_IDLE)
continue;
assert(status == scheduler_t::STATUS_OK);
if (memref.marker.type != TRACE_TYPE_MARKER)
continue;
// Make sure the random values have some order now, satisfying invariants.
if (memref.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP) {
assert(memref.marker.marker_value >= last_timestamp[i]);
last_timestamp[i] = memref.marker.marker_value;
} else if (memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID) {
assert(memref.marker.marker_value ==
static_cast<uintptr_t>(outputs[i]->get_shard_index()));
}
}
}
// Ensure we didn't short-circuit or exit early.
uint64_t instrs_seen = 0;
for (int i = 0; i < NUM_OUTPUTS; i++)
instrs_seen += outputs[i]->get_instruction_ordinal();
assert(instrs_seen == NUM_INPUTS * NUM_INSTRS);
}

int
test_main(int argc, const char *argv[])
{
Expand Down Expand Up @@ -6154,6 +6250,7 @@ test_main(int argc, const char *argv[])
test_record_scheduler();
test_rebalancing();
test_exit_early();
test_marker_updates();

dr_standalone_exit();
return 0;
Expand Down

0 comments on commit aeebb11

Please sign in to comment.