diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml index 2ba40d9a760..70ced1ce403 100644 --- a/.github/workflows/ci-docs.yml +++ b/.github/workflows/ci-docs.yml @@ -90,7 +90,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi diff --git a/.github/workflows/ci-package.yml b/.github/workflows/ci-package.yml index 9bff2302ddc..abf38d6078e 100644 --- a/.github/workflows/ci-package.yml +++ b/.github/workflows/ci-package.yml @@ -103,7 +103,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -195,7 +195,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -283,7 +283,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. 
run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -371,7 +371,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -451,7 +451,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -536,7 +536,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER="10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))" + export VERSION_NUMBER="10.91.$((`git log -n 1 --format=%ct` / (60*60*24)))" export PREFIX="cronbuild-" else export VERSION_NUMBER=${{ github.event.inputs.version }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 2463024ee8c..8463e376f99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -575,7 +575,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn") # N.B.: When updating this, update all the default versions in ci-package.yml # and ci-docs.yml. We should find a way to share (xref i#1565). 
-set(VERSION_NUMBER_DEFAULT "10.90.${VERSION_NUMBER_PATCHLEVEL}") +set(VERSION_NUMBER_DEFAULT "10.91.${VERSION_NUMBER_PATCHLEVEL}") # do not store the default VERSION_NUMBER in the cache to prevent a stale one # from preventing future version updates in a pre-existing build dir set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default") diff --git a/api/docs/release.dox b/api/docs/release.dox index f108f45f4a5..a407e16edfb 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -150,6 +150,10 @@ changes: - Changed the type of the AArch64 #dr_mcontext_t members svep and ffr to #dr_svep_t. This breaks binary compatibility with clients that were built against versions of DynamoRIO before this change. + - Changed #dynamorio::drmemtrace::record_filter_t::record_filter_func_t + parallel_shard_filter() interface. Added a new parameter of type + #dynamorio::drmemtrace::record_filter_t::record_filter_info_t that allows + #dynamorio::drmemtrace::record_filter_t to share data with its filters. Further non-compatibility-affecting changes include: - Added DWARF-5 support to the drsyms library by linking in 4 static libraries @@ -226,6 +230,10 @@ Further non-compatibility-affecting changes include: purpose of preserving register dependencies. - Added instr_convert_to_isa_regdeps() API that converts an #instr_t from a real ISA (e.g., #DR_ISA_AMD64) to the #DR_ISA_REGDEPS synthetic ISA. + - Added encodings2regdeps_filter_t filter to #dynamorio::drmemtrace::record_filter_t to + generate #DR_ISA_REGDEPS traces. + - Added #dynamorio::drmemtrace::OFFLINE_FILE_TYPE_ARCH_REGDEPS file type for + #DR_ISA_REGDEPS traces. **************************************************
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 75b366ed495..9ec0d0ddc6c 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -192,12 +192,15 @@ add_exported_library(drmemtrace_simulator STATIC ) add_exported_library(drmemtrace_record_filter STATIC + tools/filter/record_filter.h tools/filter/record_filter.cpp tools/filter/cache_filter.h tools/filter/cache_filter.cpp tools/filter/type_filter.h + tools/filter/encodings2regdeps_filter.h tools/filter/null_filter.h) target_link_libraries(drmemtrace_record_filter drmemtrace_simulator) +configure_DynamoRIO_standalone(drmemtrace_record_filter) add_exported_library(directory_iterator STATIC common/directory_iterator.cpp) add_dependencies(directory_iterator api_headers) @@ -357,6 +360,8 @@ install_client_nonDR_header(drmemtrace simulator/cache_simulator_create.h) install_client_nonDR_header(drmemtrace simulator/tlb_simulator_create.h) install_client_nonDR_header(drmemtrace tools/view_create.h) install_client_nonDR_header(drmemtrace tools/func_view_create.h) +install_client_nonDR_header(drmemtrace tools/filter/record_filter_create.h) +install_client_nonDR_header(drmemtrace tools/filter/record_filter.h) # TODO i#6412: Create a separate directory for non-tracer headers so that # we can more cleanly separate tracer and raw2trace code. 
install_client_nonDR_header(drmemtrace tracer/raw2trace.h) @@ -393,8 +398,12 @@ add_executable(record_filter_launcher tools/record_filter_launcher.cpp tests/test_helpers.cpp) target_link_libraries(record_filter_launcher drmemtrace_analyzer drmemtrace_record_filter) +add_dependencies(record_filter_launcher api_headers) append_property_list(TARGET record_filter_launcher COMPILE_DEFINITIONS "NO_HELPER_MAIN") use_DynamoRIO_extension(record_filter_launcher droption) +if (NOT APPLE) + configure_DynamoRIO_static(record_filter_launcher) +endif() # We want to use test_helper's disable_popups() but we have _tmain and so do not want # the test_helper library's main symbol: so we compile ourselves and disable. diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index fb71f986f76..045f05bfd6f 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -334,7 +334,8 @@ record_analyzer_multi_t::create_analysis_tool_from_options( op_outdir.get_value(), op_filter_stop_timestamp.get_value(), op_filter_cache_size.get_value(), op_filter_trace_types.get_value(), op_filter_marker_types.get_value(), op_trim_before_timestamp.get_value(), - op_trim_after_timestamp.get_value(), op_verbose.get_value()); + op_trim_after_timestamp.get_value(), op_encodings2regdeps.get_value(), + op_verbose.get_value()); } ERRMSG("Usage error: unsupported record analyzer type \"%s\". Only " RECORD_FILTER " is supported.\n", diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 7105192347b..39ab5112f6f 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -972,6 +972,16 @@ droption_t "Comma-separated integers for marker types to remove. " "See trace_marker_type_t for the list of marker types."); +/* XXX i#6369: we should partition our options by tool. This one should belong to the + * record_filter partition. 
For now we add the filter_ prefix to options that should be + * used in conjunction with record_filter. + */ +droption_t op_encodings2regdeps( + DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false, + "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", + "This option is for -simulator_type " RECORD_FILTER ". When present, it converts " + "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA."); + droption_t op_trim_before_timestamp( DROPTION_SCOPE_ALL, "trim_before_timestamp", 0, 0, (std::numeric_limits::max)(), diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 316d1acf063..132f974c018 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -214,6 +214,7 @@ extern dynamorio::droption::droption_t op_filter_stop_timestamp; extern dynamorio::droption::droption_t op_filter_cache_size; extern dynamorio::droption::droption_t op_filter_trace_types; extern dynamorio::droption::droption_t op_filter_marker_types; +extern dynamorio::droption::droption_t op_encodings2regdeps; extern dynamorio::droption::droption_t op_trim_before_timestamp; extern dynamorio::droption::droption_t op_trim_after_timestamp; extern dynamorio::droption::droption_t op_abort_on_invariant_error; diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 49fd8528604..5b26180ba5c 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -665,6 +665,18 @@ type_is_instr(const trace_type_t type) type == TRACE_TYPE_INSTR_UNTAKEN_JUMP; } +/** + * Returns whether \p type represents any type of instruction record whether an + * instruction fetch or operation hint. This is a superset of type_is_instr() and also includes + * #TRACE_TYPE_INSTR_MAYBE_FETCH and #TRACE_TYPE_INSTR_NO_FETCH.
+ */ +static inline bool +is_any_instr_type(const trace_type_t type) +{ + return type_is_instr(type) || type == TRACE_TYPE_INSTR_MAYBE_FETCH || + type == TRACE_TYPE_INSTR_NO_FETCH; +} + /** Returns whether the type represents the fetch of a branch instruction. */ static inline bool type_is_instr_branch(const trace_type_t type) @@ -889,9 +901,6 @@ typedef enum { OFFLINE_FILE_TYPE_ARCH_ARM32 = 0x10, /**< Recorded on ARM (32-bit). */ OFFLINE_FILE_TYPE_ARCH_X86_32 = 0x20, /**< Recorded on x86 (32-bit). */ OFFLINE_FILE_TYPE_ARCH_X86_64 = 0x40, /**< Recorded on x86 (64-bit). */ - OFFLINE_FILE_TYPE_ARCH_ALL = OFFLINE_FILE_TYPE_ARCH_AARCH64 | - OFFLINE_FILE_TYPE_ARCH_ARM32 | OFFLINE_FILE_TYPE_ARCH_X86_32 | - OFFLINE_FILE_TYPE_ARCH_X86_64, /**< All possible architecture types. */ /** * Instruction addresses filtered online. * Note: this file type may transition to non-filtered. If so, the transition is @@ -968,19 +977,36 @@ typedef enum { * Each trace shard represents one core and contains interleaved software threads. */ OFFLINE_FILE_TYPE_CORE_SHARDED = 0x10000, + /** + * Trace filtered by the record_filter tool using -filter_encodings2regdeps. + * The encodings2regdeps filter replaces real ISA encodings with #DR_ISA_REGDEPS + * encodings. Note that these encoding changes do not update the instruction length, + * hence encoding size and instruction fetch size may not match. + */ + OFFLINE_FILE_TYPE_ARCH_REGDEPS = 0x20000, + /** + * All possible architecture types, including synthetic ones. + */ + OFFLINE_FILE_TYPE_ARCH_ALL = OFFLINE_FILE_TYPE_ARCH_AARCH64 | + OFFLINE_FILE_TYPE_ARCH_ARM32 | OFFLINE_FILE_TYPE_ARCH_X86_32 | + OFFLINE_FILE_TYPE_ARCH_X86_64 | OFFLINE_FILE_TYPE_ARCH_REGDEPS, } offline_file_type_t; static inline const char * trace_arch_string(offline_file_type_t type) { - return TESTANY(OFFLINE_FILE_TYPE_ARCH_AARCH64, type) - ? "aarch64" - : (TESTANY(OFFLINE_FILE_TYPE_ARCH_ARM32, type) - ? "arm" - : (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_32, type) - ? 
"i386" - : (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_64, type) ? "x86_64" - : "unspecified"))); + if (TESTANY(OFFLINE_FILE_TYPE_ARCH_AARCH64, type)) + return "aarch64"; + else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ARM32, type)) + return "arm"; + else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_32, type)) + return "i386"; + else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_64, type)) + return "x86_64"; + else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, type)) + return "regdeps"; + else + return "unspecified"; } /* We have non-client targets including this header that do not include API diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index b938e06dfc6..93489f6c631 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -210,7 +210,12 @@ reader_t::process_input_entry() ++cur_instr_count_; // Look for encoding bits that belong to this instr. if (last_encoding_.size > 0) { - if (last_encoding_.size != cur_ref_.instr.size) { + if (last_encoding_.size != cur_ref_.instr.size && + /* OFFLINE_FILE_TYPE_ARCH_REGDEPS traces have encodings with + * size != ifetch. It's a design choice, not an error, hence + * we avoid this sanity check for these traces. + */ + !TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype_)) { ERRMSG( "Encoding size %zu != instr size %zu for PC 0x%zx at ord %" PRIu64 " instr %" PRIu64 " last_timestamp=0x%" PRIx64 "\n", diff --git a/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex b/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex new file mode 100644 index 00000000000..a9271341d91 --- /dev/null +++ b/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex @@ -0,0 +1,11 @@ +Hello, world! + +Trace invariant checks passed + +Output .* entries from .* entries. 
+ +Opcode mix tool results: + + *[0-9]* : total executed instructions + *[0-9]* : +.* diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 0f7dd36975a..c20a10a3034 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -42,6 +42,8 @@ #include "tools/filter/record_filter.h" #include "tools/filter/trim_filter.h" #include "tools/filter/type_filter.h" +#include "tools/filter/encodings2regdeps_filter.h" +#include "trace_entry.h" #include "zipfile_ostream.h" #include @@ -287,6 +289,144 @@ process_entries_and_check_result(test_record_filter_t *record_filter, return true; } +static bool +test_encodings2regdeps_filter() +{ + constexpr addr_t PC = 0x7f6fdd3ec360; + constexpr addr_t PC2 = 0x7f6fdd3eb1f7; + // constexpr addr_t PC2 = 0x00007f6fdd3eb1f7; + constexpr addr_t PC3 = 0x7f6fdd3eb21a; + constexpr addr_t ENCODING_REAL_ISA = 0xe78948; + constexpr addr_t ENCODING_REAL_ISA_2_PART1 = 0x841f0f66; + constexpr addr_t ENCODING_REAL_ISA_2_PART2 = 0x0; + constexpr addr_t ENCODING_REAL_ISA_3 = 0xab48f3; + constexpr addr_t ENCODING_REGDEPS_ISA = 0x0006090600010011; + constexpr addr_t ENCODING_REGDEPS_ISA_2 = 0x0000020400004010; + constexpr addr_t ENCODING_REGDEPS_ISA_3_PART1 = 0x0209030600001042; + constexpr addr_t ENCODING_REGDEPS_ISA_3_PART2 = 0x0000000000220903; + std::vector entries = { + /* Trace shard header. + */ + { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } }, + /* File type, modified by record_filter encodings2regdeps to add + * OFFLINE_FILE_TYPE_ARCH_REGDEPS. 
+ */ + { { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ARCH_X86_64 | OFFLINE_FILE_TYPE_ENCODINGS | + OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } }, + true, + { false } }, + { { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ARCH_REGDEPS | OFFLINE_FILE_TYPE_ENCODINGS | + OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } }, + false, + { true } }, + { { TRACE_TYPE_THREAD, 0, { 0x4 } }, true, { true } }, + { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 0x3 } }, + true, + { true } }, + + /* Chunk 1. + */ + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + /* Encoding, modified by the record_filter encodings2regdeps. + * encoding real ISA size == encoding regdeps ISA size + * (in terms of trace_entry_t). + */ + { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, true, { true } }, + + /* Chunk 2. + */ + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0xa } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + /* Duplicated encoding across chunk boundary. 
+ */ + { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 1 } }, true, { true } }, + + /* Chunk 3. + */ + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0xe } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + /* encoding real ISA size > encoding regdeps ISA size + */ + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REAL_ISA_2_PART1 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 1, { ENCODING_REAL_ISA_2_PART2 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA_2 } }, false, { true } }, + { { TRACE_TYPE_INSTR, 9, { PC2 } }, true, { true } }, + { { TRACE_TYPE_INSTR, 9, { PC2 } }, true, { true } }, + { { TRACE_TYPE_INSTR, 9, { PC2 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 2 } }, true, { true } }, + + /* Chunk 4. + */ + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0x12 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + /* encoding real ISA size < encoding regdeps ISA size + */ + { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA_3 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA_3_PART1 } }, false, { true } }, + { { TRACE_TYPE_ENCODING, 4, { ENCODING_REGDEPS_ISA_3_PART2 } }, false, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC3 } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC3 } }, true, { true } }, + + /* Trace shard footer. 
+ */ + { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, true, { true } }, + }; + + /* Construct encodings2regdeps_filter. + */ + std::vector> filters; + auto encodings2regdeps_filter = std::unique_ptr( + new dynamorio::drmemtrace::encodings2regdeps_filter_t()); + if (encodings2regdeps_filter->get_error_string() != "") { + fprintf(stderr, "Couldn't construct a encodings2regdeps_filter %s", + encodings2regdeps_filter->get_error_string().c_str()); + return false; + } + filters.push_back(std::move(encodings2regdeps_filter)); + + /* Construct record_filter_t. + */ + auto record_filter = std::unique_ptr( + new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true)); + + /* Run the test. + */ + if (!process_entries_and_check_result(record_filter.get(), entries, 0)) + return false; + + fprintf(stderr, "test_encodings2regdeps_filter passed\n"); + return true; +} + static bool test_cache_and_type_filter() { @@ -541,7 +681,9 @@ test_chunk_update() return nullptr; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override { bool res = true; if (type_is_instr(static_cast(entry.type))) { @@ -1105,7 +1247,7 @@ test_main(int argc, const char *argv[]) droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str()); } if (!test_cache_and_type_filter() || !test_chunk_update() || !test_trim_filter() || - !test_null_filter() || !test_wait_filter()) + !test_null_filter() || !test_wait_filter() || !test_encodings2regdeps_filter()) return 1; fprintf(stderr, "All done!\n"); return 0; diff --git a/clients/drcachesim/tools/filter/cache_filter.cpp b/clients/drcachesim/tools/filter/cache_filter.cpp index 6dae8b80ac4..470bc3b958f 100644 --- a/clients/drcachesim/tools/filter/cache_filter.cpp +++ b/clients/drcachesim/tools/filter/cache_filter.cpp @@ -87,7 +87,9 @@ cache_filter_t::parallel_shard_init(memtrace_stream_t *shard_stream, return 
per_shard; } bool -cache_filter_t::parallel_shard_filter(trace_entry_t &entry, void *shard_data) +cache_filter_t::parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) { if (entry.type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) { if (filter_instrs_) diff --git a/clients/drcachesim/tools/filter/cache_filter.h b/clients/drcachesim/tools/filter/cache_filter.h index 677a027bd11..d94152b6c19 100644 --- a/clients/drcachesim/tools/filter/cache_filter.h +++ b/clients/drcachesim/tools/filter/cache_filter.h @@ -55,7 +55,9 @@ class cache_filter_t : public record_filter_t::record_filter_func_t { parallel_shard_init(memtrace_stream_t *shard_stream, bool partial_trace_filter) override; bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override; + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override; bool parallel_shard_exit(void *shard_data) override; diff --git a/clients/drcachesim/tools/filter/encodings2regdeps_filter.h b/clients/drcachesim/tools/filter/encodings2regdeps_filter.h new file mode 100644 index 00000000000..cf521c176ab --- /dev/null +++ b/clients/drcachesim/tools/filter/encodings2regdeps_filter.h @@ -0,0 +1,210 @@ +/* ********************************************************** + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#ifndef _ENCODING_FILTER_H_ +#define _ENCODING_FILTER_H_ 1 + +#include "record_filter.h" +#include "trace_entry.h" +#include "utils.h" + +#include +#include + +/* We are not exporting the defines in core/ir/isa_regdeps/encoding_common.h, so we + * redefine DR_ISA_REGDEPS alignment requirement here. + */ +#define REGDEPS_ALIGN_BYTES 4 + +#define REGDEPS_MAX_ENCODING_LENGTH 16 + +namespace dynamorio { +namespace drmemtrace { + +/* This filter changes the encoding of trace_entry_t and generates discrepancies between + * encoding size and instruction length. So, we need to tell reader_t, which here comes in + * the form of memref_counter_t used in record_filter, to ignore such discrepancies. 
We do + * so by adding OFFLINE_FILE_TYPE_ARCH_REGDEPS to the file type of the filtered trace. + * Note that simulators that deal with these filtered traces will also have to handle the + * fact that encoding_size != instruction_length. + */ +class encodings2regdeps_filter_t : public record_filter_t::record_filter_func_t { +public: + encodings2regdeps_filter_t() + { + } + + void * + parallel_shard_init(memtrace_stream_t *shard_stream, + bool partial_trace_filter) override + { + dcontext_.dcontext = dr_standalone_init(); + return nullptr; + } + + bool + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override + { + std::vector *last_encoding = record_filter_info.last_encoding; + + /* Modify file_type to regdeps ISA, removing the real ISA of the input trace. + */ + trace_type_t entry_type = static_cast(entry.type); + if (entry_type == TRACE_TYPE_MARKER) { + trace_marker_type_t marker_type = + static_cast(entry.size); + if (marker_type == TRACE_MARKER_TYPE_FILETYPE) { + uint64_t marker_value = static_cast(entry.addr); + marker_value = update_filetype(marker_value); + entry.addr = static_cast(marker_value); + } + } + + /* We have encoding to convert. + * Normally the sequence of trace_entry_t(s) looks like: + * [TRACE_TYPE_ENCODING,]+ [TRACE_TYPE_MARKER.TRACE_MARKER_TYPE_BRANCH_TARGET,] + * TRACE_TYPE_INSTR_, [TRACE_TYPE_READ | TRACE_TYPE_WRITE]* + * ([] = zero or one, + = one or more, * = zero or more) + * If we enter here, trace_entry_t is some TRACE_TYPE_INSTR_ for which + * last_encoding already contains its encoding. + */ + if (is_any_instr_type(static_cast(entry.type)) && + !last_encoding->empty()) { + /* Gather real ISA encoding bytes looping through all previously saved + * encoding bytes in last_encoding. 
+ */ + const app_pc pc = reinterpret_cast(entry.addr); + byte encoding[MAX_ENCODING_LENGTH]; + memset(encoding, 0, sizeof(encoding)); + uint encoding_offset = 0; + for (auto &trace_encoding : *last_encoding) { + memcpy(encoding + encoding_offset, trace_encoding.encoding, + trace_encoding.size); + encoding_offset += trace_encoding.size; + } + + /* Generate the real ISA instr_t by decoding the encoding bytes. + */ + instr_t instr; + instr_init(dcontext_.dcontext, &instr); + app_pc next_pc = decode_from_copy(dcontext_.dcontext, encoding, pc, &instr); + if (next_pc == NULL || !instr_valid(&instr)) { + instr_free(dcontext_.dcontext, &instr); + error_string_ = + "Failed to decode instruction " + to_hex_string(entry.addr); + return false; + } + + /* Convert the real ISA instr_t into a regdeps ISA instr_t. + */ + instr_t instr_regdeps; + instr_init(dcontext_.dcontext, &instr_regdeps); + instr_convert_to_isa_regdeps(dcontext_.dcontext, &instr, &instr_regdeps); + instr_free(dcontext_.dcontext, &instr); + + /* Obtain regdeps ISA instr_t encoding bytes. + */ + byte ALIGN_VAR(REGDEPS_ALIGN_BYTES) + encoding_regdeps[REGDEPS_MAX_ENCODING_LENGTH]; + memset(encoding_regdeps, 0, sizeof(encoding_regdeps)); + app_pc next_pc_regdeps = + instr_encode(dcontext_.dcontext, &instr_regdeps, encoding_regdeps); + instr_free(dcontext_.dcontext, &instr_regdeps); + if (next_pc_regdeps == NULL) { + error_string_ = + "Failed to encode regdeps instruction " + to_hex_string(entry.addr); + return false; + } + + /* Compute number of trace_entry_t to contain regdeps ISA encoding. + * Each trace_entry_t record can contain pointer-sized byte encoding + * (i.e., 4 bytes for 32 bits architectures and 8 bytes for 64 bits).
+ */ + uint trace_entry_encoding_size = static_cast(sizeof(entry.addr)); + uint regdeps_encoding_size = + static_cast(next_pc_regdeps - encoding_regdeps); + uint num_regdeps_encoding_entries = + ALIGN_FORWARD(regdeps_encoding_size, trace_entry_encoding_size) / + trace_entry_encoding_size; + last_encoding->resize(num_regdeps_encoding_entries); + + /* Copy regdeps ISA encoding, splitting it among the last_encoding + * trace_entry_t records. + */ + uint regdeps_encoding_offset = 0; + for (trace_entry_t &encoding_entry : *last_encoding) { + encoding_entry.type = TRACE_TYPE_ENCODING; + uint size = std::min(regdeps_encoding_size, trace_entry_encoding_size); + encoding_entry.size = static_cast(size); + memset(encoding_entry.encoding, 0, trace_entry_encoding_size); + memcpy(encoding_entry.encoding, + encoding_regdeps + regdeps_encoding_offset, encoding_entry.size); + regdeps_encoding_size -= encoding_entry.size; + regdeps_encoding_offset += encoding_entry.size; + } + } + return true; + } + + bool + parallel_shard_exit(void *shard_data) override + { + return true; + } + + uint64_t + update_filetype(uint64_t filetype) override + { + filetype &= ~OFFLINE_FILE_TYPE_ARCH_ALL; + filetype |= OFFLINE_FILE_TYPE_ARCH_REGDEPS; + return filetype; + } + +private: + struct dcontext_cleanup_last_t { + public: + ~dcontext_cleanup_last_t() + { + if (dcontext != nullptr) + dr_standalone_exit(); + } + void *dcontext = nullptr; + }; + + dcontext_cleanup_last_t dcontext_; +}; + +} // namespace drmemtrace +} // namespace dynamorio +#endif /* _ENCODING_FILTER_H_ */ diff --git a/clients/drcachesim/tools/filter/null_filter.h b/clients/drcachesim/tools/filter/null_filter.h index e76f6e0df44..9ebf5377bd4 100644 --- a/clients/drcachesim/tools/filter/null_filter.h +++ b/clients/drcachesim/tools/filter/null_filter.h @@ -47,7 +47,9 @@ class null_filter_t : public record_filter_t::record_filter_func_t { return nullptr; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + 
parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override { return true; } diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 4f0b9cfb68a..2afd74b9fa4 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -60,6 +60,7 @@ #include "cache_filter.h" #include "trim_filter.h" #include "type_filter.h" +#include "encodings2regdeps_filter.h" #undef VPRINT #ifdef DEBUG @@ -83,13 +84,6 @@ namespace drmemtrace { namespace { -bool -is_any_instr_type(trace_type_t type) -{ - return type_is_instr(type) || type == TRACE_TYPE_INSTR_MAYBE_FETCH || - type == TRACE_TYPE_INSTR_NO_FETCH; -} - template std::vector parse_string(const std::string &s, char sep = ',') @@ -113,7 +107,7 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp int cache_filter_size, const std::string &remove_trace_types, const std::string &remove_marker_types, uint64_t trim_before_timestamp, uint64_t trim_after_timestamp, - unsigned int verbose) + bool encodings2regdeps, unsigned int verbose) { std::vector< std::unique_ptr> @@ -143,6 +137,12 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp new dynamorio::drmemtrace::trim_filter_t(trim_before_timestamp, trim_after_timestamp))); } + if (encodings2regdeps) { + filter_funcs.emplace_back( + std::unique_ptr( + new dynamorio::drmemtrace::encodings2regdeps_filter_t())); + } + // TODO i#5675: Add other filters. 
return new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), @@ -386,6 +386,7 @@ record_filter_t::parallel_shard_init_stream(int shard_index, void *worker_data, success_ = false; } } + per_shard->record_filter_info.last_encoding = &per_shard->last_encoding; std::lock_guard guard(shard_map_mutex_); shard_map_[shard_index] = per_shard; return reinterpret_cast(per_shard); @@ -588,7 +589,10 @@ record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t & // XXX: What if there is a filter removing all encodings but only // to the stop point, so a partial remove that does not change // the filetype? For now we do not support that, and we re-add - // encodings at chunk boundaries regardless. + // encodings at chunk boundaries regardless. Note that filters that modify + // encodings (even if they add or remove trace_entry_t records) do not incur in + // this problem and we don't need support for partial removal of encodings in this + // case. An example of such filters is encodings2regdeps_filter_t. if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype) && per_shard->cur_chunk_pcs.find(entry.addr) == per_shard->cur_chunk_pcs.end()) { if (per_shard->per_input == nullptr) @@ -605,14 +609,19 @@ record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t & per_shard->chunk_ordinal, per_shard->cur_refs); // Sanity check that the encoding size is correct. const auto &enc = per_shard->per_input->pc2encoding[entry.addr]; - size_t enc_sz = 0; - // Since all but the last entry are fixed-size we could avoid a loop - // but the loop is easier to read and we have just 1 or 2 iters. - for (const auto &record : enc) - enc_sz += record.size; - if (enc_sz != entry.size) { - return "New-chunk encoding size " + std::to_string(enc_sz) + - " != instr size " + std::to_string(entry.size); + /* OFFLINE_FILE_TYPE_ARCH_REGDEPS traces have encodings with size != ifetch. 
+ * It's a design choice, not an error, hence we avoid this sanity check. + */ + if (!TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, per_shard->filetype)) { + size_t enc_sz = 0; + // Since all but the last entry are fixed-size we could avoid a loop + // but the loop is easier to read and we have just 1 or 2 iters. + for (const auto &record : enc) + enc_sz += record.size; + if (enc_sz != entry.size) { + return "New-chunk encoding size " + std::to_string(enc_sz) + + " != instr size " + std::to_string(entry.size); + } } if (!write_trace_entries(per_shard, enc)) { return "Failed to write"; @@ -641,7 +650,10 @@ record_filter_t::process_delayed_encodings(per_shard_t *per_shard, trace_entry_t } else if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype)) { // Output if we have encodings that haven't yet been output, and // there is no filter removing all encodings (we don't support - // partial encoding removal). + // partial encoding removal). Note that filters that modify encodings (even if + // they add or remove trace_entry_t records) do not incur in this problem and we + // don't need support for partial removal of encodings in this case. An example + // of such filters is encodings2regdeps_filter_t. // We check prev_was_output to rule out filtered-out encodings // (we record all encodings for new-chunk insertion). 
if (!per_shard->last_encoding.empty() && per_shard->prev_was_output) { @@ -739,7 +751,8 @@ record_filter_t::parallel_shard_memref(void *shard_data, const trace_entry_t &in if (per_shard->enabled) { for (int i = 0; i < static_cast(filters_.size()); ++i) { if (!filters_[i]->parallel_shard_filter(entry, - per_shard->filter_shard_data[i])) { + per_shard->filter_shard_data[i], + per_shard->record_filter_info)) { output = false; } if (!filters_[i]->get_error_string().empty()) { diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index f5550b39cbd..50a76e23268 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -61,6 +61,17 @@ namespace drmemtrace { */ class record_filter_t : public record_analysis_tool_t { public: + /** + * Interface for the record_filter to share data with its filters. + */ + struct record_filter_info_t { + /** + * Stores the encoding of an instruction, which may be split among more than one + * #trace_entry_t, hence the vector. + */ + std::vector *last_encoding; + }; + /** * The base class for a single filter. */ @@ -86,17 +97,20 @@ class record_filter_t : public record_analysis_tool_t { /** * Invoked for each #trace_entry_t in the shard. It returns * whether or not this \p entry should be included in the result - * trace. \p shard_data is same as what was returned by - * parallel_shard_init(). The given \p entry is included in the result - * trace iff all provided #record_filter_func_t return true. The - * \p entry parameter can also be modified by the record_filter_func_t. + * trace. \p shard_data is same as what was returned by parallel_shard_init(). + * The given \p entry is included in the result trace iff all provided + * #dynamorio::drmemtrace::record_filter_t::record_filter_func_t return true. + * The \p entry parameter can also be modified by the record_filter_func_t. 
* The passed \p entry is not guaranteed to be the original one from * the trace if other filter tools are present, and may include changes * made by other tools. * An error is indicated by setting error_string_ to a non-empty value. + * \p record_filter_info is the interface used by record_filter to + * share data with its filters. */ virtual bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) = 0; + parallel_shard_filter(trace_entry_t &entry, void *shard_data, + record_filter_info_t &record_filter_info) = 0; /** * Invoked when all #trace_entry_t in a shard have been processed * by parallel_shard_filter(). \p shard_data is same as what was @@ -114,6 +128,17 @@ class record_filter_t : public record_analysis_tool_t { return error_string_; } + /** + * If a filter modifies the file type of a trace, its changes should be made here, + * so they are visible to the record_filter even if the #trace_entry_t containing + * the file type marker is not modified directly by the filter. + */ + virtual uint64_t + update_filetype(uint64_t filetype) + { + return filetype; + } + protected: std::string error_string_; }; @@ -188,6 +213,7 @@ class record_filter_t : public record_analysis_tool_t { trace_entry_t last_written_record; // Cached value updated on context switches. per_input_t *per_input = nullptr; + record_filter_info_t record_filter_info; }; virtual std::string @@ -248,11 +274,14 @@ class record_filter_t : public record_analysis_tool_t { inline uint64_t add_to_filetype(uint64_t filetype) { - if (stop_timestamp_ != 0) { + if (stop_timestamp_ != 0) filetype |= OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP; - } - if (shard_type_ == SHARD_BY_CORE) { + if (shard_type_ == SHARD_BY_CORE) filetype |= OFFLINE_FILE_TYPE_CORE_SHARDED; + /* If filters modify the file type, add their changes here. 
+ */ + for (auto &filter : filters_) { + filetype = filter->update_filetype(filetype); } return filetype; } diff --git a/clients/drcachesim/tools/filter/record_filter_create.h b/clients/drcachesim/tools/filter/record_filter_create.h index ae0665f3fef..7e3b750b595 100644 --- a/clients/drcachesim/tools/filter/record_filter_create.h +++ b/clients/drcachesim/tools/filter/record_filter_create.h @@ -62,13 +62,16 @@ namespace drmemtrace { * up to its first timestamp whose value is greater or equal to this parameter. * @param[in] trim_after_timestamp Trim records after the trace's first timestamp * whose value is greater than this parameter. + * @param[in] encodings2regdeps If true, converts instruction encodings from the real ISA + * of the input trace to the #DR_ISA_REGDEPS synthetic ISA. + * @param[in] verbose Verbosity level for notifications. */ record_analysis_tool_t * record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp, int cache_filter_size, const std::string &remove_trace_types, const std::string &remove_marker_types, uint64_t trim_before_timestamp, uint64_t trim_after_timestamp, - unsigned int verbose); + bool encodings2regdeps, unsigned int verbose); } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tools/filter/trim_filter.h b/clients/drcachesim/tools/filter/trim_filter.h index 30b8395156f..0a7c464347b 100644 --- a/clients/drcachesim/tools/filter/trim_filter.h +++ b/clients/drcachesim/tools/filter/trim_filter.h @@ -69,7 +69,9 @@ class trim_filter_t : public record_filter_t::record_filter_func_t { return per_shard; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override { per_shard_t *per_shard = reinterpret_cast(shard_data); if (entry.type == TRACE_TYPE_MARKER && diff --git a/clients/drcachesim/tools/filter/type_filter.h 
b/clients/drcachesim/tools/filter/type_filter.h index 3dda5c0592f..48d6aaef1db 100644 --- a/clients/drcachesim/tools/filter/type_filter.h +++ b/clients/drcachesim/tools/filter/type_filter.h @@ -86,7 +86,9 @@ class type_filter_t : public record_filter_t::record_filter_func_t { return per_shard; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override { per_shard_t *per_shard = reinterpret_cast(shard_data); if (entry.type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) { diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 995c810a578..58c4c75533e 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -157,7 +157,11 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) { shard->filetype = static_cast(memref.marker.marker_value); - if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL, memref.marker.marker_value) && + /* We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS + * is not a real ISA and can coexist with any real architecture. + */ + if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL & ~OFFLINE_FILE_TYPE_ARCH_REGDEPS, + memref.marker.marker_value) && !TESTANY(build_target_arch_type(), memref.marker.marker_value)) { shard->error = std::string("Architecture mismatch: trace recorded on ") + trace_arch_string(static_cast( @@ -165,6 +169,11 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) " but tool built for " + trace_arch_string(build_target_arch_type()); return false; } + /* If we are dealing with a regdeps trace, we need to set the dcontext ISA mode + * to the correct synthetic ISA (i.e., DR_ISA_REGDEPS). 
+ */ + if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, memref.marker.marker_value)) + dr_set_isa_mode(dcontext_.dcontext, DR_ISA_REGDEPS, nullptr); } else if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { #ifdef AARCH64 diff --git a/clients/drcachesim/tools/record_filter_launcher.cpp b/clients/drcachesim/tools/record_filter_launcher.cpp index 746481d79d5..53adf2ca1d8 100644 --- a/clients/drcachesim/tools/record_filter_launcher.cpp +++ b/clients/drcachesim/tools/record_filter_launcher.cpp @@ -121,6 +121,15 @@ static droption_t op_trim_after_timestamp( "Removes all records from the first TRACE_MARKER_TYPE_TIMESTAMP marker with " "timestamp larger than the specified value."); +/* XXX i#6369: we should partition our options by tool. This one should belong to the + * record_filter partition. For now we add the filter_ prefix to options that should be + * used in conjunction with record_filter. + */ +droption_t op_encodings2regdeps( + DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false, + "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", + "This option is for -simulator_type record_filter. 
When present, it converts " + "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA."); } // namespace int @@ -150,7 +159,8 @@ _tmain(int argc, const TCHAR *targv[]) op_output_dir.get_value(), op_stop_timestamp.get_value(), op_cache_filter_size.get_value(), op_remove_trace_types.get_value(), op_remove_marker_types.get_value(), op_trim_before_timestamp.get_value(), - op_trim_after_timestamp.get_value(), op_verbose.get_value())); + op_trim_after_timestamp.get_value(), op_encodings2regdeps.get_value(), + op_verbose.get_value())); std::vector tools; tools.push_back(record_filter.get()); diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index fc88d7897b3..904e8aaf9b3 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -58,6 +58,7 @@ #include "../link.h" #include "decode.h" #include "decode_fast.h" +#include "opcode_api.h" #include "opnd.h" #include "instr_create_shared.h" /* FIXME i#1551: refactor this file and avoid this x86-specific include in base arch/ */ @@ -3142,6 +3143,12 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, */ instr_set_operands_valid(instr_regdeps_isa, true); + /* Set opcode as OP_UNDECODED, so routines like instr_valid() can still work. + * We can't use instr_set_opcode() because of its CLIENT_ASSERT when setting the + * opcode to OP_UNDECODED or OP_INVALID. + */ + instr_regdeps_isa->opcode = OP_UNDECODED; + /* Set converted instruction ISA mode to be DR_ISA_REGDEPS. */ instr_set_isa_mode(instr_regdeps_isa, DR_ISA_REGDEPS); diff --git a/core/ir/isa_regdeps/decode.c b/core/ir/isa_regdeps/decode.c index 2ac0a8a7f7f..e5afec31b33 100644 --- a/core/ir/isa_regdeps/decode.c +++ b/core/ir/isa_regdeps/decode.c @@ -144,6 +144,12 @@ decode_isa_regdeps(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr) */ instr_set_operands_valid(instr, true); + /* Set opcode as OP_UNDECODED, so routines like instr_valid() can still work. 
+ * We can't use instr_set_opcode() because of its CLIENT_ASSERT when setting the + * opcode to OP_UNDECODED or OP_INVALID. + */ + instr->opcode = OP_UNDECODED; + /* Set decoded instruction ISA mode to be synthetic. */ instr_set_isa_mode(instr, DR_ISA_REGDEPS); diff --git a/core/ir/x86/decode.c b/core/ir/x86/decode.c index caceafd62c2..f608afea2db 100644 --- a/core/ir/x86/decode.c +++ b/core/ir/x86/decode.c @@ -2791,6 +2791,13 @@ const char * decode_opcode_name(int opcode) { const instr_info_t *info = op_instr[opcode]; + if (info == NULL) { + switch (opcode) { + case OP_INVALID: return ""; + case OP_UNDECODED: return ""; + default: return ""; + } + } return info->name; } diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 77723d43877..bc66f354c33 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4674,6 +4674,17 @@ if (BUILD_CLIENTS) # it checks encodings). "opcode_mix") + if (X86 AND X64 AND ZLIB_FOUND) + set(testname "tool.record_filter_encodings2regdeps") + torun_record_filter("${testname}" ${ci_shared_app} + "record_filter_encodings2regdeps" + # We assume the app name starts with "s" here to avoid colliding with + # our output dir, while still letting the single precmd remove both. + "${drcachesim_path}@-simulator_type@record_filter@-filter_encodings2regdeps@-indir@${testname}.s*.dir/trace@-core_sharded@-cores@4@-outdir@${testname}.filtered.dir" + # We run the opcode_mix analyzer to test encodings2regdeps filtered traces. + "opcode_mix") + endif () + if (UNIX) # Windows multi-thread tests are too slow. 
set(testname "tool.record_filter_bycore_multi") torun_record_filter("${testname}" pthreads.ptsig diff --git a/suite/tests/api/ir_regdeps.c b/suite/tests/api/ir_regdeps.c index 5c2f0a30bfb..e4a9edf1914 100644 --- a/suite/tests/api/ir_regdeps.c +++ b/suite/tests/api/ir_regdeps.c @@ -84,7 +84,7 @@ test_instr_encode_decode_synthetic(void *dc, instr_t *instr) /* Check that we do not have an opcode for the converted instruction. */ - ASSERT(instr_get_opcode(instr_synthetic_converted) == OP_INVALID); + ASSERT(instr_get_opcode(instr_synthetic_converted) == OP_UNDECODED); /* Encode the synthetic instruction. */