diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml
index 2ba40d9a760..70ced1ce403 100644
--- a/.github/workflows/ci-docs.yml
+++ b/.github/workflows/ci-docs.yml
@@ -90,7 +90,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
run: |
if test -z "${{ github.event.inputs.version }}"; then
- export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
+ export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
diff --git a/.github/workflows/ci-package.yml b/.github/workflows/ci-package.yml
index 9bff2302ddc..abf38d6078e 100644
--- a/.github/workflows/ci-package.yml
+++ b/.github/workflows/ci-package.yml
@@ -103,7 +103,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
run: |
if test -z "${{ github.event.inputs.version }}"; then
- export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
+ export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
@@ -195,7 +195,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
- export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
+ export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
@@ -283,7 +283,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
- export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
+ export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
@@ -371,7 +371,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
- export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
+ export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
@@ -451,7 +451,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
- export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
+ export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
@@ -536,7 +536,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
- export VERSION_NUMBER="10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))"
+ export VERSION_NUMBER="10.91.$((`git log -n 1 --format=%ct` / (60*60*24)))"
export PREFIX="cronbuild-"
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2463024ee8c..8463e376f99 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -575,7 +575,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn")
# N.B.: When updating this, update all the default versions in ci-package.yml
# and ci-docs.yml. We should find a way to share (xref i#1565).
-set(VERSION_NUMBER_DEFAULT "10.90.${VERSION_NUMBER_PATCHLEVEL}")
+set(VERSION_NUMBER_DEFAULT "10.91.${VERSION_NUMBER_PATCHLEVEL}")
# do not store the default VERSION_NUMBER in the cache to prevent a stale one
# from preventing future version updates in a pre-existing build dir
set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default")
diff --git a/api/docs/release.dox b/api/docs/release.dox
index f108f45f4a5..a407e16edfb 100644
--- a/api/docs/release.dox
+++ b/api/docs/release.dox
@@ -150,6 +150,10 @@ changes:
- Changed the type of the AArch64 #dr_mcontext_t members svep and ffr to #dr_svep_t.
This breaks binary compatibility with clients that were built against versions of
DynamoRIO before this change.
+ - Changed #dynamorio::drmemtrace::record_filter_t::record_filter_func_t
+ parallel_shard_filter() interface. Added a new parameter of type
+ #dynamorio::drmemtrace::record_filter_t::record_filter_info_t that allows
+ #dynamorio::drmemtrace::record_filter_t to share data with its filters.
Further non-compatibility-affecting changes include:
- Added DWARF-5 support to the drsyms library by linking in 4 static libraries
@@ -226,6 +230,10 @@ Further non-compatibility-affecting changes include:
purpose of preserving register dependencies.
- Added instr_convert_to_isa_regdeps() API that converts an #instr_t from a real ISA
(e.g., #DR_ISA_AMD64) to the #DR_ISA_REGDEPS synthetic ISA.
+ - Added encodings2regdeps_filter_t filter to #dynamorio::drmemtrace::record_filter_t to
+ generate #DR_ISA_REGDEPS traces.
+ - Added #dynamorio::drmemtrace::OFFLINE_FILE_TYPE_ARCH_REGDEPS file type for
+ #DR_ISA_REGDEPS traces.
**************************************************
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt
index 75b366ed495..9ec0d0ddc6c 100644
--- a/clients/drcachesim/CMakeLists.txt
+++ b/clients/drcachesim/CMakeLists.txt
@@ -192,12 +192,15 @@ add_exported_library(drmemtrace_simulator STATIC
)
add_exported_library(drmemtrace_record_filter STATIC
+ tools/filter/record_filter.h
tools/filter/record_filter.cpp
tools/filter/cache_filter.h
tools/filter/cache_filter.cpp
tools/filter/type_filter.h
+ tools/filter/encodings2regdeps_filter.h
tools/filter/null_filter.h)
target_link_libraries(drmemtrace_record_filter drmemtrace_simulator)
+configure_DynamoRIO_standalone(drmemtrace_record_filter)
add_exported_library(directory_iterator STATIC common/directory_iterator.cpp)
add_dependencies(directory_iterator api_headers)
@@ -357,6 +360,8 @@ install_client_nonDR_header(drmemtrace simulator/cache_simulator_create.h)
install_client_nonDR_header(drmemtrace simulator/tlb_simulator_create.h)
install_client_nonDR_header(drmemtrace tools/view_create.h)
install_client_nonDR_header(drmemtrace tools/func_view_create.h)
+install_client_nonDR_header(drmemtrace tools/filter/record_filter_create.h)
+install_client_nonDR_header(drmemtrace tools/filter/record_filter.h)
# TODO i#6412: Create a separate directory for non-tracer headers so that
# we can more cleanly separate tracer and raw2trace code.
install_client_nonDR_header(drmemtrace tracer/raw2trace.h)
@@ -393,8 +398,12 @@ add_executable(record_filter_launcher
tools/record_filter_launcher.cpp
tests/test_helpers.cpp)
target_link_libraries(record_filter_launcher drmemtrace_analyzer drmemtrace_record_filter)
+add_dependencies(record_filter_launcher api_headers)
append_property_list(TARGET record_filter_launcher COMPILE_DEFINITIONS "NO_HELPER_MAIN")
use_DynamoRIO_extension(record_filter_launcher droption)
+if (NOT APPLE)
+ configure_DynamoRIO_static(record_filter_launcher)
+endif()
# We want to use test_helper's disable_popups() but we have _tmain and so do not want
# the test_helper library's main symbol: so we compile ourselves and disable.
diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp
index fb71f986f76..045f05bfd6f 100644
--- a/clients/drcachesim/analyzer_multi.cpp
+++ b/clients/drcachesim/analyzer_multi.cpp
@@ -334,7 +334,8 @@ record_analyzer_multi_t::create_analysis_tool_from_options(
op_outdir.get_value(), op_filter_stop_timestamp.get_value(),
op_filter_cache_size.get_value(), op_filter_trace_types.get_value(),
op_filter_marker_types.get_value(), op_trim_before_timestamp.get_value(),
- op_trim_after_timestamp.get_value(), op_verbose.get_value());
+ op_trim_after_timestamp.get_value(), op_encodings2regdeps.get_value(),
+ op_verbose.get_value());
}
ERRMSG("Usage error: unsupported record analyzer type \"%s\". Only " RECORD_FILTER
" is supported.\n",
diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp
index 7105192347b..39ab5112f6f 100644
--- a/clients/drcachesim/common/options.cpp
+++ b/clients/drcachesim/common/options.cpp
@@ -972,6 +972,16 @@ droption_t
"Comma-separated integers for marker types to remove. "
"See trace_marker_type_t for the list of marker types.");
+/* XXX i#6369: we should partition our options by tool. This one should belong to the
+ * record_filter partition. For now we add the filter_ prefix to options that should be
+ * used in conjunction with record_filter.
+ */
+droption_t op_encodings2regdeps(
+ DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false,
+ "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.",
+ "This option is for -simulator_type " RECORD_FILTER ". When present, it converts "
+ "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA.");
+
droption_t op_trim_before_timestamp(
DROPTION_SCOPE_ALL, "trim_before_timestamp", 0, 0,
(std::numeric_limits::max)(),
diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h
index 316d1acf063..132f974c018 100644
--- a/clients/drcachesim/common/options.h
+++ b/clients/drcachesim/common/options.h
@@ -214,6 +214,7 @@ extern dynamorio::droption::droption_t op_filter_stop_timestamp;
extern dynamorio::droption::droption_t op_filter_cache_size;
extern dynamorio::droption::droption_t op_filter_trace_types;
extern dynamorio::droption::droption_t op_filter_marker_types;
+extern dynamorio::droption::droption_t op_encodings2regdeps;
extern dynamorio::droption::droption_t op_trim_before_timestamp;
extern dynamorio::droption::droption_t op_trim_after_timestamp;
extern dynamorio::droption::droption_t op_abort_on_invariant_error;
diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h
index 49fd8528604..5b26180ba5c 100644
--- a/clients/drcachesim/common/trace_entry.h
+++ b/clients/drcachesim/common/trace_entry.h
@@ -665,6 +665,18 @@ type_is_instr(const trace_type_t type)
type == TRACE_TYPE_INSTR_UNTAKEN_JUMP;
}
+/**
+ * Returns whether \p type represents any type of instruction record, whether an
+ * instruction fetch or an operation hint. This is a superset of type_is_instr() that
+ * additionally includes #TRACE_TYPE_INSTR_MAYBE_FETCH and #TRACE_TYPE_INSTR_NO_FETCH.
+ */
+static inline bool
+is_any_instr_type(const trace_type_t type)
+{
+ return type_is_instr(type) || type == TRACE_TYPE_INSTR_MAYBE_FETCH ||
+ type == TRACE_TYPE_INSTR_NO_FETCH;
+}
+
/** Returns whether the type represents the fetch of a branch instruction. */
static inline bool
type_is_instr_branch(const trace_type_t type)
@@ -889,9 +901,6 @@ typedef enum {
OFFLINE_FILE_TYPE_ARCH_ARM32 = 0x10, /**< Recorded on ARM (32-bit). */
OFFLINE_FILE_TYPE_ARCH_X86_32 = 0x20, /**< Recorded on x86 (32-bit). */
OFFLINE_FILE_TYPE_ARCH_X86_64 = 0x40, /**< Recorded on x86 (64-bit). */
- OFFLINE_FILE_TYPE_ARCH_ALL = OFFLINE_FILE_TYPE_ARCH_AARCH64 |
- OFFLINE_FILE_TYPE_ARCH_ARM32 | OFFLINE_FILE_TYPE_ARCH_X86_32 |
- OFFLINE_FILE_TYPE_ARCH_X86_64, /**< All possible architecture types. */
/**
* Instruction addresses filtered online.
* Note: this file type may transition to non-filtered. If so, the transition is
@@ -968,19 +977,36 @@ typedef enum {
* Each trace shard represents one core and contains interleaved software threads.
*/
OFFLINE_FILE_TYPE_CORE_SHARDED = 0x10000,
+ /**
+ * Trace filtered by the record_filter tool using -filter_encodings2regdeps.
+ * The encodings2regdeps filter replaces real ISA encodings with #DR_ISA_REGDEPS
+ * encodings. Note that these encoding changes do not update the instruction length,
+ * hence encoding size and instruction fetch size may not match.
+ */
+ OFFLINE_FILE_TYPE_ARCH_REGDEPS = 0x20000,
+ /**
+ * All possible architecture types, including synthetic ones.
+ */
+ OFFLINE_FILE_TYPE_ARCH_ALL = OFFLINE_FILE_TYPE_ARCH_AARCH64 |
+ OFFLINE_FILE_TYPE_ARCH_ARM32 | OFFLINE_FILE_TYPE_ARCH_X86_32 |
+ OFFLINE_FILE_TYPE_ARCH_X86_64 | OFFLINE_FILE_TYPE_ARCH_REGDEPS,
} offline_file_type_t;
static inline const char *
trace_arch_string(offline_file_type_t type)
{
- return TESTANY(OFFLINE_FILE_TYPE_ARCH_AARCH64, type)
- ? "aarch64"
- : (TESTANY(OFFLINE_FILE_TYPE_ARCH_ARM32, type)
- ? "arm"
- : (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_32, type)
- ? "i386"
- : (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_64, type) ? "x86_64"
- : "unspecified")));
+ if (TESTANY(OFFLINE_FILE_TYPE_ARCH_AARCH64, type))
+ return "aarch64";
+ else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ARM32, type))
+ return "arm";
+ else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_32, type))
+ return "i386";
+ else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_64, type))
+ return "x86_64";
+ else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, type))
+ return "regdeps";
+ else
+ return "unspecified";
}
/* We have non-client targets including this header that do not include API
diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp
index b938e06dfc6..93489f6c631 100644
--- a/clients/drcachesim/reader/reader.cpp
+++ b/clients/drcachesim/reader/reader.cpp
@@ -210,7 +210,12 @@ reader_t::process_input_entry()
++cur_instr_count_;
// Look for encoding bits that belong to this instr.
if (last_encoding_.size > 0) {
- if (last_encoding_.size != cur_ref_.instr.size) {
+ if (last_encoding_.size != cur_ref_.instr.size &&
+ /* OFFLINE_FILE_TYPE_ARCH_REGDEPS traces have encodings with
+ * size != ifetch. It's a design choice, not an error, hence
+ * we avoid this sanity check for these traces.
+ */
+ !TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype_)) {
ERRMSG(
"Encoding size %zu != instr size %zu for PC 0x%zx at ord %" PRIu64
" instr %" PRIu64 " last_timestamp=0x%" PRIx64 "\n",
diff --git a/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex b/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex
new file mode 100644
index 00000000000..a9271341d91
--- /dev/null
+++ b/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex
@@ -0,0 +1,11 @@
+Hello, world!
+
+Trace invariant checks passed
+
+Output .* entries from .* entries.
+
+Opcode mix tool results:
+
+ *[0-9]* : total executed instructions
+ *[0-9]* :
+.*
diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp
index 0f7dd36975a..c20a10a3034 100644
--- a/clients/drcachesim/tests/record_filter_unit_tests.cpp
+++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp
@@ -42,6 +42,8 @@
#include "tools/filter/record_filter.h"
#include "tools/filter/trim_filter.h"
#include "tools/filter/type_filter.h"
+#include "tools/filter/encodings2regdeps_filter.h"
+#include "trace_entry.h"
#include "zipfile_ostream.h"
#include
@@ -287,6 +289,144 @@ process_entries_and_check_result(test_record_filter_t *record_filter,
return true;
}
+static bool
+test_encodings2regdeps_filter()
+{
+ /* PCs of the instructions used below, then their real ISA and regdeps encodings. */
+ constexpr addr_t PC = 0x7f6fdd3ec360;
+ constexpr addr_t PC2 = 0x7f6fdd3eb1f7;
+ constexpr addr_t PC3 = 0x7f6fdd3eb21a;
+ constexpr addr_t ENCODING_REAL_ISA = 0xe78948;
+ constexpr addr_t ENCODING_REAL_ISA_2_PART1 = 0x841f0f66;
+ constexpr addr_t ENCODING_REAL_ISA_2_PART2 = 0x0;
+ constexpr addr_t ENCODING_REAL_ISA_3 = 0xab48f3;
+ constexpr addr_t ENCODING_REGDEPS_ISA = 0x0006090600010011;
+ constexpr addr_t ENCODING_REGDEPS_ISA_2 = 0x0000020400004010;
+ constexpr addr_t ENCODING_REGDEPS_ISA_3_PART1 = 0x0209030600001042;
+ constexpr addr_t ENCODING_REGDEPS_ISA_3_PART2 = 0x0000000000220903;
+ std::vector entries = {
+ /* Trace shard header.
+ */
+ { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } },
+ /* File type, modified by record_filter encodings2regdeps to add
+ * OFFLINE_FILE_TYPE_ARCH_REGDEPS.
+ */
+ { { TRACE_TYPE_MARKER,
+ TRACE_MARKER_TYPE_FILETYPE,
+ { OFFLINE_FILE_TYPE_ARCH_X86_64 | OFFLINE_FILE_TYPE_ENCODINGS |
+ OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } },
+ true,
+ { false } },
+ { { TRACE_TYPE_MARKER,
+ TRACE_MARKER_TYPE_FILETYPE,
+ { OFFLINE_FILE_TYPE_ARCH_REGDEPS | OFFLINE_FILE_TYPE_ENCODINGS |
+ OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } },
+ false,
+ { true } },
+ { { TRACE_TYPE_THREAD, 0, { 0x4 } }, true, { true } },
+ { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } },
+ true,
+ { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 0x3 } },
+ true,
+ { true } },
+
+ /* Chunk 1.
+ */
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } },
+ /* Encoding, modified by the record_filter encodings2regdeps.
+ * encoding real ISA size == encoding regdeps ISA size
+ * (in terms of trace_entry_t).
+ */
+ { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA } }, true, { false } },
+ { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } },
+ { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } },
+ { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } },
+ { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, true, { true } },
+
+ /* Chunk 2.
+ */
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0xa } },
+ true,
+ { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } },
+ /* Duplicated encoding across chunk boundary.
+ */
+ { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA } }, true, { false } },
+ { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } },
+ { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } },
+ { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } },
+ { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 1 } }, true, { true } },
+
+ /* Chunk 3.
+ */
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0xe } },
+ true,
+ { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } },
+ /* encoding real ISA size > encoding regdeps ISA size
+ */
+ { { TRACE_TYPE_ENCODING, 8, { ENCODING_REAL_ISA_2_PART1 } }, true, { false } },
+ { { TRACE_TYPE_ENCODING, 1, { ENCODING_REAL_ISA_2_PART2 } }, true, { false } },
+ { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA_2 } }, false, { true } },
+ { { TRACE_TYPE_INSTR, 9, { PC2 } }, true, { true } },
+ { { TRACE_TYPE_INSTR, 9, { PC2 } }, true, { true } },
+ { { TRACE_TYPE_INSTR, 9, { PC2 } }, true, { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 2 } }, true, { true } },
+
+ /* Chunk 4.
+ */
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0x12 } },
+ true,
+ { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } },
+ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } },
+ /* encoding real ISA size < encoding regdeps ISA size
+ */
+ { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA_3 } }, true, { false } },
+ { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA_3_PART1 } }, false, { true } },
+ { { TRACE_TYPE_ENCODING, 4, { ENCODING_REGDEPS_ISA_3_PART2 } }, false, { true } },
+ { { TRACE_TYPE_INSTR, 3, { PC3 } }, true, { true } },
+ { { TRACE_TYPE_INSTR, 3, { PC3 } }, true, { true } },
+
+ /* Trace shard footer.
+ */
+ { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, true, { true } },
+ };
+
+ /* Construct encodings2regdeps_filter.
+ */
+ std::vector> filters;
+ auto encodings2regdeps_filter = std::unique_ptr(
+ new dynamorio::drmemtrace::encodings2regdeps_filter_t());
+ if (encodings2regdeps_filter->get_error_string() != "") {
+ fprintf(stderr, "Couldn't construct a encodings2regdeps_filter %s",
+ encodings2regdeps_filter->get_error_string().c_str());
+ return false;
+ }
+ filters.push_back(std::move(encodings2regdeps_filter));
+
+ /* Construct record_filter_t.
+ */
+ auto record_filter = std::unique_ptr(
+ new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true));
+
+ /* Run the test.
+ */
+ if (!process_entries_and_check_result(record_filter.get(), entries, 0))
+ return false;
+
+ fprintf(stderr, "test_encodings2regdeps_filter passed\n");
+ return true;
+}
+
static bool
test_cache_and_type_filter()
{
@@ -541,7 +681,9 @@ test_chunk_update()
return nullptr;
}
bool
- parallel_shard_filter(trace_entry_t &entry, void *shard_data) override
+ parallel_shard_filter(
+ trace_entry_t &entry, void *shard_data,
+ record_filter_t::record_filter_info_t &record_filter_info) override
{
bool res = true;
if (type_is_instr(static_cast(entry.type))) {
@@ -1105,7 +1247,7 @@ test_main(int argc, const char *argv[])
droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str());
}
if (!test_cache_and_type_filter() || !test_chunk_update() || !test_trim_filter() ||
- !test_null_filter() || !test_wait_filter())
+ !test_null_filter() || !test_wait_filter() || !test_encodings2regdeps_filter())
return 1;
fprintf(stderr, "All done!\n");
return 0;
diff --git a/clients/drcachesim/tools/filter/cache_filter.cpp b/clients/drcachesim/tools/filter/cache_filter.cpp
index 6dae8b80ac4..470bc3b958f 100644
--- a/clients/drcachesim/tools/filter/cache_filter.cpp
+++ b/clients/drcachesim/tools/filter/cache_filter.cpp
@@ -87,7 +87,9 @@ cache_filter_t::parallel_shard_init(memtrace_stream_t *shard_stream,
return per_shard;
}
bool
-cache_filter_t::parallel_shard_filter(trace_entry_t &entry, void *shard_data)
+cache_filter_t::parallel_shard_filter(
+ trace_entry_t &entry, void *shard_data,
+ record_filter_t::record_filter_info_t &record_filter_info)
{
if (entry.type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) {
if (filter_instrs_)
diff --git a/clients/drcachesim/tools/filter/cache_filter.h b/clients/drcachesim/tools/filter/cache_filter.h
index 677a027bd11..d94152b6c19 100644
--- a/clients/drcachesim/tools/filter/cache_filter.h
+++ b/clients/drcachesim/tools/filter/cache_filter.h
@@ -55,7 +55,9 @@ class cache_filter_t : public record_filter_t::record_filter_func_t {
parallel_shard_init(memtrace_stream_t *shard_stream,
bool partial_trace_filter) override;
bool
- parallel_shard_filter(trace_entry_t &entry, void *shard_data) override;
+ parallel_shard_filter(
+ trace_entry_t &entry, void *shard_data,
+ record_filter_t::record_filter_info_t &record_filter_info) override;
bool
parallel_shard_exit(void *shard_data) override;
diff --git a/clients/drcachesim/tools/filter/encodings2regdeps_filter.h b/clients/drcachesim/tools/filter/encodings2regdeps_filter.h
new file mode 100644
index 00000000000..cf521c176ab
--- /dev/null
+++ b/clients/drcachesim/tools/filter/encodings2regdeps_filter.h
@@ -0,0 +1,210 @@
+/* **********************************************************
+ * Copyright (c) 2022-2024 Google, Inc. All rights reserved.
+ * **********************************************************/
+
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Google, Inc. nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#ifndef _ENCODING_FILTER_H_
+#define _ENCODING_FILTER_H_ 1
+
+#include "record_filter.h"
+#include "trace_entry.h"
+#include "utils.h"
+
+#include
+#include
+
+/* We are not exporting the defines in core/ir/isa_regdeps/encoding_common.h, so we
+ * redefine DR_ISA_REGDEPS alignment requirement here.
+ */
+#define REGDEPS_ALIGN_BYTES 4
+
+#define REGDEPS_MAX_ENCODING_LENGTH 16
+
+namespace dynamorio {
+namespace drmemtrace {
+
+/* This filter changes the encoding of trace_entry_t and generates discrepancies between
+ * encoding size and instruction length. So, we need to tell reader_t, which here comes in
+ * the form of memref_counter_t used in record_filter, to ignore such discrepancies. We do
+ * so by adding OFFLINE_FILE_TYPE_ARCH_REGDEPS to the file type of the filtered trace.
+ * Note that simulators that deal with these filtered traces will also have to handle the
+ * fact that encoding_size != instruction_length.
+ */
+class encodings2regdeps_filter_t : public record_filter_t::record_filter_func_t {
+public:
+    encodings2regdeps_filter_t() = default;
+
+    void *
+    parallel_shard_init(memtrace_stream_t *shard_stream,
+                        bool partial_trace_filter) override
+    {
+        dcontext_.dcontext = dr_standalone_init();
+        return nullptr;
+    }
+
+    /* Rewrites a filetype marker to advertise the regdeps ISA and, when \p entry is an
+     * instruction record with saved encodings, replaces the real ISA encoding held in
+     * record_filter_info.last_encoding with the corresponding regdeps encoding.
+     * Returns false, after setting error_string_, if the saved encoding is malformed
+     * or cannot be decoded or re-encoded; returns true otherwise.
+     */
+    bool
+    parallel_shard_filter(
+        trace_entry_t &entry, void *shard_data,
+        record_filter_t::record_filter_info_t &record_filter_info) override
+    {
+        std::vector<trace_entry_t> *last_encoding = record_filter_info.last_encoding;
+
+        /* Modify file_type to regdeps ISA, removing the real ISA of the input trace. */
+        trace_type_t entry_type = static_cast<trace_type_t>(entry.type);
+        if (entry_type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) {
+            entry.addr = static_cast<addr_t>(
+                update_filetype(static_cast<uint64_t>(entry.addr)));
+        }
+
+        /* We have encoding to convert.
+         * Normally the sequence of trace_entry_t(s) looks like:
+         * [TRACE_TYPE_ENCODING,]+ [TRACE_TYPE_MARKER.TRACE_MARKER_TYPE_BRANCH_TARGET,]
+         * TRACE_TYPE_INSTR_<type>, [TRACE_TYPE_READ | TRACE_TYPE_WRITE]*
+         * ([] = zero or one, + = one or more, * = zero or more)
+         * If we enter here, trace_entry_t is some TRACE_TYPE_INSTR_<type> for which
+         * last_encoding already contains its encoding.
+         */
+        if (is_any_instr_type(entry_type) && !last_encoding->empty()) {
+            /* Gather the real ISA encoding bytes previously saved in last_encoding. */
+            const app_pc pc = reinterpret_cast<app_pc>(entry.addr);
+            byte encoding[MAX_ENCODING_LENGTH];
+            memset(encoding, 0, sizeof(encoding));
+            uint encoding_offset = 0;
+            for (const auto &trace_encoding : *last_encoding) {
+                /* Entry sizes come from the trace: never overrun either buffer. */
+                if (trace_encoding.size > sizeof(trace_encoding.encoding) ||
+                    encoding_offset + trace_encoding.size > sizeof(encoding)) {
+                    error_string_ = "Malformed encoding for instruction " +
+                        to_hex_string(entry.addr);
+                    return false;
+                }
+                memcpy(encoding + encoding_offset, trace_encoding.encoding,
+                       trace_encoding.size);
+                encoding_offset += trace_encoding.size;
+            }
+
+            /* Generate the real ISA instr_t by decoding the encoding bytes. */
+            instr_t instr;
+            instr_init(dcontext_.dcontext, &instr);
+            app_pc next_pc = decode_from_copy(dcontext_.dcontext, encoding, pc, &instr);
+            if (next_pc == NULL || !instr_valid(&instr)) {
+                instr_free(dcontext_.dcontext, &instr);
+                error_string_ =
+                    "Failed to decode instruction " + to_hex_string(entry.addr);
+                return false;
+            }
+
+            /* Convert the real ISA instr_t into a regdeps ISA instr_t. */
+            instr_t instr_regdeps;
+            instr_init(dcontext_.dcontext, &instr_regdeps);
+            instr_convert_to_isa_regdeps(dcontext_.dcontext, &instr, &instr_regdeps);
+            instr_free(dcontext_.dcontext, &instr);
+
+            /* Obtain regdeps ISA instr_t encoding bytes. */
+            byte ALIGN_VAR(REGDEPS_ALIGN_BYTES)
+                encoding_regdeps[REGDEPS_MAX_ENCODING_LENGTH];
+            memset(encoding_regdeps, 0, sizeof(encoding_regdeps));
+            app_pc next_pc_regdeps =
+                instr_encode(dcontext_.dcontext, &instr_regdeps, encoding_regdeps);
+            instr_free(dcontext_.dcontext, &instr_regdeps);
+            if (next_pc_regdeps == NULL) {
+                error_string_ =
+                    "Failed to encode regdeps instruction " + to_hex_string(entry.addr);
+                return false;
+            }
+
+            /* Compute number of trace_entry_t to contain regdeps ISA encoding.
+             * Each trace_entry_t record can contain pointer-sized byte encoding
+             * (i.e., 4 bytes for 32 bits architectures and 8 bytes for 64 bits).
+             */
+            uint trace_entry_encoding_size = static_cast<uint>(sizeof(entry.addr));
+            uint regdeps_encoding_size =
+                static_cast<uint>(next_pc_regdeps - encoding_regdeps);
+            uint num_regdeps_encoding_entries =
+                ALIGN_FORWARD(regdeps_encoding_size, trace_entry_encoding_size) /
+                trace_entry_encoding_size;
+            last_encoding->resize(num_regdeps_encoding_entries);
+
+            /* Copy regdeps ISA encoding, splitting it among the last_encoding
+             * trace_entry_t records.
+             */
+            uint regdeps_encoding_offset = 0;
+            for (trace_entry_t &encoding_entry : *last_encoding) {
+                encoding_entry.type = TRACE_TYPE_ENCODING;
+                uint size = std::min(regdeps_encoding_size, trace_entry_encoding_size);
+                encoding_entry.size = static_cast<unsigned short>(size);
+                memset(encoding_entry.encoding, 0, trace_entry_encoding_size);
+                memcpy(encoding_entry.encoding,
+                       encoding_regdeps + regdeps_encoding_offset, encoding_entry.size);
+                regdeps_encoding_size -= encoding_entry.size;
+                regdeps_encoding_offset += encoding_entry.size;
+            }
+        }
+        return true;
+    }
+
+    bool
+    parallel_shard_exit(void *shard_data) override
+    {
+        return true;
+    }
+
+    /* Replaces all architecture bits with OFFLINE_FILE_TYPE_ARCH_REGDEPS. */
+    uint64_t
+    update_filetype(uint64_t filetype) override
+    {
+        filetype &= ~OFFLINE_FILE_TYPE_ARCH_ALL;
+        filetype |= OFFLINE_FILE_TYPE_ARCH_REGDEPS;
+        return filetype;
+    }
+
+private:
+    struct dcontext_cleanup_last_t {
+    public:
+        ~dcontext_cleanup_last_t()
+        {
+            if (dcontext != nullptr)
+                dr_standalone_exit();
+        }
+        void *dcontext = nullptr;
+    };
+
+    dcontext_cleanup_last_t dcontext_;
+};
+
+} // namespace drmemtrace
+} // namespace dynamorio
+#endif /* _ENCODING_FILTER_H_ */
diff --git a/clients/drcachesim/tools/filter/null_filter.h b/clients/drcachesim/tools/filter/null_filter.h
index e76f6e0df44..9ebf5377bd4 100644
--- a/clients/drcachesim/tools/filter/null_filter.h
+++ b/clients/drcachesim/tools/filter/null_filter.h
@@ -47,7 +47,9 @@ class null_filter_t : public record_filter_t::record_filter_func_t {
return nullptr;
}
bool
- parallel_shard_filter(trace_entry_t &entry, void *shard_data) override
+ parallel_shard_filter(
+ trace_entry_t &entry, void *shard_data,
+ record_filter_t::record_filter_info_t &record_filter_info) override
{
return true;
}
diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp
index 4f0b9cfb68a..2afd74b9fa4 100644
--- a/clients/drcachesim/tools/filter/record_filter.cpp
+++ b/clients/drcachesim/tools/filter/record_filter.cpp
@@ -60,6 +60,7 @@
#include "cache_filter.h"
#include "trim_filter.h"
#include "type_filter.h"
+#include "encodings2regdeps_filter.h"
#undef VPRINT
#ifdef DEBUG
@@ -83,13 +84,6 @@ namespace drmemtrace {
namespace {
-bool
-is_any_instr_type(trace_type_t type)
-{
- return type_is_instr(type) || type == TRACE_TYPE_INSTR_MAYBE_FETCH ||
- type == TRACE_TYPE_INSTR_NO_FETCH;
-}
-
template
std::vector
parse_string(const std::string &s, char sep = ',')
@@ -113,7 +107,7 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp
int cache_filter_size, const std::string &remove_trace_types,
const std::string &remove_marker_types,
uint64_t trim_before_timestamp, uint64_t trim_after_timestamp,
- unsigned int verbose)
+ bool encodings2regdeps, unsigned int verbose)
{
std::vector<
std::unique_ptr>
@@ -143,6 +137,12 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp
new dynamorio::drmemtrace::trim_filter_t(trim_before_timestamp,
trim_after_timestamp)));
}
+ if (encodings2regdeps) {
+ filter_funcs.emplace_back(
+ std::unique_ptr(
+ new dynamorio::drmemtrace::encodings2regdeps_filter_t()));
+ }
+
// TODO i#5675: Add other filters.
return new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs),
@@ -386,6 +386,7 @@ record_filter_t::parallel_shard_init_stream(int shard_index, void *worker_data,
success_ = false;
}
}
+ per_shard->record_filter_info.last_encoding = &per_shard->last_encoding;
std::lock_guard guard(shard_map_mutex_);
shard_map_[shard_index] = per_shard;
return reinterpret_cast(per_shard);
@@ -588,7 +589,10 @@ record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t &
// XXX: What if there is a filter removing all encodings but only
// to the stop point, so a partial remove that does not change
// the filetype? For now we do not support that, and we re-add
- // encodings at chunk boundaries regardless.
+ // encodings at chunk boundaries regardless. Note that filters that modify
+ // encodings (even if they add or remove trace_entry_t records) do not have
+ // this problem, so we don't need support for partial removal of encodings in
+ // this case. An example of such a filter is encodings2regdeps_filter_t.
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype) &&
per_shard->cur_chunk_pcs.find(entry.addr) == per_shard->cur_chunk_pcs.end()) {
if (per_shard->per_input == nullptr)
@@ -605,14 +609,19 @@ record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t &
per_shard->chunk_ordinal, per_shard->cur_refs);
// Sanity check that the encoding size is correct.
const auto &enc = per_shard->per_input->pc2encoding[entry.addr];
- size_t enc_sz = 0;
- // Since all but the last entry are fixed-size we could avoid a loop
- // but the loop is easier to read and we have just 1 or 2 iters.
- for (const auto &record : enc)
- enc_sz += record.size;
- if (enc_sz != entry.size) {
- return "New-chunk encoding size " + std::to_string(enc_sz) +
- " != instr size " + std::to_string(entry.size);
+ /* OFFLINE_FILE_TYPE_ARCH_REGDEPS traces have encodings with size != ifetch.
+ * It's a design choice, not an error, hence we avoid this sanity check.
+ */
+ if (!TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, per_shard->filetype)) {
+ size_t enc_sz = 0;
+ // Since all but the last entry are fixed-size we could avoid a loop
+ // but the loop is easier to read and we have just 1 or 2 iters.
+ for (const auto &record : enc)
+ enc_sz += record.size;
+ if (enc_sz != entry.size) {
+ return "New-chunk encoding size " + std::to_string(enc_sz) +
+ " != instr size " + std::to_string(entry.size);
+ }
}
if (!write_trace_entries(per_shard, enc)) {
return "Failed to write";
@@ -641,7 +650,10 @@ record_filter_t::process_delayed_encodings(per_shard_t *per_shard, trace_entry_t
} else if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype)) {
// Output if we have encodings that haven't yet been output, and
// there is no filter removing all encodings (we don't support
- // partial encoding removal).
+ // partial encoding removal). Note that filters that modify encodings (even if
+ // they add or remove trace_entry_t records) do not have this problem, so we
+ // don't need support for partial removal of encodings in this case. An example
+ // of such a filter is encodings2regdeps_filter_t.
// We check prev_was_output to rule out filtered-out encodings
// (we record all encodings for new-chunk insertion).
if (!per_shard->last_encoding.empty() && per_shard->prev_was_output) {
@@ -739,7 +751,8 @@ record_filter_t::parallel_shard_memref(void *shard_data, const trace_entry_t &in
if (per_shard->enabled) {
for (int i = 0; i < static_cast(filters_.size()); ++i) {
if (!filters_[i]->parallel_shard_filter(entry,
- per_shard->filter_shard_data[i])) {
+ per_shard->filter_shard_data[i],
+ per_shard->record_filter_info)) {
output = false;
}
if (!filters_[i]->get_error_string().empty()) {
diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h
index f5550b39cbd..50a76e23268 100644
--- a/clients/drcachesim/tools/filter/record_filter.h
+++ b/clients/drcachesim/tools/filter/record_filter.h
@@ -61,6 +61,17 @@ namespace drmemtrace {
*/
class record_filter_t : public record_analysis_tool_t {
public:
+ /**
+ * Interface for the record_filter to share data with its filters.
+ */
+ struct record_filter_info_t {
+ /**
+ * Stores the encoding of an instruction, which may be split among more than one
+ * #trace_entry_t, hence the vector.
+ */
+ std::vector<trace_entry_t> *last_encoding;
+ };
+
/**
* The base class for a single filter.
*/
@@ -86,17 +97,20 @@ class record_filter_t : public record_analysis_tool_t {
/**
* Invoked for each #trace_entry_t in the shard. It returns
* whether or not this \p entry should be included in the result
- * trace. \p shard_data is same as what was returned by
- * parallel_shard_init(). The given \p entry is included in the result
- * trace iff all provided #record_filter_func_t return true. The
- * \p entry parameter can also be modified by the record_filter_func_t.
+ * trace. \p shard_data is same as what was returned by parallel_shard_init().
+ * The given \p entry is included in the result trace iff all provided
+ * #dynamorio::drmemtrace::record_filter_t::record_filter_func_t return true.
+ * The \p entry parameter can also be modified by the record_filter_func_t.
* The passed \p entry is not guaranteed to be the original one from
* the trace if other filter tools are present, and may include changes
* made by other tools.
* An error is indicated by setting error_string_ to a non-empty value.
+ * \p record_filter_info is the interface used by record_filter to
+ * share data with its filters.
*/
virtual bool
- parallel_shard_filter(trace_entry_t &entry, void *shard_data) = 0;
+ parallel_shard_filter(trace_entry_t &entry, void *shard_data,
+ record_filter_info_t &record_filter_info) = 0;
/**
* Invoked when all #trace_entry_t in a shard have been processed
* by parallel_shard_filter(). \p shard_data is same as what was
@@ -114,6 +128,17 @@ class record_filter_t : public record_analysis_tool_t {
return error_string_;
}
+ /**
+ * If a filter modifies the file type of a trace, its changes should be made here,
+ * so they are visible to the record_filter even if the #trace_entry_t containing
+ * the file type marker is not modified directly by the filter.
+ */
+ virtual uint64_t
+ update_filetype(uint64_t filetype)
+ {
+ return filetype;
+ }
+
protected:
std::string error_string_;
};
@@ -188,6 +213,7 @@ class record_filter_t : public record_analysis_tool_t {
trace_entry_t last_written_record;
// Cached value updated on context switches.
per_input_t *per_input = nullptr;
+ record_filter_info_t record_filter_info;
};
virtual std::string
@@ -248,11 +274,14 @@ class record_filter_t : public record_analysis_tool_t {
inline uint64_t
add_to_filetype(uint64_t filetype)
{
- if (stop_timestamp_ != 0) {
+ if (stop_timestamp_ != 0)
filetype |= OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP;
- }
- if (shard_type_ == SHARD_BY_CORE) {
+ if (shard_type_ == SHARD_BY_CORE)
filetype |= OFFLINE_FILE_TYPE_CORE_SHARDED;
+ /* If filters modify the file type, add their changes here.
+ */
+ for (auto &filter : filters_) {
+ filetype = filter->update_filetype(filetype);
}
return filetype;
}
diff --git a/clients/drcachesim/tools/filter/record_filter_create.h b/clients/drcachesim/tools/filter/record_filter_create.h
index ae0665f3fef..7e3b750b595 100644
--- a/clients/drcachesim/tools/filter/record_filter_create.h
+++ b/clients/drcachesim/tools/filter/record_filter_create.h
@@ -62,13 +62,16 @@ namespace drmemtrace {
* up to its first timestamp whose value is greater or equal to this parameter.
* @param[in] trim_after_timestamp Trim records after the trace's first timestamp
* whose value is greater than this parameter.
+ * @param[in] encodings2regdeps If true, converts instruction encodings from the real ISA
+ * of the input trace to the #DR_ISA_REGDEPS synthetic ISA.
+ * @param[in] verbose Verbosity level for notifications.
*/
record_analysis_tool_t *
record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp,
int cache_filter_size, const std::string &remove_trace_types,
const std::string &remove_marker_types,
uint64_t trim_before_timestamp, uint64_t trim_after_timestamp,
- unsigned int verbose);
+ bool encodings2regdeps, unsigned int verbose);
} // namespace drmemtrace
} // namespace dynamorio
diff --git a/clients/drcachesim/tools/filter/trim_filter.h b/clients/drcachesim/tools/filter/trim_filter.h
index 30b8395156f..0a7c464347b 100644
--- a/clients/drcachesim/tools/filter/trim_filter.h
+++ b/clients/drcachesim/tools/filter/trim_filter.h
@@ -69,7 +69,9 @@ class trim_filter_t : public record_filter_t::record_filter_func_t {
return per_shard;
}
bool
- parallel_shard_filter(trace_entry_t &entry, void *shard_data) override
+ parallel_shard_filter(
+ trace_entry_t &entry, void *shard_data,
+ record_filter_t::record_filter_info_t &record_filter_info) override
{
per_shard_t *per_shard = reinterpret_cast(shard_data);
if (entry.type == TRACE_TYPE_MARKER &&
diff --git a/clients/drcachesim/tools/filter/type_filter.h b/clients/drcachesim/tools/filter/type_filter.h
index 3dda5c0592f..48d6aaef1db 100644
--- a/clients/drcachesim/tools/filter/type_filter.h
+++ b/clients/drcachesim/tools/filter/type_filter.h
@@ -86,7 +86,9 @@ class type_filter_t : public record_filter_t::record_filter_func_t {
return per_shard;
}
bool
- parallel_shard_filter(trace_entry_t &entry, void *shard_data) override
+ parallel_shard_filter(
+ trace_entry_t &entry, void *shard_data,
+ record_filter_t::record_filter_info_t &record_filter_info) override
{
per_shard_t *per_shard = reinterpret_cast(shard_data);
if (entry.type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) {
diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp
index 995c810a578..58c4c75533e 100644
--- a/clients/drcachesim/tools/opcode_mix.cpp
+++ b/clients/drcachesim/tools/opcode_mix.cpp
@@ -157,7 +157,11 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
if (memref.marker.type == TRACE_TYPE_MARKER &&
memref.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
shard->filetype = static_cast(memref.marker.marker_value);
- if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL, memref.marker.marker_value) &&
+ /* We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS
+ * is not a real ISA and can coexist with any real architecture.
+ */
+ if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL & ~OFFLINE_FILE_TYPE_ARCH_REGDEPS,
+ memref.marker.marker_value) &&
!TESTANY(build_target_arch_type(), memref.marker.marker_value)) {
shard->error = std::string("Architecture mismatch: trace recorded on ") +
trace_arch_string(static_cast(
@@ -165,6 +169,11 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
" but tool built for " + trace_arch_string(build_target_arch_type());
return false;
}
+ /* If we are dealing with a regdeps trace, we need to set the dcontext ISA mode
+ * to the correct synthetic ISA (i.e., DR_ISA_REGDEPS).
+ */
+ if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, memref.marker.marker_value))
+ dr_set_isa_mode(dcontext_.dcontext, DR_ISA_REGDEPS, nullptr);
} else if (memref.marker.type == TRACE_TYPE_MARKER &&
memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) {
#ifdef AARCH64
diff --git a/clients/drcachesim/tools/record_filter_launcher.cpp b/clients/drcachesim/tools/record_filter_launcher.cpp
index 746481d79d5..53adf2ca1d8 100644
--- a/clients/drcachesim/tools/record_filter_launcher.cpp
+++ b/clients/drcachesim/tools/record_filter_launcher.cpp
@@ -121,6 +121,15 @@ static droption_t op_trim_after_timestamp(
"Removes all records from the first TRACE_MARKER_TYPE_TIMESTAMP marker with "
"timestamp larger than the specified value.");
+/* XXX i#6369: we should partition our options by tool. This one should belong to the
+ * record_filter partition. For now we add the filter_ prefix to options that should be
+ * used in conjunction with record_filter.
+ */
+droption_t op_encodings2regdeps(
+ DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false,
+ "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.",
+ "This option is for -simulator_type record_filter. When present, it converts "
+ "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA.");
} // namespace
int
@@ -150,7 +159,8 @@ _tmain(int argc, const TCHAR *targv[])
op_output_dir.get_value(), op_stop_timestamp.get_value(),
op_cache_filter_size.get_value(), op_remove_trace_types.get_value(),
op_remove_marker_types.get_value(), op_trim_before_timestamp.get_value(),
- op_trim_after_timestamp.get_value(), op_verbose.get_value()));
+ op_trim_after_timestamp.get_value(), op_encodings2regdeps.get_value(),
+ op_verbose.get_value()));
std::vector tools;
tools.push_back(record_filter.get());
diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c
index fc88d7897b3..904e8aaf9b3 100644
--- a/core/ir/instr_shared.c
+++ b/core/ir/instr_shared.c
@@ -58,6 +58,7 @@
#include "../link.h"
#include "decode.h"
#include "decode_fast.h"
+#include "opcode_api.h"
#include "opnd.h"
#include "instr_create_shared.h"
/* FIXME i#1551: refactor this file and avoid this x86-specific include in base arch/ */
@@ -3142,6 +3143,12 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa,
*/
instr_set_operands_valid(instr_regdeps_isa, true);
+ /* Set opcode as OP_UNDECODED, so routines like instr_valid() can still work.
+ * We can't use instr_set_opcode() because of its CLIENT_ASSERT when setting the
+ * opcode to OP_UNDECODED or OP_INVALID.
+ */
+ instr_regdeps_isa->opcode = OP_UNDECODED;
+
/* Set converted instruction ISA mode to be DR_ISA_REGDEPS.
*/
instr_set_isa_mode(instr_regdeps_isa, DR_ISA_REGDEPS);
diff --git a/core/ir/isa_regdeps/decode.c b/core/ir/isa_regdeps/decode.c
index 2ac0a8a7f7f..e5afec31b33 100644
--- a/core/ir/isa_regdeps/decode.c
+++ b/core/ir/isa_regdeps/decode.c
@@ -144,6 +144,12 @@ decode_isa_regdeps(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr)
*/
instr_set_operands_valid(instr, true);
+ /* Set opcode as OP_UNDECODED, so routines like instr_valid() can still work.
+ * We can't use instr_set_opcode() because of its CLIENT_ASSERT when setting the
+ * opcode to OP_UNDECODED or OP_INVALID.
+ */
+ instr->opcode = OP_UNDECODED;
+
/* Set decoded instruction ISA mode to be synthetic.
*/
instr_set_isa_mode(instr, DR_ISA_REGDEPS);
diff --git a/core/ir/x86/decode.c b/core/ir/x86/decode.c
index caceafd62c2..f608afea2db 100644
--- a/core/ir/x86/decode.c
+++ b/core/ir/x86/decode.c
@@ -2791,6 +2791,13 @@ const char *
decode_opcode_name(int opcode)
{
const instr_info_t *info = op_instr[opcode];
+ if (info == NULL) {
+ switch (opcode) {
+ case OP_INVALID: return "";
+ case OP_UNDECODED: return "";
+ default: return "";
+ }
+ }
return info->name;
}
diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt
index 77723d43877..bc66f354c33 100644
--- a/suite/tests/CMakeLists.txt
+++ b/suite/tests/CMakeLists.txt
@@ -4674,6 +4674,17 @@ if (BUILD_CLIENTS)
# it checks encodings).
"opcode_mix")
+ if (X86 AND X64 AND ZLIB_FOUND)
+ set(testname "tool.record_filter_encodings2regdeps")
+ torun_record_filter("${testname}" ${ci_shared_app}
+ "record_filter_encodings2regdeps"
+ # We assume the app name starts with "s" here to avoid colliding with
+ # our output dir, while still letting the single precmd remove both.
+ "${drcachesim_path}@-simulator_type@record_filter@-filter_encodings2regdeps@-indir@${testname}.s*.dir/trace@-core_sharded@-cores@4@-outdir@${testname}.filtered.dir"
+ # We run the opcode_mix analyzer to test encodings2regdeps filtered traces.
+ "opcode_mix")
+ endif ()
+
if (UNIX) # Windows multi-thread tests are too slow.
set(testname "tool.record_filter_bycore_multi")
torun_record_filter("${testname}" pthreads.ptsig
diff --git a/suite/tests/api/ir_regdeps.c b/suite/tests/api/ir_regdeps.c
index 5c2f0a30bfb..e4a9edf1914 100644
--- a/suite/tests/api/ir_regdeps.c
+++ b/suite/tests/api/ir_regdeps.c
@@ -84,7 +84,7 @@ test_instr_encode_decode_synthetic(void *dc, instr_t *instr)
/* Check that we do not have an opcode for the converted instruction.
*/
- ASSERT(instr_get_opcode(instr_synthetic_converted) == OP_INVALID);
+ ASSERT(instr_get_opcode(instr_synthetic_converted) == OP_UNDECODED);
/* Encode the synthetic instruction.
*/