From 4d9ccbacbfbec0742c03d6d0c187684f7ae414e7 Mon Sep 17 00:00:00 2001
From: Derek Bruening <bruening@google.com>
Date: Wed, 21 Feb 2024 16:26:04 -0500
Subject: [PATCH 1/5] i#6648 trim tool: Move file removal to the end (#6669)

Since file removal of now-empty shards can involve global operations
that could race among shards, we move shard removal to the aggregation
stage print_results() in the record filter.

Issue: #6648, #6593
---
 .../tests/record_filter_unit_tests.cpp           |  4 ++++
 .../drcachesim/tools/filter/record_filter.cpp    | 16 +++++++++++-----
 clients/drcachesim/tools/filter/record_filter.h  |  1 +
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp
index 3b1e27e838f..c23b66052ac 100644
--- a/clients/drcachesim/tests/record_filter_unit_tests.cpp
+++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp
@@ -226,6 +226,10 @@ process_entries_and_check_result(test_record_filter_t *record_filter,
         fprintf(stderr, "Filtering exit failed\n");
         return false;
     }
+    if (!record_filter->print_results()) {
+        fprintf(stderr, "Filtering results failed\n");
+        return false;
+    }
 
     std::vector<trace_entry_t> filtered = record_filter->get_output_entries();
     // Verbose output for easier debugging.
diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp
index 1d5cbd3a719..e42b3246e73 100644
--- a/clients/drcachesim/tools/filter/record_filter.cpp
+++ b/clients/drcachesim/tools/filter/record_filter.cpp
@@ -328,9 +328,9 @@ record_filter_t::parallel_shard_exit(void *shard_data)
         // chunk_ordinal is 1 after the init-time call for archives; it
         // remains 0 for non-archives.
         per_shard->chunk_ordinal <= 1 && per_shard->cur_chunk_instrs == 0) {
-        per_shard->error = remove_output_file(per_shard);
-        if (!per_shard->error.empty())
-            res = false;
+        // Mark for removal.  We delay removal in case it involves global
+        // operations that might race with other workers.
+        per_shard->now_empty = true;
     }
     return res;
 }
@@ -617,15 +617,21 @@ record_filter_t::process_memref(const trace_entry_t &memref)
 bool
 record_filter_t::print_results()
 {
+    bool res = true;
     uint64_t input_entry_count = 0;
     uint64_t output_entry_count = 0;
     for (const auto &shard : shard_map_) {
         input_entry_count += shard.second->input_entry_count;
-        output_entry_count += shard.second->output_entry_count;
+        if (shard.second->now_empty) {
+            error_string_ = remove_output_file(shard.second);
+            if (!error_string_.empty())
+                res = false;
+        } else
+            output_entry_count += shard.second->output_entry_count;
     }
     std::cerr << "Output " << output_entry_count << " entries from " << input_entry_count
               << " entries.\n";
-    return true;
+    return res;
 }
 
 } // namespace drmemtrace
diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h
index e2014f00dbe..64cc173787a 100644
--- a/clients/drcachesim/tools/filter/record_filter.h
+++ b/clients/drcachesim/tools/filter/record_filter.h
@@ -168,6 +168,7 @@ class record_filter_t : public record_analysis_tool_t {
         bool prev_was_output = false;
         addr_t filetype = 0;
         memref_tid_t tid = 0; // For thread-sharded.
+        bool now_empty = false;
     };
 
     virtual std::string

From 168569efa9b50f6045de0bdbc197b908f652d767 Mon Sep 17 00:00:00 2001
From: Derek Bruening <bruening@google.com>
Date: Wed, 21 Feb 2024 17:35:33 -0500
Subject: [PATCH 2/5] i#6648 trim tool: Remove over-threshold timestamp (#6671)

The original PR #6651 ended up keeping the timestamp (+ cpuid) whose
value is beyond the trim-after threshold, as it logically makes sense to
have that endpoint's timestamp. However, when that's across a blocking
syscall, it canbe far into the future and result in large time gaps in
the trimmed trace. Instead, we throw out the timestamp, as the PR #6551
did in its first form. (Other options such as editing the timestamp seem
worse.)

Adjusts the unit test that tests this.

Issue: #6648
---
 clients/drcachesim/common/options.cpp         |  5 ++--
 .../drcachesim/tests/offline-trim.templatex   |  6 ++---
 .../tests/record_filter_unit_tests.cpp        |  6 ++---
 clients/drcachesim/tools/filter/trim_filter.h | 24 +++++++------------
 .../tools/record_filter_launcher.cpp          |  5 ++--
 5 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp
index e8a4ddf2daa..5bdee7aef05 100644
--- a/clients/drcachesim/common/options.cpp
+++ b/clients/drcachesim/common/options.cpp
@@ -973,9 +973,8 @@ droption_t<uint64_t> op_trim_after_timestamp(
     DROPTION_SCOPE_ALL, "trim_after_timestamp", (std::numeric_limits<uint64_t>::max)(), 0,
     (std::numeric_limits<uint64_t>::max)(),
     "Trim records after this timestamp (in us) in the trace.",
-    "Removes all records after the first TRACE_MARKER_TYPE_TIMESTAMP marker with "
-    "timestamp larger than the specified value (keeps a TRACE_MARKER_TYPE_CPU_ID "
-    "immediately following the transition timestamp).");
+    "Removes all records from the first TRACE_MARKER_TYPE_TIMESTAMP marker with "
+    "timestamp larger than the specified value.");
 
 } // namespace drmemtrace
 } // namespace dynamorio
diff --git a/clients/drcachesim/tests/offline-trim.templatex b/clients/drcachesim/tests/offline-trim.templatex
index 0df8f0eef6d..ac6a55cf090 100644
--- a/clients/drcachesim/tests/offline-trim.templatex
+++ b/clients/drcachesim/tests/offline-trim.templatex
@@ -1,4 +1,4 @@
-Output 290 entries from 329 entries.
+Output 288 entries from 329 entries.
 Output format:
 <--record#-> <--instr#->: <---tid---> <record details>
 ------------------------------------------------------------
@@ -21,9 +21,7 @@ Output format:
          215         119:     1992554 ifetch       5 byte\(s\) @ 0x0000000000401021 b8 01 00 00 00       mov    \$0x00000001 -> %eax
          216         120:     1992554 ifetch       2 byte\(s\) @ 0x0000000000401026 0f 05                syscall  -> %rcx %r11
          217         120:     1992554 <marker: chunk footer #5>
-         218         120:     1992554 <marker: timestamp 13352268558646666>
-         219         120:     1992554 <marker: tid 1992554 on core 10>
-         220         120:     1992554 <thread 1992554 exited>
+         218         120:     1992554 <thread 1992554 exited>
 View tool results:
             120 : total instructions
 
diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp
index c23b66052ac..a4a2461ccdc 100644
--- a/clients/drcachesim/tests/record_filter_unit_tests.cpp
+++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp
@@ -687,11 +687,11 @@ test_trim_filter()
             { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { true } },
             { { TRACE_TYPE_ENCODING, 2, { ENCODING_A } }, true, { true } },
             { { TRACE_TYPE_INSTR, 2, { PC_A } }, true, { true } },
-            // Removal starts right after here.
+            // Removal starts here.
             { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 200 } },
               true,
-              { true } },
-            { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { true } },
+              { false } },
+            { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0 } }, true, { false } },
             { { TRACE_TYPE_ENCODING, 2, { ENCODING_B } }, true, { false } },
             { { TRACE_TYPE_INSTR, 2, { PC_B } }, true, { false } },
             { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } },
diff --git a/clients/drcachesim/tools/filter/trim_filter.h b/clients/drcachesim/tools/filter/trim_filter.h
index c4180fb1544..30b8395156f 100644
--- a/clients/drcachesim/tools/filter/trim_filter.h
+++ b/clients/drcachesim/tools/filter/trim_filter.h
@@ -72,23 +72,18 @@ class trim_filter_t : public record_filter_t::record_filter_func_t {
     parallel_shard_filter(trace_entry_t &entry, void *shard_data) override
     {
         per_shard_t *per_shard = reinterpret_cast<per_shard_t *>(shard_data);
-        if (per_shard->just_saw_trim_start_after) {
-            if (entry.type == TRACE_TYPE_MARKER &&
-                entry.size == TRACE_MARKER_TYPE_CPU_ID) {
-                // Wait for the next record to start removing.
-                return true;
-            }
-            per_shard->in_removed_region = true;
-            per_shard->just_saw_trim_start_after = false;
-        }
         if (entry.type == TRACE_TYPE_MARKER &&
             entry.size == TRACE_MARKER_TYPE_TIMESTAMP) {
-            if (entry.addr < trim_before_timestamp_)
+            // While it seems theoretically nice to keep the timestamp,cpuid that
+            // is over the threshold so we have a timestamp at the end, that results
+            // in large time gaps if across a blocking syscall.  Trying to edit
+            // that timestamp a la -align_endpoints is not deal either as it can
+            // distort syscall durations.  The least-bad solution seems to be to
+            // keep the regular trace content right up to the timestamp and
+            // throw away the timestamp.
+            if (entry.addr < trim_before_timestamp_ || entry.addr > trim_after_timestamp_)
                 per_shard->in_removed_region = true;
-            else if (entry.addr > trim_after_timestamp_) {
-                if (!per_shard->in_removed_region)
-                    per_shard->just_saw_trim_start_after = true;
-            } else
+            else
                 per_shard->in_removed_region = false;
         }
         if (entry.type == TRACE_TYPE_THREAD_EXIT || entry.type == TRACE_TYPE_FOOTER) {
@@ -116,7 +111,6 @@ class trim_filter_t : public record_filter_t::record_filter_func_t {
 private:
     struct per_shard_t {
         bool in_removed_region = false;
-        bool just_saw_trim_start_after = false;
     };
 
     uint64_t trim_before_timestamp_;
diff --git a/clients/drcachesim/tools/record_filter_launcher.cpp b/clients/drcachesim/tools/record_filter_launcher.cpp
index d8949ce3a6d..746481d79d5 100644
--- a/clients/drcachesim/tools/record_filter_launcher.cpp
+++ b/clients/drcachesim/tools/record_filter_launcher.cpp
@@ -118,9 +118,8 @@ static droption_t<uint64_t> op_trim_after_timestamp(
     DROPTION_SCOPE_ALL, "trim_after_timestamp", (std::numeric_limits<uint64_t>::max)(), 0,
     (std::numeric_limits<uint64_t>::max)(),
     "Trim records after this timestamp (in us) in the trace.",
-    "Removes all records after the first TRACE_MARKER_TYPE_TIMESTAMP marker with "
-    "timestamp larger than the specified value (keeps a TRACE_MARKER_TYPE_CPU_ID "
-    "immediately following the transition timestamp).");
+    "Removes all records from the first TRACE_MARKER_TYPE_TIMESTAMP marker with "
+    "timestamp larger than the specified value.");
 
 } // namespace
 

From 6a06ccbe3945386ae4dc55bb9beff8ed203b424a Mon Sep 17 00:00:00 2001
From: Kaiyeung Luk <kyluk@google.com>
Date: Wed, 21 Feb 2024 16:16:55 -0800
Subject: [PATCH 3/5] i#6668: Replace drwrap retaddr sentinel on all
 architectures, not just x86 (#6670)

Expand drwrap's return address sentinel replacement from just x86 (as
was added in PR #6395) to cover all architectures.

When -record_replace_retaddr is used in ARM (as well as x86) platforms,
the return address in the stack is replaced by the sentinel.
For a tailcall, the current implementation uses the address in the
stack, the sentinel, as the return address. The change is to check if
the return address in the stack is the sentinel or not. If it is,
replace it with the return address of the outer level.

The problem can be reproduced by

./build/bin64/drrun -t drcachesim -offline -record_replace_retaddr
-record_function 'tailcall_with_retaddr|1&foo|1' --
../build_suite/build_debug-internal-64/suite/tests/bin/common.getretaddr

on an ARM machine.

This change expends PR #6395  to cover ARM.

Issues: #6394, #6668
Fixes: #6668
---
 ext/drwrap/drwrap.c        | 4 +---
 suite/tests/CMakeLists.txt | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/ext/drwrap/drwrap.c b/ext/drwrap/drwrap.c
index 3881f0868e8..1f4621334ca 100644
--- a/ext/drwrap/drwrap.c
+++ b/ext/drwrap/drwrap.c
@@ -2021,9 +2021,8 @@ drwrap_in_callee(void *arg1, reg_t xsp _IF_NOT_X86(reg_t lr))
     NOTIFY(2, "%s: level %d function " PFX "\n", __FUNCTION__, pt->wrap_level + 1, pc);
 
     app_pc retaddr = IF_X86_ELSE(get_retaddr_from_stack(xsp), (app_pc)lr);
-#ifdef X86
     if (TEST(DRWRAP_REPLACE_RETADDR, global_flags)) {
-        /* In case of a tailcall for X86, the return address has already been replaced by
+        /* In case of a tailcall, the return address has already been replaced by
          * the sentinel in the stack, we need to retrieve the return address from the
          * outer level.
          */
@@ -2034,7 +2033,6 @@ drwrap_in_callee(void *arg1, reg_t xsp _IF_NOT_X86(reg_t lr))
                    pt->wrap_level, retaddr);
         }
     }
-#endif
     drwrap_context_init(drcontext, &wrapcxt, pc, &mc, DRWRAP_WHERE_PRE_FUNC, retaddr);
 
     drwrap_in_callee_check_unwind(drcontext, pt, &mc);
diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt
index 1e4c1eb24a6..8df22fa5c1b 100644
--- a/suite/tests/CMakeLists.txt
+++ b/suite/tests/CMakeLists.txt
@@ -4777,7 +4777,7 @@ if (BUILD_CLIENTS)
     endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR)
   endif (proc_supports_pt)
 
-  if (X86)
+  if ((AARCHXX AND NOT ANDROID) OR X86)
     torunonly_drcacheoff(getretaddr_record_replace_retaddr common.getretaddr
             "-record_replace_retaddr -record_function tailcall_with_retaddr|1&foo|1"
             "@-simulator_type@invariant_checker" "")

From afdc470f2248e519665b8ece060297ede90f183c Mon Sep 17 00:00:00 2001
From: Derek Bruening <bruening@google.com>
Date: Wed, 21 Feb 2024 22:34:36 -0500
Subject: [PATCH 4/5] i#6666: Avoid hang when analyzer worker returns early
 (#6667)

Adds a call to set_active(false) on the output stream when an analyzer
worker thread returns early due to an error. This frees up the current
input and avoids a hang in the scheduler after the error is printed.

Adds a unit test which hangs without this fix.

Fixes #6666
---
 clients/drcachesim/analyzer.cpp               |  50 ++++++--
 clients/drcachesim/analyzer.h                 |   7 +-
 .../drcachesim/tests/analysis_unit_tests.cpp  | 117 +++++++++++++++++-
 3 files changed, 159 insertions(+), 15 deletions(-)

diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp
index 7829a11ee9b..12916f1e3b0 100644
--- a/clients/drcachesim/analyzer.cpp
+++ b/clients/drcachesim/analyzer.cpp
@@ -317,6 +317,7 @@ analyzer_tmpl_t<RecordType, ReaderType>::init_scheduler_common(
         worker_count_ = 1;
         output_count = 1;
     }
+    sched_mapping_ = options.mapping;
     if (scheduler_.init(sched_inputs, output_count, std::move(sched_ops)) !=
         sched_type_t::STATUS_SUCCESS) {
         ERRMSG("Failed to initialize scheduler: %s\n",
@@ -585,8 +586,9 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_shard_exit(
 }
 
 template <typename RecordType, typename ReaderType>
-void
-analyzer_tmpl_t<RecordType, ReaderType>::process_tasks(analyzer_worker_data_t *worker)
+bool
+analyzer_tmpl_t<RecordType, ReaderType>::process_tasks_internal(
+    analyzer_worker_data_t *worker)
 {
     std::vector<void *> user_worker_data(num_tools_);
 
@@ -622,7 +624,7 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_tasks(analyzer_worker_data_t *w
                 worker->error =
                     "Failed to read from trace: " + worker->stream->get_stream_name();
             }
-            return;
+            return false;
         }
         int shard_index = shard_type_ == SHARD_BY_CORE
             ? worker->index
@@ -655,7 +657,7 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_tasks(analyzer_worker_data_t *w
                                 record_is_instr(record)) &&
             !process_interval(prev_interval_index, prev_interval_init_instr_count, worker,
                               /*parallel=*/true, record_is_instr(record), shard_index)) {
-            return;
+            return false;
         }
         for (int i = 0; i < num_tools_; ++i) {
             if (!tools_[i]->parallel_shard_memref(
@@ -665,24 +667,27 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_tasks(analyzer_worker_data_t *w
                 VPRINT(this, 1, "Worker %d hit shard memref error %s on trace shard %s\n",
                        worker->index, worker->error.c_str(),
                        worker->stream->get_stream_name().c_str());
-                return;
+                return false;
             }
         }
         if (record_is_thread_final(record) && shard_type_ != SHARD_BY_CORE) {
-            if (!process_shard_exit(worker, shard_index))
-                return;
+            if (!process_shard_exit(worker, shard_index)) {
+                return false;
+            }
         }
     }
     if (shard_type_ == SHARD_BY_CORE) {
         if (worker->shard_data.find(worker->index) != worker->shard_data.end()) {
-            if (!process_shard_exit(worker, worker->index))
-                return;
+            if (!process_shard_exit(worker, worker->index)) {
+                return false;
+            }
         }
     }
     for (const auto &keyval : worker->shard_data) {
         if (!keyval.second.exited) {
-            if (!process_shard_exit(worker, keyval.second.shard_index))
-                return;
+            if (!process_shard_exit(worker, keyval.second.shard_index)) {
+                return false;
+            }
         }
     }
     for (int i = 0; i < num_tools_; ++i) {
@@ -691,7 +696,28 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_tasks(analyzer_worker_data_t *w
             worker->error = error;
             VPRINT(this, 1, "Worker %d hit worker exit error %s\n", worker->index,
                    error.c_str());
-            return;
+            return false;
+        }
+    }
+    return true;
+}
+
+template <typename RecordType, typename ReaderType>
+void
+analyzer_tmpl_t<RecordType, ReaderType>::process_tasks(analyzer_worker_data_t *worker)
+{
+    if (!process_tasks_internal(worker)) {
+        if (sched_mapping_ == sched_type_t::MAP_TO_ANY_OUTPUT) {
+            // Avoid a hang in the scheduler if we leave our current input stranded.
+            // XXX: Better to just do a global exit and not let the other threads
+            // keep running?  That breaks the current model where errors are
+            // propagated to the user to decide what to do.
+            // We could perhaps add thread synch points to have other threads
+            // exit earlier: but maybe some uses cases consider one shard error
+            // to not affect others and not be fatal?
+            if (worker->stream->set_active(false) != sched_type_t::STATUS_OK) {
+                ERRMSG("Failed to set failing worker to inactive; may hang");
+            }
         }
     }
 }
diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h
index 87857de7b7b..964159c6aee 100644
--- a/clients/drcachesim/analyzer.h
+++ b/clients/drcachesim/analyzer.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2016-2023 Google, Inc.  All rights reserved.
+ * Copyright (c) 2016-2024 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -239,6 +239,10 @@ template <typename RecordType, typename ReaderType> class analyzer_tmpl_t {
     void
     process_serial(analyzer_worker_data_t &worker);
 
+    // Helper for process_tasks().
+    bool
+    process_tasks_internal(analyzer_worker_data_t *worker);
+
     // Helper for process_tasks() which calls parallel_shard_exit() in each tool.
     // Returns false if there was an error and the caller should return early.
     bool
@@ -421,6 +425,7 @@ template <typename RecordType, typename ReaderType> class analyzer_tmpl_t {
     int verbosity_ = 0;
     shard_type_t shard_type_ = SHARD_BY_THREAD;
     bool sched_by_time_ = false;
+    typename sched_type_t::mapping_t sched_mapping_ = sched_type_t::MAP_TO_ANY_OUTPUT;
 
 private:
     bool
diff --git a/clients/drcachesim/tests/analysis_unit_tests.cpp b/clients/drcachesim/tests/analysis_unit_tests.cpp
index aefb6e78b5d..1329cd217c9 100644
--- a/clients/drcachesim/tests/analysis_unit_tests.cpp
+++ b/clients/drcachesim/tests/analysis_unit_tests.cpp
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2023 Google, Inc.  All rights reserved.
+ * Copyright (c) 2023-2024 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -76,6 +76,7 @@ class mock_analyzer_t : public analyzer_t {
             sched_ops = scheduler_t::make_scheduler_parallel_options(verbosity_);
         else
             sched_ops = scheduler_t::make_scheduler_serial_options(verbosity_);
+        sched_mapping_ = sched_ops.mapping;
         if (scheduler_.init(sched_inputs, worker_count_, std::move(sched_ops)) !=
             sched_type_t::STATUS_SUCCESS) {
             assert(false);
@@ -366,10 +367,122 @@ test_wait_records()
     return true;
 }
 
+bool
+test_tool_errors()
+{
+    // Tool errors can hang the analyzer if it doesn't tell the scheduler
+    // it's giving up on its input.  We test that here.
+    std::cerr << "\n----------------\nTesting tool errors\n";
+
+    static constexpr int NUM_INPUTS = 5;
+    static constexpr int NUM_OUTPUTS = 2;
+    static constexpr int NUM_INSTRS = 9;
+    static constexpr memref_tid_t TID_BASE = 100;
+    std::vector<trace_entry_t> inputs[NUM_INPUTS];
+    for (int i = 0; i < NUM_INPUTS; i++) {
+        memref_tid_t tid = TID_BASE + i;
+        inputs[i].push_back(make_thread(tid));
+        inputs[i].push_back(make_pid(1));
+        for (int j = 0; j < NUM_INSTRS; j++)
+            inputs[i].push_back(make_instr(42 + j * 4));
+        if (i == 4) {
+            // This one input will trigger an error in our error_tool_t.
+            inputs[i].push_back(make_marker(TRACE_MARKER_TYPE_CPU_ID, 4));
+        }
+        inputs[i].push_back(make_exit(tid));
+    }
+
+    std::vector<scheduler_t::input_workload_t> sched_inputs;
+    for (int i = 0; i < NUM_INPUTS; i++) {
+        memref_tid_t tid = TID_BASE + i;
+        std::vector<scheduler_t::input_reader_t> readers;
+        readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(inputs[i])),
+                             std::unique_ptr<mock_reader_t>(new mock_reader_t()), tid);
+        sched_inputs.emplace_back(std::move(readers));
+    }
+    scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT,
+                                               scheduler_t::DEPENDENCY_IGNORE,
+                                               scheduler_t::SCHEDULER_DEFAULTS,
+                                               /*verbosity=*/1);
+
+    static const char *const TOOL_ERROR_STRING = "cpuid not supported";
+
+    class error_tool_t : public analysis_tool_t {
+    public:
+        bool
+        process_memref(const memref_t &memref) override
+        {
+            assert(false); // Only expect parallel mode.
+            return false;
+        }
+        bool
+        print_results() override
+        {
+            return true;
+        }
+        bool
+        parallel_shard_supported() override
+        {
+            return true;
+        }
+        void *
+        parallel_shard_init_stream(int shard_index, void *worker_data,
+                                   memtrace_stream_t *stream) override
+        {
+            auto per_shard = new per_shard_t;
+            return reinterpret_cast<void *>(per_shard);
+        }
+        bool
+        parallel_shard_exit(void *shard_data) override
+        {
+            per_shard_t *shard = reinterpret_cast<per_shard_t *>(shard_data);
+            delete shard;
+            return true;
+        }
+        std::string
+        parallel_shard_error(void *shard_data) override
+        {
+            per_shard_t *shard = reinterpret_cast<per_shard_t *>(shard_data);
+            return shard->error;
+        }
+        bool
+        parallel_shard_memref(void *shard_data, const memref_t &memref) override
+        {
+            per_shard_t *shard = reinterpret_cast<per_shard_t *>(shard_data);
+            // Return an error in one of the inputs.
+            if (memref.marker.type == TRACE_TYPE_MARKER &&
+                memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID) {
+                shard->error = TOOL_ERROR_STRING;
+                return false;
+            }
+            return true;
+        }
+
+    private:
+        struct per_shard_t {
+            std::string error;
+        };
+    };
+
+    std::vector<analysis_tool_t *> tools;
+    auto test_tool = std::unique_ptr<error_tool_t>(new error_tool_t);
+    tools.push_back(test_tool.get());
+    mock_analyzer_t analyzer(sched_inputs, &tools[0], (int)tools.size(),
+                             /*parallel=*/true, NUM_OUTPUTS, &sched_ops);
+    assert(!!analyzer);
+    // If the analyzer doesn't give up the input in the output stream that
+    // encounters it, the scheduler will hang waiting for that input,
+    // so failure in this test would be a CTest timeout.
+    bool res = analyzer.run();
+    assert(!res);
+    assert(analyzer.get_error_string() == TOOL_ERROR_STRING);
+    return true;
+}
+
 int
 test_main(int argc, const char *argv[])
 {
-    if (!test_queries() || !test_wait_records())
+    if (!test_queries() || !test_wait_records() || !test_tool_errors())
         return 1;
     std::cerr << "All done!\n";
     return 0;

From 4bf1163916d6058eb707d23e7dd5d9f26607a345 Mon Sep 17 00:00:00 2001
From: Abhinav Anil Sharma <sharmaabhinav@google.com>
Date: Thu, 22 Feb 2024 09:06:52 -0500
Subject: [PATCH 5/5] i#6643: Mark interval_state_snapshot_t data members
 private (#6665)

Marks the data members of the base interval_state_snapshot_t as private.
This is to formally disallow analysis tools that subclass this from
modifying the data members such as shard_id, interval_end_timestamp,
etc., some of which are essential to the interval merge logic
implemented by the analyzer framework.

The analyzer framework implementation in analyzer_tmpl_t is marked as a
friend class so that it can still set those private data members.

Converted interval_state_snapshot_t from a struct to a class to follow
our style guide.

Issue: #6643, #6020
---
 api/docs/release.dox                          |  5 ++
 clients/drcachesim/analysis_tool.h            | 68 ++++++++++++++-----
 clients/drcachesim/analyzer.cpp               | 34 +++++-----
 .../trace_interval_analysis_unit_tests.cpp    | 27 ++++----
 clients/drcachesim/tools/basic_counts.cpp     | 19 +++---
 5 files changed, 99 insertions(+), 54 deletions(-)

diff --git a/api/docs/release.dox b/api/docs/release.dox
index 2d4b4414caa..d82ca7bf109 100644
--- a/api/docs/release.dox
+++ b/api/docs/release.dox
@@ -203,6 +203,11 @@ Further non-compatibility-affecting changes include:
  - Added #dynamorio::drmemtrace::TRACE_MARKER_TYPE_VECTOR_LENGTH marker to indicate the
    current vector length for architectures with a hardware defined or runtime changeable
    vector length (such as AArch64's SVE scalable vectors).
+ - Renamed a protected data member in #dynamorio::drmemtrace::analyzer_tmpl_t from
+   merged_interval_snapshots_ to whole_trace_interval_snapshots_ (may be relevant for
+   users sub-classing analyzer_tmpl_t).
+ - Converted #dynamorio::drmemtrace::analysis_tool_tmpl_t::interval_state_snapshot_t
+   into a class with all its data members marked private with public accessor functions.
 
 **************************************************
 <hr>
diff --git a/clients/drcachesim/analysis_tool.h b/clients/drcachesim/analysis_tool.h
index 0c4a03534da..a724d06a3af 100644
--- a/clients/drcachesim/analysis_tool.h
+++ b/clients/drcachesim/analysis_tool.h
@@ -196,7 +196,16 @@ template <typename RecordType> class analysis_tool_tmpl_t {
      * default constructor. The members of this base class will be set by the
      * framework automatically.
      */
-    struct interval_state_snapshot_t {
+    class interval_state_snapshot_t {
+        // Allow the analyzer framework access to private data members to set them
+        // during trace interval analysis. Tools have read-only access via the public
+        // accessor functions.
+        // Note that we expect X to be same as RecordType. But friend declarations
+        // cannot refer to partial specializations so we go with the separate template
+        // parameter X.
+        template <typename X, typename Y> friend class analyzer_tmpl_t;
+
+    public:
         // This constructor is only for convenience in unit tests. The tool does not
         // need to provide these values, and can simply use the default constructor
         // below.
@@ -204,43 +213,70 @@ template <typename RecordType> class analysis_tool_tmpl_t {
                                   uint64_t interval_end_timestamp,
                                   uint64_t instr_count_cumulative,
                                   uint64_t instr_count_delta)
-            : shard_id(shard_id)
-            , interval_id(interval_id)
-            , interval_end_timestamp(interval_end_timestamp)
-            , instr_count_cumulative(instr_count_cumulative)
-            , instr_count_delta(instr_count_delta)
+            : shard_id_(shard_id)
+            , interval_id_(interval_id)
+            , interval_end_timestamp_(interval_end_timestamp)
+            , instr_count_cumulative_(instr_count_cumulative)
+            , instr_count_delta_(instr_count_delta)
         {
         }
         interval_state_snapshot_t()
         {
         }
+        virtual ~interval_state_snapshot_t() = default;
 
+        int64_t
+        get_shard_id() const
+        {
+            return shard_id_;
+        }
+        uint64_t
+        get_interval_id() const
+        {
+            return interval_id_;
+        }
+        uint64_t
+        get_interval_end_timestamp() const
+        {
+            return interval_end_timestamp_;
+        }
+        uint64_t
+        get_instr_count_cumulative() const
+        {
+            return instr_count_cumulative_;
+        }
+        uint64_t
+        get_instr_count_delta() const
+        {
+            return instr_count_delta_;
+        }
+
+        static constexpr int64_t WHOLE_TRACE_SHARD_ID = -1;
+
+    private:
         // The following fields are set automatically by the analyzer framework after
         // the tool returns the interval_state_snapshot_t* in the
         // generate_*interval_snapshot APIs. So they'll be available to the tool in
-        // the combine_interval_snapshots and print_interval_results APIs.
+        // the combine_interval_snapshots (for the parameter snapshots) and
+        // print_interval_results APIs via the above public accessor functions.
 
         // Identifier for the shard to which this interval belongs. Currently, shards
         // map only to threads, so this is the thread id. Set to WHOLE_TRACE_SHARD_ID
         // for the whole trace interval snapshots.
-        int64_t shard_id = 0;
-        uint64_t interval_id = 0;
+        int64_t shard_id_ = 0;
+        uint64_t interval_id_ = 0;
         // Stores the timestamp (exclusive) when the above interval ends. Note
         // that this is not the last timestamp actually seen in the trace interval,
         // but simply the abstract boundary of the interval. This will be aligned
         // to the specified -interval_microseconds.
-        uint64_t interval_end_timestamp = 0;
+        uint64_t interval_end_timestamp_ = 0;
 
         // Count of instructions: cumulative till this interval's end, and the
         // incremental delta in this interval vs the previous one. May be useful for
         // tools to compute PKI (per kilo instruction) metrics; obviates the need for
         // each tool to duplicate this.
-        uint64_t instr_count_cumulative = 0;
-        uint64_t instr_count_delta = 0;
-
-        static constexpr int64_t WHOLE_TRACE_SHARD_ID = -1;
-
-        virtual ~interval_state_snapshot_t() = default;
+        uint64_t instr_count_cumulative_ = 0;
+        uint64_t instr_count_delta_ = 0;
     };
     /**
      * Notifies the analysis tool that the given trace \p interval_id has ended so
diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp
index 12916f1e3b0..f0425eb586c 100644
--- a/clients/drcachesim/analyzer.cpp
+++ b/clients/drcachesim/analyzer.cpp
@@ -738,17 +738,17 @@ analyzer_tmpl_t<RecordType, ReaderType>::combine_interval_snapshots(
             tools_[tool_idx]->get_error_string();
         return false;
     }
-    result->instr_count_delta = 0;
-    result->instr_count_cumulative = 0;
+    result->instr_count_delta_ = 0;
+    result->instr_count_cumulative_ = 0;
     for (auto snapshot : latest_shard_snapshots) {
         if (snapshot == nullptr)
             continue;
         // As discussed in the doc for analysis_tool_t::combine_interval_snapshots,
         // we combine all shard's latest snapshots for cumulative metrics, whereas
         // we combine only the shards active in current interval for delta metrics.
-        result->instr_count_cumulative += snapshot->instr_count_cumulative;
-        if (snapshot->interval_end_timestamp == interval_end_timestamp)
-            result->instr_count_delta += snapshot->instr_count_delta;
+        result->instr_count_cumulative_ += snapshot->instr_count_cumulative_;
+        if (snapshot->interval_end_timestamp_ == interval_end_timestamp)
+            result->instr_count_delta_ += snapshot->instr_count_delta_;
     }
     return true;
 }
@@ -787,7 +787,7 @@ analyzer_tmpl_t<RecordType, ReaderType>::merge_shard_interval_results(
                 continue;
             earliest_interval_end_timestamp =
                 std::min(earliest_interval_end_timestamp,
-                         intervals[shard_idx].front()->interval_end_timestamp);
+                         intervals[shard_idx].front()->interval_end_timestamp_);
         }
         // We're done if no shard has any interval left unprocessed.
         if (earliest_interval_end_timestamp == std::numeric_limits<uint64_t>::max()) {
@@ -805,7 +805,7 @@ analyzer_tmpl_t<RecordType, ReaderType>::merge_shard_interval_results(
             if (intervals[shard_idx].empty())
                 continue;
             uint64_t cur_interval_end_timestamp =
-                intervals[shard_idx].front()->interval_end_timestamp;
+                intervals[shard_idx].front()->interval_end_timestamp_;
             assert(cur_interval_end_timestamp >= earliest_interval_end_timestamp);
             if (cur_interval_end_timestamp > earliest_interval_end_timestamp)
                 continue;
@@ -836,10 +836,10 @@ analyzer_tmpl_t<RecordType, ReaderType>::merge_shard_interval_results(
                                         cur_merged_interval))
             return false;
         // Add the merged interval to the result list of whole trace intervals.
-        cur_merged_interval->shard_id = analysis_tool_tmpl_t<
+        cur_merged_interval->shard_id_ = analysis_tool_tmpl_t<
             RecordType>::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID;
-        cur_merged_interval->interval_end_timestamp = earliest_interval_end_timestamp;
-        cur_merged_interval->interval_id = compute_timestamp_interval_id(
+        cur_merged_interval->interval_end_timestamp_ = earliest_interval_end_timestamp;
+        cur_merged_interval->interval_id_ = compute_timestamp_interval_id(
             earliest_ever_interval_end_timestamp, earliest_interval_end_timestamp);
         merged_intervals.push_back(cur_merged_interval);
     }
@@ -1099,28 +1099,28 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_interval(
             return false;
         }
         if (snapshot != nullptr) {
-            snapshot->shard_id = parallel
+            snapshot->shard_id_ = parallel
                 ? worker->shard_data[shard_idx].shard_id
                 : analysis_tool_tmpl_t<
                       RecordType>::interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID;
-            snapshot->interval_id = interval_id;
+            snapshot->interval_id_ = interval_id;
             if (interval_microseconds_ > 0) {
                 // For timestamp intervals, the interval_end_timestamp is the abstract
                 // non-inclusive end timestamp for the interval_id. This is to make it
                 // easier to line up the corresponding shard interval snapshots so that
                 // we can merge them to form the whole-trace interval snapshots.
-                snapshot->interval_end_timestamp = compute_interval_end_timestamp(
+                snapshot->interval_end_timestamp_ = compute_interval_end_timestamp(
                     worker->stream->get_first_timestamp(), interval_id);
             } else {
-                snapshot->interval_end_timestamp = worker->stream->get_last_timestamp();
+                snapshot->interval_end_timestamp_ = worker->stream->get_last_timestamp();
             }
             // instr_count_cumulative for the interval snapshot is supposed to be
             // inclusive, so if the first record after the interval (that is, the record
             // we're at right now) is an instr, it must be subtracted.
-            snapshot->instr_count_cumulative =
+            snapshot->instr_count_cumulative_ =
                 worker->stream->get_instruction_ordinal() - (at_instr_record ? 1 : 0);
-            snapshot->instr_count_delta =
-                snapshot->instr_count_cumulative - interval_init_instr_count;
+            snapshot->instr_count_delta_ =
+                snapshot->instr_count_cumulative_ - interval_init_instr_count;
             worker->shard_data[shard_idx].tool_data[tool_idx].interval_snapshot_data.push(
                 snapshot);
         }
diff --git a/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp b/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp
index 533c5beeb00..ec5546f32e5 100644
--- a/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp
+++ b/clients/drcachesim/tests/trace_interval_analysis_unit_tests.cpp
@@ -328,21 +328,23 @@ struct recorded_snapshot_t : public analysis_tool_t::interval_state_snapshot_t {
     bool
     operator==(const recorded_snapshot_t &rhs) const
     {
-        return shard_id == rhs.shard_id && tool_shard_id == rhs.tool_shard_id &&
-            interval_id == rhs.interval_id &&
-            interval_end_timestamp == rhs.interval_end_timestamp &&
-            instr_count_cumulative == rhs.instr_count_cumulative &&
-            instr_count_delta == rhs.instr_count_delta &&
+        return get_shard_id() == rhs.get_shard_id() &&
+            tool_shard_id == rhs.tool_shard_id &&
+            get_interval_id() == rhs.get_interval_id() &&
+            get_interval_end_timestamp() == rhs.get_interval_end_timestamp() &&
+            get_instr_count_cumulative() == rhs.get_instr_count_cumulative() &&
+            get_instr_count_delta() == rhs.get_instr_count_delta() &&
             component_intervals == rhs.component_intervals;
     }
     void
     print() const
     {
-        std::cerr << "(shard_id: " << shard_id << ", interval_id: " << interval_id
+        std::cerr << "(shard_id: " << get_shard_id()
+                  << ", interval_id: " << get_interval_id()
                   << ", tool_shard_id: " << tool_shard_id
-                  << ", end_timestamp: " << interval_end_timestamp
-                  << ", instr_count_cumulative: " << instr_count_cumulative
-                  << ", instr_count_delta: " << instr_count_delta
+                  << ", end_timestamp: " << get_interval_end_timestamp()
+                  << ", instr_count_cumulative: " << get_instr_count_cumulative()
+                  << ", instr_count_delta: " << get_instr_count_delta()
                   << ", component_intervals: ";
         for (const auto &s : component_intervals) {
             std::cerr << "(tid:" << s.tid << ", seen_memrefs:" << s.seen_memrefs
@@ -473,14 +475,15 @@ class test_analysis_tool_t : public analysis_tool_t {
         for (auto snapshot : latest_shard_snapshots) {
             if (snapshot != nullptr &&
                 (!combine_only_active_shards_ ||
-                 snapshot->interval_end_timestamp == interval_end_timestamp)) {
+                 snapshot->get_interval_end_timestamp() == interval_end_timestamp)) {
                 auto recorded_snapshot =
                     dynamic_cast<const recorded_snapshot_t *const>(snapshot);
-                if (recorded_snapshot->tool_shard_id != recorded_snapshot->shard_id) {
+                if (recorded_snapshot->tool_shard_id !=
+                    recorded_snapshot->get_shard_id()) {
                     FATAL_ERROR("shard_id stored by tool (%" PRIi64
                                 ") and framework (%" PRIi64 ") mismatch",
                                 recorded_snapshot->tool_shard_id,
-                                recorded_snapshot->shard_id);
+                                recorded_snapshot->get_shard_id());
                     return nullptr;
                 }
                 result->component_intervals.insert(
diff --git a/clients/drcachesim/tools/basic_counts.cpp b/clients/drcachesim/tools/basic_counts.cpp
index e340425b9a3..a0b2ab886cf 100644
--- a/clients/drcachesim/tools/basic_counts.cpp
+++ b/clients/drcachesim/tools/basic_counts.cpp
@@ -451,37 +451,38 @@ basic_counts_t::print_interval_results(
 {
     std::cerr << "Counts per trace interval for ";
     if (!interval_snapshots.empty() &&
-        interval_snapshots[0]->shard_id !=
+        interval_snapshots[0]->get_shard_id() !=
             interval_state_snapshot_t::WHOLE_TRACE_SHARD_ID) {
-        std::cerr << "TID " << interval_snapshots[0]->shard_id << ":\n";
+        std::cerr << "TID " << interval_snapshots[0]->get_shard_id() << ":\n";
     } else {
         std::cerr << "whole trace:\n";
     }
     counters_t last;
     for (const auto &snapshot_base : interval_snapshots) {
         auto *snapshot = dynamic_cast<count_snapshot_t *>(snapshot_base);
-        std::cerr << "Interval #" << snapshot->interval_id << " ending at timestamp "
-                  << snapshot->interval_end_timestamp << ":\n";
+        std::cerr << "Interval #" << snapshot->get_interval_id()
+                  << " ending at timestamp " << snapshot->get_interval_end_timestamp()
+                  << ":\n";
         counters_t diff = snapshot->counters;
         diff -= last;
         print_counters(diff, " interval delta");
         last = snapshot->counters;
         if (knob_verbose_ > 0) {
-            if (snapshot->instr_count_cumulative !=
+            if (snapshot->get_instr_count_cumulative() !=
                 static_cast<uint64_t>(snapshot->counters.instrs)) {
                 std::stringstream err_stream;
                 err_stream << "Cumulative instr count value provided by framework ("
-                           << snapshot->instr_count_cumulative
+                           << snapshot->get_instr_count_cumulative()
                            << ") not equal to tool value (" << snapshot->counters.instrs
                            << ")\n";
                 error_string_ = err_stream.str();
                 return false;
             }
-            if (snapshot->instr_count_delta != static_cast<uint64_t>(diff.instrs)) {
+            if (snapshot->get_instr_count_delta() != static_cast<uint64_t>(diff.instrs)) {
                 std::stringstream err_stream;
                 err_stream << "Delta instr count value provided by framework ("
-                           << snapshot->instr_count_delta << ") not equal to tool value ("
-                           << diff.instrs << ")\n";
+                           << snapshot->get_instr_count_delta()
+                           << ") not equal to tool value (" << diff.instrs << ")\n";
                 error_string_ = err_stream.str();
                 return false;
             }