i#6685: Add core-sharded-on-disk analysis support (#6680)

Add core-sharded-on-disk analysis support: Adds a new filetype, OFFLINE_FILE_TYPE_CORE_SHARDED, which record_filter sets when its output is core-sharded. Has the scheduler read ahead to the filetype in every input and arrange to make it available at init time to output streams. The analyzer looks for the filetype and, if it is set, sets the shard type to core-sharded. This differs from dynamically scheduled core-sharded analysis in that we do not need a 1:1 shard:worker mapping. Allows header records in reader_t::process_input_entry(), which are now expected with core-sharded-on-disk traces. Adds core-sharded reader support for seeing only portions of some threads, which requires relaxing some tid and pid checks. Checks in 6 core-sharded-on-disk x64 threadsig traces for use in a test of basic_counts analyzing them; that analysis fails without all of these framework changes. The record_filter changes needed to generate those traces in the first place will come separately. Issue: #6685, #6635. Fixes #6685.
DynamoRIO · Feb 29, 2024 · 8cec618 · 8cec618
1 parent 4ce6943
commit 8cec618
Show file tree

Hide file tree

Showing 18 changed files with 288 additions and 41 deletions.
diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp
@@ -308,11 +308,13 @@ analyzer_tmpl_t<RecordType, ReaderType>::init_scheduler_common(
         }
     } else if (parallel_) {
         sched_ops = sched_type_t::make_scheduler_parallel_options(verbosity_);
+        sched_ops.read_inputs_in_init = options.read_inputs_in_init;
         if (worker_count_ <= 0)
             worker_count_ = std::thread::hardware_concurrency();
         output_count = worker_count_;
     } else {
         sched_ops = sched_type_t::make_scheduler_serial_options(verbosity_);
+        sched_ops.read_inputs_in_init = options.read_inputs_in_init;
         worker_count_ = 1;
         output_count = 1;
     }
@@ -326,6 +328,14 @@ analyzer_tmpl_t<RecordType, ReaderType>::init_scheduler_common(
 
     for (int i = 0; i < worker_count_; ++i) {
         worker_data_.push_back(analyzer_worker_data_t(i, scheduler_.get_stream(i)));
+        if (options.read_inputs_in_init) {
+            // The docs say we can query the filetype up front.
+            uint64_t filetype = scheduler_.get_stream(i)->get_filetype();
+            VPRINT(this, 2, "Worker %d filetype %" PRIx64 "\n", i, filetype);
+            if (TESTANY(OFFLINE_FILE_TYPE_CORE_SHARDED, filetype)) {
+                shard_type_ = SHARD_BY_CORE;
+            }
+        }
     }
 
     return true;
@@ -628,9 +638,7 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_tasks_internal(
             }
             return false;
         }
-        int shard_index = shard_type_ == SHARD_BY_CORE
-            ? worker->index
-            : worker->stream->get_input_stream_ordinal();
+        int shard_index = worker->stream->get_shard_index();
         if (worker->shard_data.find(shard_index) == worker->shard_data.end()) {
             VPRINT(this, 1, "Worker %d starting on trace shard %d stream is %p\n",
                    worker->index, shard_index, worker->stream);

diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h
@@ -223,6 +223,8 @@ template <typename RecordType, typename ReaderType> class analyzer_tmpl_t {
 
     // For core-sharded, worker_count_ must be set prior to calling this; for parallel
     // mode if it is not set it will be set to the underlying core count.
+    // For core-sharded, all of "options" is used; otherwise, default parallel or serial
+    // options are used and only the read_inputs_in_init field of "options" is kept.
     bool
     init_scheduler(std::unique_ptr<ReaderType> reader,
                    std::unique_ptr<ReaderType> reader_end, int verbosity,

diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp
@@ -455,6 +455,8 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::analyzer_multi_tmpl_t()
             return;
         }
         auto end = create_ipc_reader_end();
+        // We do not want the scheduler's init() to block.
+        sched_ops.read_inputs_in_init = false;
         if (!this->init_scheduler(std::move(reader), std::move(end),
                                   op_verbose.get_value(), std::move(sched_ops))) {
             this->success_ = false;

diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2015-2023 Google, Inc.  All rights reserved.
+ * Copyright (c) 2015-2024 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -963,6 +963,10 @@ typedef enum {
      * execution.
      */
     OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY = 0x8000,
+    /**
+     * Each trace shard represents one core and contains interleaved software threads.
+     */
+    OFFLINE_FILE_TYPE_CORE_SHARDED = 0x10000,
 } offline_file_type_t;
 
 static inline const char *

diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2016-2023 Google, Inc.  All rights reserved.
+ * Copyright (c) 2016-2024 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -135,7 +135,7 @@ reader_t::process_input_entry()
     case TRACE_TYPE_PREFETCH_WRITE_L3:
     case TRACE_TYPE_PREFETCH_WRITE_L3_NT:
         have_memref = true;
-        assert(cur_tid_ != 0 && cur_pid_ != 0);
+        assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_);
         cur_ref_.data.pid = cur_pid_;
         cur_ref_.data.tid = cur_tid_;
         cur_ref_.data.type = (trace_type_t)input_entry_->type;
@@ -177,7 +177,7 @@ reader_t::process_input_entry()
     case TRACE_TYPE_INSTR_RETURN:
     case TRACE_TYPE_INSTR_SYSENTER:
     case TRACE_TYPE_INSTR_NO_FETCH:
-        assert(cur_tid_ != 0 && cur_pid_ != 0);
+        assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_);
         if (input_entry_->size == 0) {
             // Just an entry to tell us the PC of the subsequent memref,
             // used with -L0_filter where we don't reliably have icache
@@ -250,7 +250,7 @@ reader_t::process_input_entry()
         break;
     case TRACE_TYPE_INSTR_FLUSH:
     case TRACE_TYPE_DATA_FLUSH:
-        assert(cur_tid_ != 0 && cur_pid_ != 0);
+        assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_);
         cur_ref_.flush.pid = cur_pid_;
         cur_ref_.flush.tid = cur_tid_;
         cur_ref_.flush.type = (trace_type_t)input_entry_->type;
@@ -274,7 +274,7 @@ reader_t::process_input_entry()
     case TRACE_TYPE_THREAD_EXIT:
         cur_tid_ = (memref_tid_t)input_entry_->addr;
         cur_pid_ = tid2pid_[cur_tid_];
-        assert(cur_tid_ != 0 && cur_pid_ != 0);
+        assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_);
         // We do pass this to the caller but only some fields are valid:
         cur_ref_.exit.pid = cur_pid_;
         cur_ref_.exit.tid = cur_tid_;
@@ -288,7 +288,7 @@ reader_t::process_input_entry()
         break;
     case TRACE_TYPE_MARKER:
         cur_ref_.marker.type = (trace_type_t)input_entry_->type;
-        assert(cur_tid_ != 0 && cur_pid_ != 0);
+        assert((cur_tid_ != 0 && cur_pid_ != 0) || core_sharded_);
         cur_ref_.marker.pid = cur_pid_;
         cur_ref_.marker.tid = cur_tid_;
         cur_ref_.marker.marker_type = (trace_marker_type_t)input_entry_->size;
@@ -330,6 +330,9 @@ reader_t::process_input_entry()
             if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_)) {
                 expect_no_encodings_ = false;
             }
+            if (TESTANY(OFFLINE_FILE_TYPE_CORE_SHARDED, filetype_)) {
+                core_sharded_ = true;
+            }
         } else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE)
             cache_line_size_ = cur_ref_.marker.marker_value;
         else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_PAGE_SIZE)
@@ -346,6 +349,14 @@ reader_t::process_input_entry()
             in_kernel_trace_ = false;
         }
         break;
+    case TRACE_TYPE_HEADER:
+        // We support complete traces being packaged in archives and then read
+        // sequentially, or core-sharded record_filter operation.
+        // We just keep going past the header.
+        VPRINT(
+            this, 2,
+            "Assuming header is part of concatenated or on-disk-core-sharded traces\n");
+        break;
     default:
         ERRMSG("Unknown trace entry type %s (%d)\n", trace_type_names[input_entry_->type],
                input_entry_->type);

diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2015-2023 Google, Inc.  All rights reserved.
+ * Copyright (c) 2015-2024 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -272,6 +272,7 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
     };
 
     std::unordered_map<addr_t, encoding_info_t> encodings_;
+    bool core_sharded_ = false;
 
 private:
     memref_t cur_ref_;