Skip to content

Commit

Permalink
i#5036 A64 scatter/gather, part 6: Expand multi-register instrs (#6403)
Browse files Browse the repository at this point in the history
Adds support to drx_expand_scatter_gather() for SVE multi-register
scalar+scalar and scalar+immediate predicated contiguous load and store
instructions, along with tests.

Issue: #5036

---------

Co-authored-by: Assad Hashmi <[email protected]>
  • Loading branch information
jackgallagher-arm and AssadHashmi authored Nov 21, 2023
1 parent b9bb6d0 commit b42815f
Show file tree
Hide file tree
Showing 6 changed files with 2,611 additions and 677 deletions.
60 changes: 60 additions & 0 deletions clients/drcachesim/tests/scattergather-aarch64.templatex
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,18 @@ ldnt1w scalar\+scalar: PASS
ld1sw scalar\+scalar: PASS
ld1d scalar\+scalar: PASS
ldnt1d scalar\+scalar: PASS
ld2b scalar\+scalar: PASS
ld2h scalar\+scalar: PASS
ld2w scalar\+scalar: PASS
ld2d scalar\+scalar: PASS
ld3b scalar\+scalar: PASS
ld3h scalar\+scalar: PASS
ld3w scalar\+scalar: PASS
ld3d scalar\+scalar: PASS
ld4b scalar\+scalar: PASS
ld4h scalar\+scalar: PASS
ld4w scalar\+scalar: PASS
ld4d scalar\+scalar: PASS
st1b scalar\+scalar 8bit element: PASS
st1b scalar\+scalar 16bit element: PASS
st1b scalar\+scalar 32bit element: PASS
Expand All @@ -154,6 +166,18 @@ st1h scalar\+scalar 64bit element: PASS
st1w scalar\+scalar 32bit element: PASS
st1w scalar\+scalar 64bit element: PASS
st1d scalar\+scalar: PASS
st2b scalar\+scalar: PASS
st2h scalar\+scalar: PASS
st2w scalar\+scalar: PASS
st2d scalar\+scalar: PASS
st3b scalar\+scalar: PASS
st3h scalar\+scalar: PASS
st3w scalar\+scalar: PASS
st3d scalar\+scalar: PASS
st4b scalar\+scalar: PASS
st4h scalar\+scalar: PASS
st4w scalar\+scalar: PASS
st4d scalar\+scalar: PASS
ld1b scalar\+immediate 8bit element: PASS
ld1b scalar\+immediate 16bit element: PASS
ld1b scalar\+immediate 32bit element: PASS
Expand Down Expand Up @@ -188,6 +212,24 @@ ld1d scalar\+immediate 64bit element: PASS
ld1d scalar\+immediate 64bit element \(min index\): PASS
ld1d scalar\+immediate 64bit element \(max index\): PASS
ldnt1d scalar\+immediate 64bit element: PASS
ld2b scalar\+immediate: PASS
ld2h scalar\+immediate: PASS
ld2w scalar\+immediate: PASS
ld2d scalar\+immediate: PASS
ld2d scalar\+immediate \(min index\): PASS
ld2d scalar\+immediate \(max index\): PASS
ld3b scalar\+immediate: PASS
ld3h scalar\+immediate: PASS
ld3w scalar\+immediate: PASS
ld3d scalar\+immediate: PASS
ld3d scalar\+immediate \(min index\): PASS
ld3d scalar\+immediate \(max index\): PASS
ld4b scalar\+immediate: PASS
ld4h scalar\+immediate: PASS
ld4w scalar\+immediate: PASS
ld4d scalar\+immediate: PASS
ld4d scalar\+immediate \(min index\): PASS
ld4d scalar\+immediate \(max index\): PASS
st1b scalar\+immediate 8bit element: PASS
st1b scalar\+immediate 16bit element: PASS
st1b scalar\+immediate 32bit element: PASS
Expand All @@ -206,6 +248,24 @@ st1w scalar\+immediate 64bit element \(max index\): PASS
st1d scalar\+immediate 64bit element: PASS
st1d scalar\+immediate 64bit element \(min index\): PASS
st1d scalar\+immediate 64bit element \(max index\): PASS
st2b scalar\+immediate: PASS
st2h scalar\+immediate: PASS
st2w scalar\+immediate: PASS
st2d scalar\+immediate: PASS
st2d scalar\+immediate \(min index\): PASS
st2d scalar\+immediate \(max index\): PASS
st3b scalar\+immediate: PASS
st3h scalar\+immediate: PASS
st3w scalar\+immediate: PASS
st3d scalar\+immediate: PASS
st3d scalar\+immediate \(min index\): PASS
st3d scalar\+immediate \(max index\): PASS
st4b scalar\+immediate: PASS
st4h scalar\+immediate: PASS
st4w scalar\+immediate: PASS
st4d scalar\+immediate: PASS
st4d scalar\+immediate \(min index\): PASS
st4d scalar\+immediate \(max index\): PASS
#endif /* __ARM_FEATURE_SVE */
---- <application exited with code 0> ----
Trace invariant checks passed
9 changes: 8 additions & 1 deletion clients/drcachesim/tracer/raw2trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -932,8 +932,15 @@ class raw2trace_t {
/**
* The trace_entry_t buffer returned by get_write_buffer() is assumed to be at least
* #WRITE_BUFFER_SIZE large.
*
* WRITE_BUFFER_SIZE needs to be large enough to hold one instruction and its
* memrefs.
* Some of the AArch64 SVE scatter/gather instructions have a lot of memref entries.
* For example, ld4b loads 4 registers with byte-sized elements, which produces
* (vl_bits / 8) * 4
* memref entries. With a 512-bit vector length that is (512 / 8) * 4 = 256 entries.
* The entries for the instruction itself come on top of that, so the size must
* exceed 256.
*/
static const uint WRITE_BUFFER_SIZE = 64;
static const uint WRITE_BUFFER_SIZE = 260;

struct block_summary_t {
block_summary_t(app_pc start, int instr_count)
Expand Down
Loading

0 comments on commit b42815f

Please sign in to comment.