Skip to content

Commit

Permalink
Added #mem_ops (i.e., loads + stores) to encoding.
Browse files Browse the repository at this point in the history
  • Loading branch information
edeiana committed Mar 31, 2024
1 parent 4553779 commit 55f737c
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 39 deletions.
12 changes: 11 additions & 1 deletion core/ir/instr_shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -3026,10 +3026,12 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_from)
uint num_dsts = 0;
uint original_num_dsts = (uint)instr_num_dsts(instr_from);
uint max_reg_size = 0;
uint num_stores = 0;
for (uint dst_index = 0; dst_index < original_num_dsts; ++dst_index) {
opnd_t dst_opnd = instr_get_dst(instr_from, dst_index);
uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(dst_opnd);
if (opnd_is_memory_reference(dst_opnd)) {
++num_stores;
for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) {
reg_id_t reg = opnd_get_reg_used(dst_opnd, opnd_index);
/* Map sub-registers to their containing register.
Expand Down Expand Up @@ -3065,8 +3067,11 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_from)
/* Retrieve number of register source operands from real ISA instruction.
*/
uint original_num_srcs = (uint)instr_num_srcs(instr_from);
uint num_loads = 0;
for (uint i = 0; i < original_num_srcs; ++i) {
opnd_t src_opnd = instr_get_src(instr_from, i);
if (opnd_is_memory_reference(src_opnd))
++num_loads;
uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(src_opnd);
for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) {
reg_id_t reg = opnd_get_reg_used(src_opnd, opnd_index);
Expand Down Expand Up @@ -3108,6 +3113,11 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_from)
*/
instr_set_category(instr_to, instr_get_category(instr_from));

/* Use instr_t encoding_hints to record the number of memory operations
* (i.e., loads + stores) executed by the instruction.
*/
instr_to->encoding_hints = num_stores + num_loads;

/* Get max_reg_size as opnd_size_t (takes values from OPSZ_ enum), if there are any
* operands.
*/
Expand Down Expand Up @@ -3152,7 +3162,7 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_from)
* any operands.
*/
uint num_opnd_bytes = num_opnds > 0 ? num_opnds + 1 : 0;
uint instr_length = ALIGN_FORWARD(HEADER_BYTES + num_opnd_bytes, HEADER_BYTES);
uint instr_length = ALIGN_FORWARD(HEADER_BYTES + num_opnd_bytes, ALIGN_BYTES);
instr_to->length = instr_length;

/* Set converted instruction ISA mode to be DR_ISA_REGDEPS.
Expand Down
13 changes: 9 additions & 4 deletions core/ir/synthetic/decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,22 @@ decode_from_synth(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr)
uint category = (encoding_header & CATEGORY_MASK) >> CATEGORY_SHIFT;
instr_set_category(instr, category);

/* Decode number of memory operations (i.e., loads + stores).
*/
byte num_mem_ops = encoded_instr[NUM_MEM_OPS_INDEX];
instr->encoding_hints = (uint)num_mem_ops;

/* Decode register operand size, if there are any operands.
*/
uint num_opnds = num_dsts + num_srcs;
opnd_size_t max_reg_size = OPSZ_NA;
if (num_opnds > 0)
max_reg_size = (opnd_size_t)encoded_instr[HEADER_BYTES];
max_reg_size = (opnd_size_t)encoded_instr[OP_SIZE_INDEX];

/* Decode register destination operands, if present.
*/
for (uint i = 0; i < num_dsts; ++i) {
reg_id_t dst = (reg_id_t)encoded_instr[i + HEADER_BYTES + 1];
reg_id_t dst = (reg_id_t)encoded_instr[i + OPND_INDEX];
opnd_t dst_opnd = opnd_create_reg((reg_id_t)dst);
opnd_set_size(&dst_opnd, max_reg_size);
instr_set_dst(instr, i, dst_opnd);
Expand All @@ -103,7 +108,7 @@ decode_from_synth(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr)
/* Decode register source operands, if present.
*/
for (uint i = 0; i < num_srcs; ++i) {
reg_id_t src = (reg_id_t)encoded_instr[i + HEADER_BYTES + 1 + num_dsts];
reg_id_t src = (reg_id_t)encoded_instr[i + OPND_INDEX + num_dsts];
opnd_t src_opnd = opnd_create_reg((reg_id_t)src);
opnd_set_size(&src_opnd, max_reg_size);
instr_set_src(instr, i, src_opnd);
Expand All @@ -118,7 +123,7 @@ decode_from_synth(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr)
* any operands.
*/
uint num_opnd_bytes = num_opnds > 0 ? num_opnds + 1 : 0;
uint instr_length = ALIGN_FORWARD(HEADER_BYTES + num_opnd_bytes, HEADER_BYTES);
uint instr_length = ALIGN_FORWARD(HEADER_BYTES + num_opnd_bytes, ALIGN_BYTES);
instr->length = instr_length;

/* At this point the synthetic instruction has been fully decoded, so we set the
Expand Down
11 changes: 8 additions & 3 deletions core/ir/synthetic/encode.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ encode_to_synth(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr)
*/
*((uint *)&encoded_instr[0]) = encoding_header;

/* Encode number of memory operations (i.e., loads + stores).
*/
uint num_mem_ops = instr->encoding_hints;
encoded_instr[NUM_MEM_OPS_INDEX] = (byte)num_mem_ops;

/* Encode register destination operands, if present.
*/
opnd_size_t max_opnd_size = OPSZ_NA;
Expand All @@ -90,7 +95,7 @@ encode_to_synth(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr)
uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(dst_opnd);
for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) {
reg_id_t reg = opnd_get_reg_used(dst_opnd, opnd_index);
encoded_instr[dst_index + HEADER_BYTES + 1] = (byte)reg;
encoded_instr[dst_index + OPND_INDEX] = (byte)reg;
}
}

Expand All @@ -102,7 +107,7 @@ encode_to_synth(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr)
uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(src_opnd);
for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) {
reg_id_t reg = opnd_get_reg_used(src_opnd, opnd_index);
encoded_instr[src_index + HEADER_BYTES + 1 + num_dsts] = (byte)reg;
encoded_instr[src_index + OPND_INDEX + num_dsts] = (byte)reg;
}
}

Expand All @@ -113,7 +118,7 @@ encode_to_synth(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr)
CLIENT_ASSERT(
max_opnd_size != OPSZ_NA,
"instructions with register operands cannot have operand size OPSZ_NA");
encoded_instr[HEADER_BYTES] = (byte)max_opnd_size;
encoded_instr[OP_SIZE_INDEX] = (byte)max_opnd_size;
}

/* Retrieve instruction length, which includes bytes for padding to reach 4 byte
Expand Down
78 changes: 47 additions & 31 deletions core/ir/synthetic/encoding_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,23 @@
#define _SYNTHETIC_ENCODING_COMMON_H_

/**
* This synthetic ISA is a made up ISA that has the purpose of preserving register
* dependencies and giving hints on the type of operations each instruction is performing.
* This synthetic ISA (which we call DR_ISA_REGDEPS) is a made up ISA that has the purpose
* of preserving register dependencies and giving hints on the type of operations each
* instruction is performing.
* For this reason the majority of operations that would normally work on instructions
* coming from an actual ISA are not supported.
* The operations we currently support are instr_encode(), instr_encode_to_copy() to
* encode an #instr_t of an actual ISA (e.g., x86) and decode(), decode_common() to obtain
* a synthetic #instr_t from its encoding.
* Synthetic #instr_t can only return the encoded information: categories
* (from #dr_category_t) to indicate the type of operation performed (e.g., a load, a math
* operation, floating point math operation, a branch, etc.), arithmetic flags, number of
* source and destination register operands, operation size, register operand IDs,
* instruction length, and ISA mode (which is DR_ISA_REGDEPS).
* coming from an actual ISA (e.g., DR_ISA_AMD64) are not supported.
* We support instr_convert_to_isa_regdeps(), which converts an #instr_t of an actual ISA
* to a DR_ISA_REGDEPS instruction.
* Other operations we support (related to encoding and decoding of a DR_ISA_REGDEPS
* #instr_t) are: instr_encode(), instr_encode_to_copy(), decode(), decode_common().
* DR_ISA_REGDEPS #instr_t can only return the encoded information described in the
* encoding scheme (core/ir/synthetic/encoding_common.h): categories (from
* #dr_instr_category_t) to indicate the type of operation performed (e.g., a load, a math
* operation, floating point math operation, a branch, etc.), arithmetic flags, number of
* source and destination register operands, number of memory operations (i.e., loads +
* stores) performed (stored in #instr_t encoding_hints field), register operands and
* their size (which is the same for all operands), instruction length, and ISA mode
* (which is DR_ISA_REGDEPS).
* Querying additional information (e.g., the instruction opcode) will return the default
* value generated by instr_create() (i.e., zero).
*/
Expand All @@ -59,11 +64,14 @@
*
* Encoded instructions are 4 byte aligned.
*
* All instruction encodings begin with the following 4 header bytes, which follow this
* All instruction encodings begin with the following 5 header bytes, which follow this
* scheme:
* |----------------------| |--| |----| |----|
* 31.. ..10 9,8 7..4 3..0
* category eflags #src #dst
* |--------| |--------| |--------| |--------|
* 31.. ..24 23.. ..16 15.. ..8 7.. ..0
* padding padding padding #mem_ops
*
* 22 bits, category: it's a high level representation of the opcode of an instruction.
* Each bit represents one category following #dr_instr_category_t.
Expand All @@ -77,38 +85,41 @@
* they are source or destination operands) are considered as source operands in the
* encoded instruction.
* 4 bits, #dst: number of destination operands (written) that are registers.
* 8 bits, #mem_ops: number of memory operations (i.e., loads + stores) performed by the
* instruction.
*
* Because of 4 byte alignment, the last 3 bytes [31.. ..8] are padding and are undefined
* (i.e., it cannot be assumed that they have been zeroed-out or contain any meaningful
* value).
* We assume these encoded values to be little-endian.
* Note that we are only interested in register dependencies, hence operands that are
* not registers, such as immediates or memory references, are not present.
*
* Following the 4 header bytes are the bytes for the operation size and for encoding
* register operands.
* Following the 5 header bytes are the bytes for the operation size and for encoding
* register operands, if any are present.
* These bytes start right after the #mem_ops byte.
* The first byte contains the operation size encoded as an OPSZ_ enum value.
* Following the operation size are the register operand IDs.
* Each register operand is 1 byte.
* The destination operands go first, followed by the source operands.
* An instruction can have up to 8 operands (sources + destinations).
* Note that, because of 4 byte alignment, instructions with 1 to 3 operands will have a
* size of 8 bytes (4 header bytes + 1 byte for operands's size + 3 operand-related
* bytes), instructions with 4 to 7 operands will have a size of 12 bytes, while
* instructions with 8 operands will have the maximum size of 13 bytes.
* Instructions with no operands have only the 4 header bytes (no size-related byte nor
* operand-related bytes).
* For example, an instruction with 4 operands (1 dst, 3 src) has 8 additional bytes (on
* top of the 4 header bytes) that are encoded following this scheme:
* 1st chunk of 4 bytes:
* Note that, because of 4 byte alignment, instructions with 1 or 2 operands will have a
* size of 8 bytes (5 header bytes + 1 byte for operation size + 2 operand-related bytes),
* instructions with 3 to 6 operands will have a size of 12 bytes, while instructions with
* 7 or 8 operands will have the maximum size of 16 bytes.
* Instructions with no operands have only the 5 header bytes (no size-related byte nor
* operand-related bytes) and the 3 bytes of padding, for a total of 8 bytes.
* For example, an instruction with 4 operands (1 dst, 3 src) has 12 bytes and would be
* encoded as:
* |----------------------| |--| |----| |----|
* 31.. ..10 9,8 7..4 3..0
* category eflags #src #dst
* |--------| |--------| |--------| |--------|
* 31.. ..24 23.. ..16 15.. ..8 7.. ..0
* src_op1 src_op0 dst_op0 op_size
* 2nd chunk of 4 bytes:
* src_op0 dst_op0 op_size #mem_ops
* |--------| |--------| |--------| |--------|
* 31.. ..24 23.. ..16 15.. ..8 7.. ..0
* padding padding padding src_op2
*
* Because of 4 byte alignment, the last 3 bytes [31.. ..8] of the 2nd chunk are padding
* and are undefined (i.e., it cannot be assumed that they have been zeroed-out or contain
* any meaningful value).
* padding padding src_op2 src_op1
*/

#define CATEGORY_BITS 22
Expand All @@ -127,7 +138,12 @@
#define SYNTHETIC_INSTR_WRITES_ARITH 0x1
#define SYNTHETIC_INSTR_READS_ARITH 0x2

#define HEADER_BYTES 4
/* Number of header bytes at the start of every encoded instruction: 4 bytes holding
 * category, eflags, #src and #dst, followed by 1 byte holding #mem_ops.
 */
#define HEADER_BYTES 5
/* Byte offset of the #mem_ops byte, which is the last header byte.
 * The expansions below are parenthesized so that these macros evaluate as a single
 * value in any expression (e.g., 2 * OPND_INDEX or x - NUM_MEM_OPS_INDEX).
 */
#define NUM_MEM_OPS_INDEX (HEADER_BYTES - 1)
/* Byte offset of the operation size, which immediately follows the #mem_ops byte.
 * Only meaningful when the instruction has at least one register operand.
 */
#define OP_SIZE_INDEX (NUM_MEM_OPS_INDEX + 1)
/* Byte offset of the first register operand (destinations first, then sources).
 */
#define OPND_INDEX (OP_SIZE_INDEX + 1)

/* Encoded instruction lengths are rounded up to a multiple of this many bytes.
 */
#define ALIGN_BYTES 4

/* Defines the maximum number of non-overlapping registers for any architecture we
* currently support.
Expand Down

0 comments on commit 55f737c

Please sign in to comment.