Skip to content

Commit

Permalink
DAOS-16374 vos: integer overflow on evt recx trace (#15439)
Browse files Browse the repository at this point in the history
The evt recx trace is used for vos aggregation debugging, and it's currently
reset on akey iteration callback, but the akey iteration callback could be
skipped in some cases, for example, when evt aggregation hit an aborted recx,
it'll start over in evtree level without the recx trace reset, that could
lead to integer overflow on the 'int ap_trace_count'.

This patch moved the ap_trace_count reset to merge window open/close to ensure
the evt recx trace always being reset properly.

Signed-off-by: Niu Yawei <[email protected]>
  • Loading branch information
NiuYawei authored Nov 11, 2024
1 parent 248bb75 commit 083d786
Showing 1 changed file with 30 additions and 27 deletions.
57 changes: 30 additions & 27 deletions src/vos/vos_aggregate.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ struct agg_io_context {
d_list_t ic_nvme_exts;
};

#define EV_TRACE_MAX 1024
/* Merge window for evtree aggregation */
struct agg_merge_window {
/* Record size */
Expand All @@ -142,6 +143,10 @@ struct agg_merge_window {
/* I/O context for transferring data on flush */
struct agg_io_context mw_io_ctxt;
uint16_t mw_csum_type;
/* Recxs trace for debugging */
vos_iter_entry_t mw_evt_trace[EV_TRACE_MAX];
unsigned int mw_trace_start;
unsigned int mw_trace_count;
};

struct vos_agg_credits {
Expand All @@ -150,11 +155,7 @@ struct vos_agg_credits {
uint32_t vac_creds_merge; /* # of merging operations */
};

#define EV_TRACE_MAX 1024
struct vos_agg_param {
vos_iter_entry_t ap_evt_trace[EV_TRACE_MAX];
int ap_trace_start;
int ap_trace_count;
struct vos_agg_credits ap_credits;
daos_handle_t ap_coh; /* container handle */
daos_unit_oid_t ap_oid; /* current object ID */
Expand Down Expand Up @@ -1284,28 +1285,27 @@ fill_segments(daos_handle_t ih, struct vos_agg_param *agg_param, unsigned int *a
static void
dump_trace(struct agg_merge_window *mw)
{
struct vos_agg_param *agg_param = container_of(mw, struct vos_agg_param, ap_window);
vos_iter_entry_t *entry;
int i;
int last;

if (agg_param->ap_trace_count == 0)
if (mw->mw_trace_count == 0)
return;

if (agg_param->ap_trace_count < EV_TRACE_MAX) {
if (mw->mw_trace_count < EV_TRACE_MAX) {
D_ERROR("Assertion will trigger, dumping all %d evt_trace entries\n",
agg_param->ap_trace_count);
last = agg_param->ap_trace_count;
mw->mw_trace_count);
last = mw->mw_trace_count;
} else {
D_ERROR("Assertion will trigger, dumping the last %d of %d total evt_trace"
" entries\n",
EV_TRACE_MAX, agg_param->ap_trace_count);
last = agg_param->ap_trace_start;
EV_TRACE_MAX, mw->mw_trace_count);
last = mw->mw_trace_start;
}

i = agg_param->ap_trace_start;
i = mw->mw_trace_start;
do {
entry = &agg_param->ap_evt_trace[i];
entry = &mw->mw_evt_trace[i];
D_ERROR(" 0x" DF_X64 " recs@0x" DF_X64 " (0x" DF_X64 " recs@0x" DF_X64
")@0x" DF_X64 ".%d tx=%d hole=%d flg=%x rsz=0x" DF_X64 " gsz=0x" DF_X64
"\n",
Expand Down Expand Up @@ -1955,6 +1955,10 @@ close_merge_window(struct agg_merge_window *mw, int rc)
io->ic_csum_buf = NULL;
io->ic_csum_buf_len = 0;
}

/* Reset trace on window close */
mw->mw_trace_start = 0;
mw->mw_trace_count = 0;
}

static struct agg_phy_ent *
Expand Down Expand Up @@ -2167,6 +2171,7 @@ set_window_size(struct agg_merge_window *mw, vos_iter_entry_t *entry)
{
struct dcs_csum_info *csum_info = &entry->ie_csum;
daos_size_t rsize = entry->ie_rsize;
unsigned int next_idx;

if (rsize == 0) {
D_DEBUG(DB_TRACE, "EV tree 0 iod_size could be caused by "
Expand Down Expand Up @@ -2197,6 +2202,9 @@ set_window_size(struct agg_merge_window *mw, vos_iter_entry_t *entry)
/* Set csum support flag on processing first entry */
mw->mw_csum_type = csum_info->cs_type;

/* Reset trace on window open */
mw->mw_trace_start = 0;
mw->mw_trace_count = 0;
} else if (mw->mw_rsize != rsize) {
D_CRIT("Mismatched iod_size "DF_U64" != "DF_U64"\n",
mw->mw_rsize, rsize);
Expand All @@ -2207,6 +2215,15 @@ set_window_size(struct agg_merge_window *mw, vos_iter_entry_t *entry)
return -DER_INVAL;
}

if (mw->mw_trace_count >= EV_TRACE_MAX) {
next_idx = mw->mw_trace_start;
mw->mw_trace_start = (mw->mw_trace_start + 1) % EV_TRACE_MAX;
} else {
next_idx = mw->mw_trace_start + mw->mw_trace_count;
}
mw->mw_trace_count++;
memcpy(&mw->mw_evt_trace[next_idx], entry, sizeof(*entry));

return 0;
}

Expand All @@ -2217,23 +2234,13 @@ vos_agg_ev(daos_handle_t ih, vos_iter_entry_t *entry,
struct agg_merge_window *mw = &agg_param->ap_window;
struct evt_extent phy_ext, lgc_ext;
int rc = 0;
int next_idx;
struct vos_container *cont = vos_hdl2cont(agg_param->ap_coh);

D_ASSERT(agg_param != NULL);
D_ASSERT(acts != NULL);
recx2ext(&entry->ie_recx, &lgc_ext);
recx2ext(&entry->ie_orig_recx, &phy_ext);

if (agg_param->ap_trace_count >= EV_TRACE_MAX) {
next_idx = agg_param->ap_trace_start;
agg_param->ap_trace_start = (agg_param->ap_trace_start + 1) % EV_TRACE_MAX;
} else {
next_idx = agg_param->ap_trace_start + agg_param->ap_trace_count;
}
agg_param->ap_trace_count++;
memcpy(&agg_param->ap_evt_trace[next_idx], entry, sizeof(*entry));

credits_consume(&agg_param->ap_credits, AGG_OP_SCAN);

/* Discard */
Expand Down Expand Up @@ -2311,8 +2318,6 @@ vos_aggregate_pre_cb(daos_handle_t ih, vos_iter_entry_t *entry,
break;
case VOS_ITER_AKEY:
rc = vos_agg_akey(ih, entry, agg_param, acts);
agg_param->ap_trace_start = 0;
agg_param->ap_trace_count = 0;
break;
case VOS_ITER_RECX:
rc = vos_agg_ev(ih, entry, agg_param, acts);
Expand Down Expand Up @@ -2416,8 +2421,6 @@ vos_aggregate_post_cb(daos_handle_t ih, vos_iter_entry_t *entry,
agg_param->ap_skip_akey = false;
break;
}
agg_param->ap_trace_start = 0;
agg_param->ap_trace_count = 0;
rc = vos_obj_iter_aggregate(ih, agg_param->ap_discard_obj);
break;
case VOS_ITER_SINGLE:
Expand Down

0 comments on commit 083d786

Please sign in to comment.