From 6fe39b64a3f4a279cf3556f3315fcfe5a9879ed6 Mon Sep 17 00:00:00 2001 From: Jeff Olivier Date: Tue, 29 Oct 2024 08:54:46 -0600 Subject: [PATCH] DAOS-16211 vos: Avoid race condition with discard (#15370) There is a possible race between aggregation deleting the object tree and discard working on the same object tree. Add a check to avoid this race. Signed-off-by: Jeff Olivier --- src/vos/vos_obj_index.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/vos/vos_obj_index.c b/src/vos/vos_obj_index.c index ea47cf4454cf..6806ca73159f 100644 --- a/src/vos/vos_obj_index.c +++ b/src/vos/vos_obj_index.c @@ -793,6 +793,7 @@ oi_iter_check_punch(daos_handle_t ih) { struct vos_iterator *iter = vos_hdl2iter(ih); struct vos_oi_iter *oiter = iter2oiter(iter); + struct vos_container *cont = oiter->oit_cont; struct vos_obj_df *obj; struct oi_delete_arg del_arg; daos_unit_oid_t oid; @@ -811,10 +812,22 @@ oi_iter_check_punch(daos_handle_t ih) obj = (struct vos_obj_df *)rec_iov.iov_buf; oid = obj->vo_id; - if (!vos_ilog_is_punched(vos_cont2hdl(oiter->oit_cont), &obj->vo_ilog, &oiter->oit_epr, + if (!vos_ilog_is_punched(vos_cont2hdl(cont), &obj->vo_ilog, &oiter->oit_epr, NULL, &oiter->oit_ilog_info)) return 0; + rc = vos_obj_hold(vos_obj_cache_current(cont->vc_pool->vp_sysdb), cont, oid, + &oiter->oit_epr, iter->it_bound, VOS_OBJ_AGGREGATE | VOS_OBJ_NO_HOLD, + DAOS_INTENT_PURGE, NULL, NULL); + if (rc != 0) { + /** -DER_BUSY means the object is in-use already. We will retry after a yield in this + * case. + */ + D_CDEBUG(rc == -DER_BUSY, DB_EPC, DLOG_ERR, "Hold check failed for " DF_UOID "\n", + DP_UOID(oid)); + return rc; + } + /** Ok, ilog is fully punched, so we can move it to gc heap */ rc = umem_tx_begin(vos_cont2umm(oiter->oit_cont), NULL); if (rc != 0)