From a5552da465665cb7852eb02d5a8aa11b9a67a8ff Mon Sep 17 00:00:00 2001 From: wiliamhuang Date: Thu, 29 Aug 2024 10:55:20 -0500 Subject: [PATCH] DAOS-16365 client: intercept MPI_Init() to avoid nested call (#14992) We observed deadlock in MPI applications on Aurora due to nested calls of zeInit() inside MPI_Init(). daos_init() is involved in such nested calls. This PR intercepts MPI_Init() and avoids running daos_init() inside MPI_Init(). Signed-off-by: Lei Huang --- src/client/dfuse/pil4dfs/int_dfs.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 81cd69ccda1..0ff8d0d8f19 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -160,6 +160,8 @@ static long int page_size; #define DAOS_INIT_NOT_RUNNING 0 #define DAOS_INIT_RUNNING 1 +static _Atomic uint64_t mpi_init_count; + static long int daos_initing; _Atomic bool d_daos_inited; static bool daos_debug_inited; @@ -467,6 +469,8 @@ static int (*next_posix_fallocate64)(int fd, off64_t offset, off64_t len); static int (*next_tcgetattr)(int fd, void *termios_p); /* end NOT supported by DAOS */ +static int (*next_mpi_init)(int *argc, char ***argv); + /* to do!! */ /** * static char * (*org_realpath)(const char *pathname, char *resolved_path); @@ -1020,6 +1024,22 @@ consume_low_fd(void) return rc; } +int +MPI_Init(int *argc, char ***argv) +{ + int rc; + + if (next_mpi_init == NULL) { + next_mpi_init = dlsym(RTLD_NEXT, "MPI_Init"); + D_ASSERT(next_mpi_init != NULL); + } + + atomic_fetch_add_relaxed(&mpi_init_count, 1); + rc = next_mpi_init(argc, argv); + atomic_fetch_add_relaxed(&mpi_init_count, -1); + return rc; +} + /** determine whether a path (both relative and absolute) is on DAOS or not. If yes, * returns parent object, item name, full path of parent dir, full absolute path, and * the pointer to struct dfs_mt. 
@@ -1117,6 +1137,15 @@ query_path(const char *szInput, int *is_target_path, struct dcache_rec **parent, uint64_t status_old = DAOS_INIT_NOT_RUNNING; bool rc_cmp_swap; + /* Check whether MPI_Init() is running. If yes, pass to the original + * libc functions. Avoid possible zeInit reentrancy/nested call. + */ + + if (atomic_load_relaxed(&mpi_init_count) > 0) { + *is_target_path = 0; + goto out_normal; + } + /* daos_init() is expensive to call. We call it only when necessary. */ /* Check whether daos_init() is running. If yes, pass to the original