Skip to content

Commit

Permalink
i#6514 null SP to clone(), part 3: Handle null SP passed to clone (#6601
Browse files Browse the repository at this point in the history
)

The kernel supports passing NULL for SP to clone: It means to use the SP
of the parent. Supporting this requires augmenting dynamorio_clone to
handle SP==NULL, and augmenting create_clone_record to handle SP==NULL.

Only tested X86 in this pass. Other arches deferred.

With 32-bit testing temporarily downsized, this was manually tested on
i386:
$ ctest -R clone
Test project /home/dje/upstream/i6514/build32
    Start  34: code_api|linux.clone
1/3 Test  #34: code_api|linux.clone .............   Passed    1.00 sec
    Start  35: code_api|linux.clone-pie
2/3 Test  #35: code_api|linux.clone-pie .........   Passed    0.97 sec
    Start 100: code_api|linux.clone-reset
3/3 Test #100: code_api|linux.clone-reset .......   Passed    0.97 sec
100% tests passed, 0 tests failed out of 3
Total Test time (real) =   3.03 sec

Issue #6514

---------

Co-authored-by: Yang Liu <[email protected]>
Co-authored-by: Derek Bruening <[email protected]>
Co-authored-by: Yang Hu <[email protected]>
Co-authored-by: Phil Ramsey <[email protected]>
Co-authored-by: Abhinav Anil Sharma <[email protected]>
  • Loading branch information
6 people authored Feb 8, 2024
1 parent 845c1d4 commit 55da514
Show file tree
Hide file tree
Showing 8 changed files with 170 additions and 24 deletions.
6 changes: 5 additions & 1 deletion core/drlibc/drlibc.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ dynamorio_mach_syscall(uint sysnum, uint num_args, ...);
# else
ptr_int_t
dynamorio_syscall(uint sysnum, uint num_args, ...);
/* N.B. func must not return. */
/* Wrapper for clone().
* N.B. func must not return.
* On x86 (32 and 64-bit) this supports passing NULL for newsp which is just
* shorthand for the child using the same value as the parent.
*/
thread_id_t
dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, void *ctid,
void (*func)(void));
Expand Down
1 change: 1 addition & 0 deletions core/drlibc/drlibc_aarch64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ GLOBAL_LABEL(dynamorio_mach_syscall:)
#ifdef LINUX
/* thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls,
* void *ctid, void (*func)(void))
* TODO i#6514: Add support for passing NULL for newsp.
*/
DECLARE_FUNC(dynamorio_clone)
GLOBAL_LABEL(dynamorio_clone:)
Expand Down
1 change: 1 addition & 0 deletions core/drlibc/drlibc_arm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ GLOBAL_LABEL(FUNCNAME:)
#ifdef LINUX
/* thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls,
* void *ctid, void (*func)(void))
* TODO i#6514: Add support for passing NULL for newsp.
*/
DECLARE_FUNC(dynamorio_clone)
GLOBAL_LABEL(dynamorio_clone:)
Expand Down
1 change: 1 addition & 0 deletions core/drlibc/drlibc_riscv64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ GLOBAL_LABEL(FUNCNAME:)
/*
* thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls,
* void *ctid, void (*func)(void))
* TODO i#6514: Add support for passing NULL for newsp.
*/
DECLARE_FUNC(dynamorio_clone)
GLOBAL_LABEL(dynamorio_clone:)
Expand Down
55 changes: 39 additions & 16 deletions core/drlibc/drlibc_x86.asm
Original file line number Diff line number Diff line change
Expand Up @@ -762,51 +762,74 @@ GLOBAL_LABEL(load_dynamo_failure:)
* signature:
* thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls,
* void *ctid, void (*func)(void))
* i#6514: If newsp is NULL then that tells the kernel to give the child the
* same value for SP as the parent.
*/
DECLARE_FUNC(dynamorio_clone)
GLOBAL_LABEL(dynamorio_clone:)
/* save func for use post-syscall on the newsp.
* when using clone_record_t we have 4 slots we can clobber.
/* Save func for use post-syscall on the newsp.
* This is tricky because we have to handle the case of newsp == NULL.
*/
# ifdef X64
sub ARG2, ARG_SZ
mov [ARG2], ARG6 /* func is now on TOS of newsp */
/* all args are already in syscall registers */
/* The syscall preserves all registers except rax, rcx, r11. */
push r15
mov r15, ARG6 /* Func is now in r15. */
and ARG2, -FRAME_ALIGNMENT /* For glibc compatibility, align newsp. */
/* All args are already in syscall registers, except for rcx. */
mov r10, rcx
mov REG_XAX, SYS_clone
syscall
# else
mov REG_XAX, ARG6
mov REG_XCX, ARG2
sub REG_XCX, ARG_SZ
mov [REG_XCX], REG_XAX /* func is now on TOS of newsp */
mov REG_XDX, ARG3
/* preserve callee-saved regs */
/* Fetch some args we need before we modify XSP and ARGn is no
* longer usable.
*/
mov REG_XCX, ARG2 /* newsp */
mov REG_XDX, ARG3 /* ptid */
mov REG_XAX, ARG6 /* func */
/* Preserve callee-saved regs. */
push REG_XBX
push REG_XSI
push REG_XDI
/* now can't use ARG* since xsp modified by pushes */
/* Now can't use ARG* since xsp modified by pushes. */
mov REG_XBX, DWORD [4*ARG_SZ + REG_XSP] /* ARG1 + 3 pushes */
mov REG_XSI, DWORD [7*ARG_SZ + REG_XSP] /* ARG4 + 3 pushes */
mov REG_XDI, DWORD [8*ARG_SZ + REG_XSP] /* ARG5 + 3 pushes */
/* i#6514: Save func on the child's stack. Remember that if newsp is
* NULL then the child's stack is our stack. When the syscall returns
* it's cumbersome to know whether newsp was NULL. To keep things simple
* for the parent always push func on our stack.
*/
push REG_XAX /* Xsp is misaligned at this point but kernel doesn't care. */
and REG_XCX, -FRAME_ALIGNMENT /* For glibc compatibility, align newsp. */
jz newsp_is_null
sub REG_XCX, ARG_SZ
mov [REG_XCX], REG_XAX /* Func is now on TOS of newsp. */
newsp_is_null:
mov REG_XAX, SYS_clone
/* PR 254280: we assume int$80 is ok even for LOL64 */
int HEX(80)
# endif
cmp REG_XAX, 0
jne dynamorio_clone_parent
# ifdef X64
call r15
# else
pop REG_XCX
call REG_XCX
/* shouldn't return */
# endif
/* Shouldn't return. */
jmp GLOBAL_REF(unexpected_return)
dynamorio_clone_parent:
# ifndef X64
/* restore callee-saved regs */
# ifdef X64
pop r15
# else
/* Restore callee-saved regs. */
add REG_XSP, ARG_SZ /* Discard func. */
pop REG_XDI
pop REG_XSI
pop REG_XBX
# endif
/* return val is in eax still */
/* Return val is in eax still. */
ret
END_FUNC(dynamorio_clone)
#endif /* LINUX */
Expand Down
10 changes: 8 additions & 2 deletions core/unix/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -817,12 +817,18 @@ create_clone_record(dcontext_t *dcontext, reg_t *app_thread_xsp)
* cl_args->stack. But we expect the highest (non-inclusive)
* in the clone record's app_thread_xsp.
*/
record->app_thread_xsp = dr_clone_args->stack + dr_clone_args->stack_size;
if (dr_clone_args->stack == 0)
record->app_thread_xsp = get_mcontext(dcontext)->xsp;
else
record->app_thread_xsp = dr_clone_args->stack + dr_clone_args->stack_size;
record->clone_flags = dr_clone_args->flags;
record->app_clone_args = app_clone_args;
} else {
#endif
record->app_thread_xsp = *app_thread_xsp;
if (*app_thread_xsp == 0)
record->app_thread_xsp = get_mcontext(dcontext)->xsp;
else
record->app_thread_xsp = *app_thread_xsp;
record->clone_flags = dcontext->sys_param0;
IF_LINUX(record->app_clone_args = NULL);
#ifdef LINUX
Expand Down
62 changes: 57 additions & 5 deletions suite/tests/linux/clone.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ typedef unsigned long ulong;

/* forward declarations */
static int
make_clone_syscall(uint flags, byte *newsp, void *ptid, void *tls, void *ctid,
void (*fcn)(void));
static int
make_clone3_syscall(void *clone_args, ulong clone_args_size, void (*fcn)(void));
static pid_t
create_thread(void (*fcn)(void), void **stack, bool share_sighand, bool clone_vm);
Expand Down Expand Up @@ -118,6 +121,35 @@ test_thread(bool share_sighand, bool clone_vm, bool use_clone3)
delete_thread(child, stack);
}

#ifdef X86 /* i#6514: dynamorio_clone needs to be updated for other arches. */

/* i#6514: Test passing NULL for the stack pointer to the syscall. */
void
test_with_null_stack_pointer(bool clone_vm, bool use_clone3)
{
print("%s(clone_vm %d, use_clone3 %d)\n", __FUNCTION__, clone_vm, use_clone3);
int flags = clone_vm ? (CLONE_VFORK | CLONE_VM) : 0;
int ret;
/* If we don't have clone3, keep expected output the same and just use clone. */
if (use_clone3 && clone3_available) {
clone3_syscall_args_t cl_args = { 0 };
cl_args.flags = flags;
cl_args.exit_signal = SIGCHLD;
ret = make_clone3_syscall(&cl_args, sizeof(cl_args), run_with_exit);
} else {
flags = flags | SIGCHLD;
ret = make_clone_syscall(flags, /*stack=*/NULL, /*parent_tid=*/NULL,
/*tls=*/NULL, /*child_tid=*/NULL, run_with_exit);
}
if (ret == -1) {
perror("Error calling clone");
return;
}
delete_thread(ret, NULL);
}

#endif

int
main()
{
Expand Down Expand Up @@ -154,6 +186,14 @@ main()
*/
test_thread(true /*share_sighand*/, true /*clone_vm*/, false /*use_clone3*/);
test_thread(true /*share_sighand*/, true /*clone_vm*/, true /*use_clone3*/);

#if defined(X86)
/* Test passing NULL for the stack pointer (xref i#6514). */
test_with_null_stack_pointer(/*clone_vm=*/false, /*use_clone3=*/false);
test_with_null_stack_pointer(/*clone_vm=*/false, /*use_clone3=*/true);
test_with_null_stack_pointer(/*clone_vm=*/true, /*use_clone3=*/false);
test_with_null_stack_pointer(/*clone_vm=*/true, /*use_clone3=*/true);
#endif
}

/* Procedure executed by sideline threads
Expand Down Expand Up @@ -185,6 +225,19 @@ run_with_exit(void)
exit(run(NULL));
}

/* A wrapper on dynamorio_clone to set errno. */
static int
make_clone_syscall(uint flags, byte *newsp, void *ptid, void *tls, void *ctid,
void (*fcn)(void))
{
int ret = dynamorio_clone(flags, newsp, ptid, tls, ctid, fcn);
if (ret < 0) {
errno = -ret;
return -1;
}
return ret;
}

void *p_tid, *c_tid;

/* Create a new thread. It should be passed "fcn", a function which
Expand Down Expand Up @@ -212,11 +265,9 @@ create_thread(void (*fcn)(void), void **stack, bool share_sighand, bool clone_vm
(clone_vm ? CLONE_VM : 0));
/* The stack arg should point to the stack's highest address (non-inclusive). */
stack_ptr = (void *)((size_t)my_stack + THREAD_STACK_SIZE);
newpid = dynamorio_clone(flags, stack_ptr, &p_tid, NULL, &c_tid, fcn);
newpid = make_clone_syscall(flags, stack_ptr, &p_tid, NULL, &c_tid, fcn);

if (newpid < 0) {
/* dynamorio_clone doesn't set errno */
errno = -newpid;
perror("Error calling clone\n");
stack_free(my_stack, THREAD_STACK_SIZE);
return -1;
Expand All @@ -226,7 +277,7 @@ create_thread(void (*fcn)(void), void **stack, bool share_sighand, bool clone_vm
return newpid;
}

/* glibc does not provide a wrapper for clone3 yet. This makes it difficult
/* glibc,drlibc do not provide a wrapper for clone3 yet. This makes it difficult
* to create new threads in C code using syscall(), as we have to deal with
* complexities associated with the child thread having a fresh stack
* without any return addresses or space for local variables. So, we
Expand Down Expand Up @@ -360,7 +411,8 @@ delete_thread(pid_t pid, void *stack)
perror("delete_thread waitpid");
else if (!WIFEXITED(wait_status) || WEXITSTATUS(wait_status) != 0)
print("delete_thread bad wait_status: 0x%x\n", wait_status);
stack_free(stack, THREAD_STACK_SIZE);
if (stack != NULL)
stack_free(stack, THREAD_STACK_SIZE);
}

/* Allocate stack storage on the app's heap. Returns the lowest address of the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,61 @@ i = 22500000
i = 25000000
Sideline thread finished
Child has exited
#if defined(X86)
test_with_null_stack_pointer(clone_vm 0, use_clone3 0)
Sideline thread started
i = 2500000
i = 5000000
i = 7500000
i = 10000000
i = 12500000
i = 15000000
i = 17500000
i = 20000000
i = 22500000
i = 25000000
Sideline thread finished
Child has exited
test_with_null_stack_pointer(clone_vm 0, use_clone3 1)
Sideline thread started
i = 2500000
i = 5000000
i = 7500000
i = 10000000
i = 12500000
i = 15000000
i = 17500000
i = 20000000
i = 22500000
i = 25000000
Sideline thread finished
Child has exited
test_with_null_stack_pointer(clone_vm 1, use_clone3 0)
Sideline thread started
i = 2500000
i = 5000000
i = 7500000
i = 10000000
i = 12500000
i = 15000000
i = 17500000
i = 20000000
i = 22500000
i = 25000000
Sideline thread finished
Child has exited
test_with_null_stack_pointer(clone_vm 1, use_clone3 1)
Sideline thread started
i = 2500000
i = 5000000
i = 7500000
i = 10000000
i = 12500000
i = 15000000
i = 17500000
i = 20000000
i = 22500000
i = 25000000
Sideline thread finished
Child has exited
#endif

0 comments on commit 55da514

Please sign in to comment.