From cce5ff15107be5b439936ca12c02fccc78a6954d Mon Sep 17 00:00:00 2001
From: Andy Ross
Date: Wed, 3 Feb 2021 12:56:51 -0800
Subject: [PATCH] arch/x86: Fix stack alignment for user threads

The x86_64 SysV ABI requires 16 byte alignment for the stack pointer
during execution of normal code.  That means that on entry to an
ABI-compatible C function (which is reached via a CALL instruction
that pushes the return address) the RSP register must be MISaligned
by exactly 8 bytes.  The kernel mode thread setup got this right, but
we missed the equivalent condition in userspace entry.

The end result was a misaligned stack, which is surprisingly robust
for most use.  But recent toolchains have started doing some more
elaborate vectorization, and the resulting SSE instructions started
failing in userspace on the misaligned loads.

Note that there's a comment about optimization: we're doing the stack
alignment in the "wrong place" and are needlessly wasting bytes in
some cases.  The alignment should be done where we see the raw stack
boundaries and set up the RSP values.  Add a FIXME to this effect,
but don't touch anything, as this patch is a targeted bugfix.

Also fix a somewhat embarrassing 32-bit-ism that would have truncated
the address of a userspace stack that we tried to put above 4G.

Fixes #31018

Signed-off-by: Andy Ross
---
 arch/x86/core/intel64/thread.c |  8 ++++++++
 arch/x86/core/userspace.c      | 11 ++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/x86/core/intel64/thread.c b/arch/x86/core/intel64/thread.c
index 451f34b50ca..d0e44503948 100644
--- a/arch/x86/core/intel64/thread.c
+++ b/arch/x86/core/intel64/thread.c
@@ -12,6 +12,14 @@
 
 extern void x86_sse_init(struct k_thread *); /* in locore.S */
 
+/* FIXME: This exists to make space for a "return address" at the top
+ * of the stack.  Obviously this is unused at runtime, but is required
+ * for alignment: stacks at runtime should be 16-byte aligned, and a
+ * CALL will therefore push a return address that leaves the stack
+ * misaligned.  Effectively we're wasting 8 bytes here to undo (!) the
+ * alignment that the upper level code already tried to do for us.  We
+ * should clean this up.
+ */
 struct x86_initial_frame {
 	/* zeroed return address for ABI */
 	uint64_t rip;
diff --git a/arch/x86/core/userspace.c b/arch/x86/core/userspace.c
index 98027ff50fa..43d02af23ec 100644
--- a/arch/x86/core/userspace.c
+++ b/arch/x86/core/userspace.c
@@ -87,7 +87,7 @@ void *z_x86_userspace_prepare_thread(struct k_thread *thread)
 FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
 					void *p1, void *p2, void *p3)
 {
-	uint32_t stack_end;
+	size_t stack_end;
 
 	/* Transition will reset stack pointer to initial, discarding
 	 * any old context since this is a one-way operation
@@ -96,6 +96,15 @@ FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
 				      _current->stack_info.size -
 				      _current->stack_info.delta);
 
+#ifdef CONFIG_X86_64
+	/* x86_64 SysV ABI requires 16 byte stack alignment, which
+	 * means that on entry to a C function (which follows a CALL
+	 * that pushes 8 bytes) the stack must be MISALIGNED by
+	 * exactly 8 bytes.
+	 */
+	stack_end -= 8;
+#endif
+
 	z_x86_userspace_enter(user_entry, p1, p2, p3, stack_end,
 			      _current->stack_info.start);
 	CODE_UNREACHABLE;
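
Note (not part of the patch): a minimal standalone sketch of the alignment
rule the commit message describes.  The helper initial_user_rsp() and the
example stack address are hypothetical; the real code takes the aligned
value from Z_STACK_PTR_ALIGN() and then applies the "stack_end -= 8"
adjustment shown in the diff above.

    /* Illustrative sketch only, not Zephyr code. */
    #include <assert.h>
    #include <stdint.h>

    static uintptr_t initial_user_rsp(uintptr_t stack_top)
    {
            /* Round down to the 16-byte boundary the SysV ABI requires
             * while normal code is executing...
             */
            uintptr_t rsp = stack_top & ~(uintptr_t)15;

            /* ...then misalign by 8 bytes so the stack looks as if a
             * CALL (which pushes an 8-byte return address) had just
             * happened, which is the state an ABI-compliant C function
             * expects on entry.
             */
            return rsp - 8;
    }

    int main(void)
    {
            uintptr_t rsp = initial_user_rsp(0x20000u); /* hypothetical stack top */

            assert(rsp % 16 == 8);       /* function-entry state: off by 8 */
            assert((rsp - 8) % 16 == 0); /* after one more 8-byte push: aligned */
            return 0;
    }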