/* zephyr/arch/x86/core/ia32/userspace.S */

/*
* Copyright (c) 2017 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <zephyr/arch/x86/ia32/asm.h>
#include <zephyr/arch/cpu.h>
#include <offsets_short.h>
#include <zephyr/syscall.h>
#include <zephyr/kernel/mm.h>
#include <x86_mmu.h>
/* Exports */
GTEXT(z_x86_syscall_entry_stub)
GTEXT(z_x86_userspace_enter)
GTEXT(arch_user_string_nlen)
GTEXT(z_x86_user_string_nlen_fault_start)
GTEXT(z_x86_user_string_nlen_fault_end)
GTEXT(z_x86_user_string_nlen_fixup)
/* Imports */
GDATA(_k_syscall_table)
#ifdef CONFIG_X86_KPTI
/* z_x86_trampoline_to_kernel
 *
 * Switch from the shadow to the kernel page table, switch to the interrupted
 * thread's kernel stack, and copy all context from the trampoline stack.
 * If the interrupted code was not running in ring 3 this does nothing and
 * simply returns.
 *
 * This gets called from other ASM code, so ESI and EDI (the only scratch
 * registers used here) are restored before returning. ESP is of course
 * changed when the stack switch happens, and the flags are modified by the
 * privilege check.
 *
 * Assumes a particular stack layout which is correct for
 * _exception_enter and _interrupt_enter when invoked with a call instruction:
 *
 * 28 SS
 * 24 ESP
 * 20 EFLAGS
 * 16 CS
 * 12 EIP
 * 8 isr_param or exc code
 * 4 isr or exc handler
 * 0 return address
 */
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_kernel)
/* Check the low two bits (privilege level) of the interrupted code
 * segment to see if we came from ring 3 and hence are on the trampoline
 * stack. If they are zero there is nothing to do.
 */
testb $3, 16(%esp) /* Offset of CS */
jz 1f
/* Stash these regs as we need to use them. Every offset into the
 * layout above grows by 8 from here on.
 */
pushl %esi
pushl %edi
/* Switch to kernel page table */
movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
movl %esi, %cr3
/* Save old trampoline stack pointer in %edi */
movl %esp, %edi
/* Switch to privilege mode stack: ESP = _kernel.current->psp */
movl $_kernel, %esi
movl _kernel_offset_to_current(%esi), %esi
movl _thread_offset_to_psp(%esi), %esp
/* Transplant stack context and restore ESI/EDI. Taking care to zero
 * or put uninteresting values where we stashed ESI/EDI since the
 * trampoline page is insecure and there might be a context switch
 * on the way out instead of returning to the original thread
 * immediately.
 *
 * The frame is pushed highest slot first so that it ends up on the new
 * stack in the same order it had on the trampoline stack.
 */
pushl 36(%edi) /* SS */
pushl 32(%edi) /* ESP */
pushl 28(%edi) /* EFLAGS */
pushl 24(%edi) /* CS */
pushl 20(%edi) /* EIP */
pushl 16(%edi) /* error code or isr parameter */
pushl 12(%edi) /* exception/irq handler */
pushl 8(%edi) /* return address */
movl 4(%edi), %esi /* restore ESI */
movl $0, 4(%edi) /* Zero old esi storage area */
xchgl %edi, (%edi) /* Exchange old edi to restore it and put
old sp in the storage area */
/* Trampoline stack should have nothing sensitive in it at this point */
1:
/* Returns through the return address at the top of whichever stack is
 * now active (the transplanted copy if we switched stacks).
 */
ret
/* z_x86_trampoline_to_user / z_x86_trampoline_to_user_always
 *
 * Copy interrupt return stack context to the trampoline stack, switch back
 * to the user page table, and only then 'iret'. We jump to this instead
 * of calling 'iret' if KPTI is turned on.
 *
 * Stack layout is expected to be as follows:
 *
 * 16 SS
 * 12 ESP
 * 8 EFLAGS
 * 4 CS
 * 0 EIP
 *
 * z_x86_trampoline_to_user first checks the privilege level of the CS being
 * returned to and does a plain 'iret' for ring 0;
 * z_x86_trampoline_to_user_always skips that check.
 *
 * This function is conditionally macroed to KPTI_IRET/KPTI_IRET_USER
 */
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user)
/* Check the code segment we are returning to. Only a return to ring 3
 * needs the trampoline stack and the user page table; otherwise 'iret'
 * directly from the current stack.
 */
testb $3, 4(%esp) /* Offset of CS */
jz 1f
/* Otherwise, fall through ... */
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user_always)
/* Stash EDI, need a free register. Offsets into the layout above grow
 * by 4 from here on.
 */
pushl %edi
/* Store old stack pointer and switch to trampoline stack.
 * Lock IRQs before changing stack pointer to the trampoline stack,
 * we don't want any interrupts also using the trampoline stack
 * during this time.
 */
movl %esp, %edi
cli
movl $z_trampoline_stack_end, %esp
/* Copy context, highest slot first so the frame keeps its order */
pushl 20(%edi) /* SS */
pushl 16(%edi) /* ESP */
pushl 12(%edi) /* EFLAGS */
pushl 8(%edi) /* CS */
pushl 4(%edi) /* EIP */
xchgl %edi, (%edi) /* Exchange old edi to restore it and put
trampoline stack address in its old storage
area */
/* Switch to user page table: CR3 = _kernel.current->ptables.
 * EAX is borrowed for this and parked on the trampoline stack meanwhile.
 */
pushl %eax
movl $_kernel, %eax
movl _kernel_offset_to_current(%eax), %eax
movl _thread_offset_to_ptables(%eax), %eax
movl %eax, %cr3
popl %eax
movl $0, -4(%esp) /* Delete stashed EAX data */
/* Trampoline stack should have nothing sensitive in it at this point */
1:
iret
#endif /* CONFIG_X86_KPTI */
/* z_x86_syscall_entry_stub
 *
 * Landing site for syscall SW IRQ. Marshal arguments and call C function for
 * further processing. We're on the kernel stack for the invoking thread,
 * unless KPTI is enabled, in which case we're on the trampoline stack and
 * need to get off it before enabling interrupts.
 *
 * Registers on entry, as consumed below:
 *   ESI                          system call ID
 *   EAX, EDX, ECX, EBX, EDI, EBP arg1 .. arg6
 *
 * On exit EAX holds the handler's return value. EDX, ECX, EBX and EDI are
 * reloaded from the argument slots that were pushed for the handler, so no
 * kernel scratch values are left in them.
 */
SECTION_FUNC(TEXT, z_x86_syscall_entry_stub)
#ifdef CONFIG_X86_KPTI
	/* Stash these regs as we need to use them. The trampoline stack now
	 * looks like this:
	 *
	 * 24 SS
	 * 20 ESP
	 * 16 EFLAGS
	 * 12 CS
	 *  8 EIP
	 *  4 stashed ESI
	 *  0 stashed EDI
	 */
	pushl	%esi
	pushl	%edi

	/* Switch to kernel page table */
	movl	$Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
	movl	%esi, %cr3

	/* Save old trampoline stack pointer in %edi */
	movl	%esp, %edi

	/* Switch to privilege elevation stack: ESP = _kernel.current->psp */
	movl	$_kernel, %esi
	movl	_kernel_offset_to_current(%esi), %esi
	movl	_thread_offset_to_psp(%esi), %esp

	/* Transplant the interrupt frame according to the layout above.
	 * Variant of the logic in z_x86_trampoline_to_kernel.
	 */
	pushl	24(%edi)	/* SS */
	pushl	20(%edi)	/* ESP */
	pushl	16(%edi)	/* EFLAGS */
	pushl	12(%edi)	/* CS */
	pushl	8(%edi)		/* EIP */
	movl	4(%edi), %esi	/* restore ESI */
	movl	$0, 4(%edi)	/* Zero old esi storage area */
	xchgl	%edi, (%edi)	/* Exchange old edi to restore it and put
				 * old sp in the storage area
				 */

	/* Trampoline stack should have nothing sensitive in it at this point */
#endif /* CONFIG_X86_KPTI */

	sti			/* re-enable interrupts */
	cld			/* clear direction flag, restored on 'iret' */

	/* call_id is in ESI. bounds-check it, must be less than
	 * K_SYSCALL_LIMIT. The comparison is unsigned (jae), so IDs with the
	 * top bit set are rejected as well.
	 */
	cmp	$K_SYSCALL_LIMIT, %esi
	jae	_bad_syscall

_id_ok:
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
	/* Prevent speculation with bogus system call IDs */
	lfence
#endif
	/* Marshal arguments per calling convention to match what is expected
	 * for _k_syscall_handler_t functions. Arguments go on the stack in
	 * reverse order, so arg1 ends up at the lowest address.
	 */
	push	%esp		/* ssf: ESP as it was before this push, i.e.
				 * the address of the saved EIP at the base
				 * of the interrupt frame
				 */
	push	%ebp		/* arg6 */
	push	%edi		/* arg5 */
	push	%ebx		/* arg4 */
	push	%ecx		/* arg3 */
	push	%edx		/* arg2 */
	push	%eax		/* arg1 */

	/* from the call ID in ESI, load EBX with the actual function pointer
	 * to call by looking it up in the system call dispatch table.
	 * EDI is zeroed only to serve as the base register; each table entry
	 * is 4 bytes wide.
	 */
	xor	%edi, %edi
	mov	_k_syscall_table(%edi, %esi, 4), %ebx

	/* Run the handler, which is some entry in _k_syscall_table */
	call	*%ebx

	/* EAX now contains return value. Pop or xor everything else to prevent
	 * information leak from kernel mode.
	 */
	pop	%edx		/* old arg1 value, discard it */
	pop	%edx
	pop	%ecx
	pop	%ebx
	pop	%edi
	/* Discard ssf and arg6 */
	add	$8, %esp
	KPTI_IRET_USER

_bad_syscall:
	/* ESI had a bogus syscall value in it, replace with the bad syscall
	 * handler's ID, and put the bad ID as its first argument. This
	 * clobbers ESI but the bad syscall handler never returns
	 * anyway, it's going to generate a kernel oops
	 */
	mov	%esi, %eax
	mov	$K_SYSCALL_BAD, %esi
	jmp	_id_ok
/*
 * size_t arch_user_string_nlen(const char *s, size_t maxsize, int *err_arg)
 *
 * Measure the length of the string at 's', examining at most 'maxsize'
 * bytes of it.
 *
 * Stack arguments, relative to EBP once the frame is set up:
 *   0x08(%ebp)  s
 *   0x0c(%ebp)  maxsize
 *   0x10(%ebp)  err_arg
 *
 * Returns in EAX the number of bytes preceding the first NUL, or 'maxsize'
 * if no NUL was found in s[0 .. maxsize-1]. *err_arg is set to 0.
 *
 * The one instruction that dereferences 's' is bracketed by
 * z_x86_user_string_nlen_fault_start/_fault_end. Those labels and
 * z_x86_user_string_nlen_fixup are exported; presumably the page fault
 * handler resumes at the fixup label when the faulting instruction lies in
 * that window (confirm against the exception fixup table). On that path
 * *err_arg receives the initial -1 and EAX holds the number of bytes that
 * were scanned before the fault.
 *
 * Clobbers EAX, ECX, EDX and the flags.
 */
SECTION_FUNC(TEXT, arch_user_string_nlen)
	push	%ebp
	mov	%esp, %ebp

	/* error value, set to -1 initially. This location is -4(%ebp) */
	push	$-1

	xor	%eax, %eax		/* EAX = 0, length count */
	mov	0x8(%ebp), %edx		/* EDX base of string */

strlen_loop:
	/* Check the limit BEFORE touching memory so that s[maxsize] is never
	 * read: only bytes in [0, maxsize) belong to the caller's buffer, and
	 * the byte after it may live on an unmapped page.
	 */
	cmp	0xc(%ebp), %eax		/* Max length reached? (unsigned) */
	jae	strlen_done

	/* This code might page fault */
z_x86_user_string_nlen_fault_start:
	cmpb	$0x0, (%edx, %eax, 1)	/* *(EDX + EAX) == 0? Could fault. */
z_x86_user_string_nlen_fault_end:
	je	strlen_done
	inc	%eax			/* EAX++ and loop again */
	jmp	strlen_loop

strlen_done:
	/* Set error value to 0 since we succeeded */
	movl	$0, -4(%ebp)

z_x86_user_string_nlen_fixup:
	/* Write error value to err pointer parameter. The pop also removes
	 * the error slot from the stack, leaving the saved EBP on top.
	 */
	movl	0x10(%ebp), %ecx
	pop	%edx
	movl	%edx, (%ecx)

	pop	%ebp
	ret
/* FUNC_NORETURN void z_x86_userspace_enter(k_thread_entry_t user_entry,
 * void *p1, void *p2, void *p3,
 * uint32_t stack_end,
 * uint32_t stack_start)
 *
 * A one-way trip to userspace.
 *
 * All six arguments are taken from the stack. The function erases the user
 * stack (via z_x86_current_stack_perms), builds a call frame for
 * z_thread_entry(user_entry, p1, p2, p3) at the top of it, and then fakes an
 * interrupt return to start executing z_thread_entry with the user code and
 * data segments.
 *
 * Register roles once the arguments have been fetched:
 *   EAX user_entry, EDX p1, ECX p2, ESI p3,
 *   EBX stack_end (high address), EDI stack_start (low address)
 */
SECTION_FUNC(TEXT, z_x86_userspace_enter)
pop %esi /* Discard return address on stack */
/* Fetch parameters on the stack */
pop %eax /* user_entry */
pop %edx /* p1 */
pop %ecx /* p2 */
pop %esi /* p3 */
pop %ebx /* stack_end (high address) */
pop %edi /* stack_start (low address) */
/* Move to the kernel stack for this thread, so we can erase the
 * user stack. The kernel stack is the page immediately before
 * the user stack.
 *
 * For security reasons, we must erase the entire user stack.
 * We don't know in what previous contexts it was used and do not
 * want to leak any information.
 */
mov %edi, %esp
/* Erase and enable US bit in page tables for the stack buffer.
 * EAX, ECX and EDX are saved around the call; ESI and EBX are not, so
 * this relies on the callee preserving them (presumably per the C
 * calling convention for a C-implemented callee).
 */
push %ecx
push %eax
push %edx
call z_x86_current_stack_perms
pop %edx
pop %eax
pop %ecx
/* Set stack pointer to the base of the freshly-erased user stack.
 * Now that this is set we won't need EBX any more.
 */
mov %ebx, %esp
/* Set segment registers (except CS and SS which are done in
 * a special way by 'iret' below)
 */
mov $USER_DATA_SEG, %bx
mov %bx, %ds
mov %bx, %es
/* Push arguments to z_thread_entry(), last argument first */
push %esi /* p3 */
push %ecx /* p2 */
push %edx /* p1 */
push %eax /* user_entry */
/* NULL return address */
push $0
/* Save stack pointer at this position, this is where it will be
 * when we land in z_thread_entry()
 */
mov %esp, %edi
/* Inter-privilege 'iret' pops all of these. Need to fake an interrupt
 * return to enter user mode as far calls cannot change privilege
 * level
 */
push $USER_DATA_SEG /* SS */
push %edi /* ESP */
pushfl /* EFLAGS */
push $USER_CODE_SEG /* CS */
push $z_thread_entry /* EIP */
/* We will land in z_thread_entry() in user mode after this */
KPTI_IRET_USER