diff --git a/arch/x86/core/fatal.c b/arch/x86/core/fatal.c index 1e8b533a8ed..5c4418a66f3 100644 --- a/arch/x86/core/fatal.c +++ b/arch/x86/core/fatal.c @@ -109,6 +109,23 @@ FUNC_NORETURN void _NanoFatalErrorHandler(unsigned int reason, _SysFatalErrorHandler(reason, pEsf); } +FUNC_NORETURN void _arch_syscall_oops(void *ssf_ptr) +{ + struct _x86_syscall_stack_frame *ssf = + (struct _x86_syscall_stack_frame *)ssf_ptr; + NANO_ESF oops_esf = { + .eip = ssf->eip, + .cs = ssf->cs, + .eflags = ssf->eflags + }; + + if (oops_esf.cs == USER_CODE_SEG) { /* esp only present for user cs */ + oops_esf.esp = ssf->esp; + } + + _NanoFatalErrorHandler(_NANO_ERR_KERNEL_OOPS, &oops_esf); +} + #ifdef CONFIG_X86_KERNEL_OOPS /* The reason code gets pushed onto the stack right before the exception is * triggered, so it would be after the nano_esf data diff --git a/arch/x86/core/userspace.S b/arch/x86/core/userspace.S index 5183ff7c81e..5c42d09df34 100644 --- a/arch/x86/core/userspace.S +++ b/arch/x86/core/userspace.S @@ -8,19 +8,33 @@ #include #include #include +#include /* Exports */ GTEXT(_x86_syscall_entry_stub) GTEXT(_x86_userspace_enter) /* Imports */ -GTEXT(_k_syscall_entry) +GTEXT(_k_syscall_table) /* Landing site for syscall SW IRQ. Marshal arguments and call C function for - * further processing. + * further processing. We're on the kernel stack for the invoking thread. */ SECTION_FUNC(TEXT, _x86_syscall_entry_stub) - push %esi /* call_id */ + sti /* re-enable interrupts */ + cld /* clear direction flag, restored on 'iret' */ + + /* call_id is in ESI. 
bounds-check it, must be less than + * K_SYSCALL_LIMIT + */ + cmp $K_SYSCALL_LIMIT, %esi + jae _bad_syscall + +_id_ok: + /* Marshal arguments per calling convention to match what is expected + * for _k_syscall_handler_t functions + */ + push %esp /* ssf */ push %edi /* arg5 */ push %ebx /* arg4 */ #ifndef CONFIG_X86_IAMCU @@ -29,25 +43,44 @@ SECTION_FUNC(TEXT, _x86_syscall_entry_stub) push %eax /* arg1 */ #endif - call _k_syscall_entry + /* From the call ID in ESI, load EBX with the actual function pointer + * to call by looking it up in the system call dispatch table + */ + xor %edi, %edi + mov _k_syscall_table(%edi, %esi, 4), %ebx + + /* Run the handler, which is some entry in _k_syscall_table */ + call *%ebx /* EAX now contains return value. Pop or xor everything else to prevent * information leak from kernel mode. */ #ifndef CONFIG_X86_IAMCU - pop %edx /* old EAX value, discard it */ + pop %edx /* old arg1 value, discard it */ pop %edx pop %ecx -#else - xor %edx, %edx - xor %ecx, %ecx #endif pop %ebx pop %edi - pop %esi - +#ifndef CONFIG_X86_IAMCU + /* Discard ssf, no free register to pop it into so we add instead */ + add $4, %esp +#else + xor %edx, %edx /* Clean EDX */ + pop %ecx /* Get ssf arg off the stack. NOTE(review): ECX ends up holding ssf (a kernel stack address), not a cleaned value -- confirm */ +#endif iret +_bad_syscall: + /* ESI had a bogus syscall value in it, replace with the bad syscall + * handler's ID, and put the bad ID as its first argument. 
This + * clobbers ESI but the bad syscall handler never returns + * anyway, it's going to generate a kernel oops + */ + mov %esi, %eax + mov $K_SYSCALL_BAD, %esi + jmp _id_ok + /* FUNC_NORETURN void _x86_userspace_enter(k_thread_entry_t user_entry, * void *p1, void *p2, void *p3, diff --git a/include/arch/x86/arch.h b/include/arch/x86/arch.h index 05005595846..cca90b987db 100644 --- a/include/arch/x86/arch.h +++ b/include/arch/x86/arch.h @@ -323,6 +323,17 @@ typedef struct nanoEsf { unsigned int eflags; } NANO_ESF; + +struct _x86_syscall_stack_frame { + u32_t eip; + u32_t cs; + u32_t eflags; + + /* These are only present if cs = USER_CODE_SEG */ + u32_t esp; + u32_t ss; +}; + /** * @brief "interrupt stack frame" (ISF) * @@ -545,6 +556,8 @@ extern struct task_state_segment _main_tss; /* Syscall invocation macros. x86-specific machine constraints used to ensure * args land in the proper registers, see implementation of * _x86_syscall_entry_stub in userspace.S + * + * On IAMCU systems the entry stub clobbers EDX and ECX */ static inline u32_t _arch_syscall_invoke5(u32_t arg1, u32_t arg2, u32_t arg3, @@ -554,6 +567,9 @@ static inline u32_t _arch_syscall_invoke5(u32_t arg1, u32_t arg2, u32_t arg3, __asm__ volatile("int $0x80" : "=a" (ret) +#ifdef CONFIG_X86_IAMCU + , "=d" (arg2), "=c" (arg3) +#endif : "S" (call_id), "a" (arg1), "d" (arg2), "c" (arg3), "b" (arg4), "D" (arg5)); return ret; @@ -566,8 +582,11 @@ static inline u32_t _arch_syscall_invoke4(u32_t arg1, u32_t arg2, u32_t arg3, __asm__ volatile("int $0x80" : "=a" (ret) - : "S" (call_id), "a" (arg1), "d" (arg2), - "c" (arg3), "b" (arg4)); +#ifdef CONFIG_X86_IAMCU + , "=d" (arg2), "=c" (arg3) +#endif + : "S" (call_id), "a" (arg1), "d" (arg2), "c" (arg3), + "b" (arg4)); return ret; } @@ -578,6 +597,9 @@ static inline u32_t _arch_syscall_invoke3(u32_t arg1, u32_t arg2, u32_t arg3, __asm__ volatile("int $0x80" : "=a" (ret) +#ifdef CONFIG_X86_IAMCU + , "=d" (arg2), "=c" (arg3) +#endif : "S" (call_id), "a" (arg1), "d" 
(arg2), "c" (arg3)); return ret; } @@ -588,7 +610,14 @@ static inline u32_t _arch_syscall_invoke2(u32_t arg1, u32_t arg2, u32_t call_id) __asm__ volatile("int $0x80" : "=a" (ret) - : "S" (call_id), "a" (arg1), "d" (arg2)); +#ifdef CONFIG_X86_IAMCU + , "=d" (arg2) +#endif + : "S" (call_id), "a" (arg1), "d" (arg2) +#ifdef CONFIG_X86_IAMCU + : "ecx" +#endif + ); return ret; } @@ -598,7 +627,11 @@ static inline u32_t _arch_syscall_invoke1(u32_t arg1, u32_t call_id) __asm__ volatile("int $0x80" : "=a" (ret) - : "S" (call_id), "a" (arg1)); + : "S" (call_id), "a" (arg1) +#ifdef CONFIG_X86_IAMCU + : "edx", "ecx" +#endif + ); return ret; } @@ -608,7 +641,11 @@ static inline u32_t _arch_syscall_invoke0(u32_t call_id) __asm__ volatile("int $0x80" : "=a" (ret) - : "S" (call_id)); + : "S" (call_id) +#ifdef CONFIG_X86_IAMCU + : "edx", "ecx" +#endif + ); return ret; } diff --git a/include/syscall.h b/include/syscall.h new file mode 100644 index 00000000000..d243262fe45 --- /dev/null +++ b/include/syscall.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ + + +#ifndef _ZEPHYR_SYSCALL_H_ +#define _ZEPHYR_SYSCALL_H_ + +/* Fixed system call IDs. We use #defines instead of enumeration so that if + * system calls are retired it does not shift the IDs of other system calls. + */ +#define K_SYSCALL_BAD 0 + +#define K_SYSCALL_LIMIT 1 + +#ifndef _ASMLANGUAGE +#include + +/** + * @typedef _k_syscall_handler_t + * @brief System call handler function type + * + * These are kernel-side skeleton functions for system calls. They are + * necessary to sanitize the arguments passed into the system call: + * + * - Any kernel object or device pointers are validated with _SYSCALL_IS_OBJ() + * - Any memory buffers passed in are checked to ensure that the calling thread + * actually has access to them + * - Many kernel calls do no sanity checking of parameters other than + * assertions. 
The handler must check all of these conditions using + * _SYSCALL_VERIFY() + * - If the system call has more than 5 arguments, then arg5 will be a pointer + * to some struct containing arguments 5+. The struct itself needs to be + * validated like any other buffer passed in from userspace, and its members + * individually validated (if necessary) and then passed to the real + * implementation like normal arguments + * + * Even if the system call implementation has no return value, these always + * return something, even 0, to prevent register leakage to userspace. + * + * Once everything has been validated, the real implementation will be executed. + * + * @param arg1 system call argument 1 + * @param arg2 system call argument 2 + * @param arg3 system call argument 3 + * @param arg4 system call argument 4 + * @param arg5 system call argument 5 + * @param ssf System call stack frame pointer. Used to generate kernel oops + * via _arch_syscall_oops(). Contents are arch-specific. + * @return system call return value, or 0 if the system call implementation + * returns void + * + */ +typedef u32_t (*_k_syscall_handler_t)(u32_t arg1, u32_t arg2, u32_t arg3, + u32_t arg4, u32_t arg5, void *ssf); + + +extern const _k_syscall_handler_t _k_syscall_table[K_SYSCALL_LIMIT]; + + +/** + * @brief Runtime expression check for system call arguments + * + * Used in handler functions to perform various runtime checks on arguments, + * and generate a kernel oops if anything is not expected + * + * @param expr Boolean expression to verify, a false result will trigger an + * oops + * @param ssf Syscall stack frame argument passed to the handler function + */ +#define _SYSCALL_VERIFY(expr, ssf) \ + do { \ + if (!(expr)) { \ + printk("FATAL: syscall failed check: " #expr "\n"); \ + _arch_syscall_oops(ssf); \ + } \ + } while (0) + +/** + * @brief Runtime check that a pointer is a kernel object of expected type + * + * Passes along arguments to _k_object_validate() and triggers a kernel 
oops + * if the object wasn't valid or had incorrect permissions. + * + * @param ptr Untrusted kernel object pointer + * @param type Expected kernel object type + * @param init Whether this is an init function handler + * @param ssf Syscall stack frame argument passed to the handler function + */ +#define _SYSCALL_IS_OBJ(ptr, type, init, ssf) \ + _SYSCALL_VERIFY(!_k_object_validate((void *)(ptr), type, init), ssf) + +/* Convenience macros for handler implementations */ +#define _SYSCALL_ARG0 ARG_UNUSED(arg1); ARG_UNUSED(arg2); ARG_UNUSED(arg3); \ + ARG_UNUSED(arg4); ARG_UNUSED(arg5) + +#define _SYSCALL_ARG1 ARG_UNUSED(arg2); ARG_UNUSED(arg3); ARG_UNUSED(arg4); \ + ARG_UNUSED(arg5) + +#define _SYSCALL_ARG2 ARG_UNUSED(arg3); ARG_UNUSED(arg4); ARG_UNUSED(arg5) + +#define _SYSCALL_ARG3 ARG_UNUSED(arg4); ARG_UNUSED(arg5) + +#define _SYSCALL_ARG4 ARG_UNUSED(arg5) + + +#endif /* _ASMLANGUAGE */ + +#endif /* _ZEPHYR_SYSCALL_H_ */ diff --git a/kernel/include/nano_internal.h b/kernel/include/nano_internal.h index d32dd162ba2..051166e26af 100644 --- a/kernel/include/nano_internal.h +++ b/kernel/include/nano_internal.h @@ -133,8 +133,22 @@ extern FUNC_NORETURN void _arch_user_mode_enter(k_thread_entry_t user_entry, void *p1, void *p2, void *p3); - -extern u32_t _k_syscall_entry(u32_t arg1, u32_t arg2, u32_t arg3, u32_t arg4, - u32_t arg5, u32_t call_id); + +/** + * @brief Induce a kernel oops that appears to come from a specific location + * + * Normally, k_oops() generates an exception that appears to come from the + * call site of the k_oops() itself. + * + * However, when validating arguments to a system call, if there are problems + * we want the oops to appear to come from where the system call was invoked + * and not inside the validation function. + * + * @param ssf System call stack frame pointer. This gets passed as an argument + * to _k_syscall_handler_t functions and its contents are completely + * architecture specific. 
+ */ +extern FUNC_NORETURN void _arch_syscall_oops(void *ssf); #endif /* CONFIG_USERSPACE */ /* set and clear essential fiber/task flag */ diff --git a/kernel/userspace.c b/kernel/userspace.c index ea6ad20dd72..790f2201186 100644 --- a/kernel/userspace.c +++ b/kernel/userspace.c @@ -11,6 +11,7 @@ #include #include #include +#include /** * Kernel object validation function @@ -181,16 +182,15 @@ void _k_object_init(void *object) ko->flags |= K_OBJ_FLAG_INITIALIZED; } - -u32_t _k_syscall_entry(u32_t arg1, u32_t arg2, u32_t arg3, u32_t arg4, - u32_t arg5, u32_t call_id) +static u32_t _syscall_bad_handler(u32_t bad_id, u32_t arg2, u32_t arg3, + u32_t arg4, u32_t arg5, void *ssf) { - /* A real implementation will figure out what function to call - * based on call_id, validate arguments, perform any other runtime - * checks needed, and call into the appropriate kernel function. - */ - __ASSERT(0, "system calls are unimplemented"); - - return 0; + printk("Bad system call id %u invoked\n", bad_id); + _arch_syscall_oops(ssf); + CODE_UNREACHABLE; } +/* This table will eventually be generated by a script, placeholder for now */ +const _k_syscall_handler_t _k_syscall_table[K_SYSCALL_LIMIT] = { + [K_SYSCALL_BAD] = _syscall_bad_handler, +};