userspace: flesh out internal syscall interface

* Instead of a common system call entry function, we instead create a
table mapping system call ids to handler skeleton functions which are
invoked directly by the architecture code which receives the system
call.

* system call handler prototype specified. All but the most trivial
system calls will implement one of these. They validate all the
arguments, including verifying kernel/device object pointers, ensuring
that the calling thread has appropriate access to any memory buffers
passed in, and performing other parameter checks that the base system
call implementation does not check, or only checks with __ASSERT().

It's only possible to install a system call implementation directly
inside this table if the implementation has a return value and requires
no validation of any of its arguments.

A sample handler implementation for k_mutex_unlock() might look like:

u32_t _syscall_k_mutex_unlock(u32_t mutex_arg, u32_t arg2, u32_t arg3,
                              u32_t arg4, u32_t arg5, void *ssf)
{
        struct k_mutex *mutex = (struct k_mutex *)mutex_arg;
        _SYSCALL_ARG1;

        _SYSCALL_IS_OBJ(mutex, K_OBJ_MUTEX, 0,  ssf);
        _SYSCALL_VERIFY(mutex->lock_count > 0, ssf);
        _SYSCALL_VERIFY(mutex->owner == _current, ssf);

        k_mutex_unlock(mutex);

        return 0;
}

* the x86 port modified to work with the system call table instead of
calling a common handler function. An issue where registers being
changed could confuse the compiler has been fixed; all registers, even
ones used for parameters, must be preserved across the system call.

* a new arch API for producing a kernel oops when validating system call
arguments added. The debug information reported will be from the system
call site and not inside the handler function.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
This commit is contained in:
Andrew Boie 2017-09-13 18:04:21 -07:00 committed by Andrew Boie
commit a23c245a9a
6 changed files with 239 additions and 27 deletions

View file

@ -109,6 +109,23 @@ FUNC_NORETURN void _NanoFatalErrorHandler(unsigned int reason,
_SysFatalErrorHandler(reason, pEsf);
}
/*
 * Raise a kernel oops on behalf of a system call handler.
 *
 * ssf_ptr is the opaque stack frame pointer handed to the handler; it is
 * interpreted as a struct _x86_syscall_stack_frame. A NANO_ESF is built
 * from the eip/cs/eflags saved there and passed to the fatal error
 * handler with reason _NANO_ERR_KERNEL_OOPS. Does not return.
 */
FUNC_NORETURN void _arch_syscall_oops(void *ssf_ptr)
{
	struct _x86_syscall_stack_frame *frame = ssf_ptr;
	NANO_ESF esf = {
		.eip = frame->eip,
		.cs = frame->cs,
		.eflags = frame->eflags
	};

	/* The esp slot of the frame is only present when the call came from
	 * the user code segment, so it is only read in that case.
	 */
	if (esf.cs == USER_CODE_SEG) {
		esf.esp = frame->esp;
	}

	_NanoFatalErrorHandler(_NANO_ERR_KERNEL_OOPS, &esf);
}
#ifdef CONFIG_X86_KERNEL_OOPS
/* The reason code gets pushed onto the stack right before the exception is
* triggered, so it would be after the nano_esf data

View file

@ -8,19 +8,33 @@
#include <arch/x86/asm.h>
#include <arch/cpu.h>
#include <offsets_short.h>
#include <syscall.h>
/* Exports */
GTEXT(_x86_syscall_entry_stub)
GTEXT(_x86_userspace_enter)
/* Imports */
GTEXT(_k_syscall_entry)
GTEXT(_k_syscall_table)
/* Landing site for syscall SW IRQ. Marshal arguments and call C function for
* further processing.
* further processing. We're on the kernel stack for the invoking thread.
*/
SECTION_FUNC(TEXT, _x86_syscall_entry_stub)
push %esi /* call_id */
sti /* re-enable interrupts */
cld /* clear direction flag, restored on 'iret' */
/* call_id is in ESI. bounds-check it, must be less than
* K_SYSCALL_LIMIT
*/
cmp $K_SYSCALL_LIMIT, %esi
jae _bad_syscall
_id_ok:
/* Marshal arguments per calling convention to match what is expected
* for _k_syscall_handler_t functions
*/
push %esp /* ssf */
push %edi /* arg5 */
push %ebx /* arg4 */
#ifndef CONFIG_X86_IAMCU
@ -29,25 +43,44 @@ SECTION_FUNC(TEXT, _x86_syscall_entry_stub)
push %eax /* arg1 */
#endif
call _k_syscall_entry
/* from the call ID in ESI, load EBX with the actual function pointer
* to call by looking it up in the system call dispatch table
*/
xor %edi, %edi
mov _k_syscall_table(%edi, %esi, 4), %ebx
/* Run the handler, which is some entry in _k_syscall_table */
call *%ebx
/* EAX now contains return value. Pop or xor everything else to prevent
* information leak from kernel mode.
*/
#ifndef CONFIG_X86_IAMCU
pop %edx /* old EAX value, discard it */
pop %edx /* old arg1 value, discard it */
pop %edx
pop %ecx
#else
xor %edx, %edx
xor %ecx, %ecx
#endif
pop %ebx
pop %edi
pop %esi
#ifndef CONFIG_X86_IAMCU
/* Discard ssf, no free register to pop it into so we add instead */
add $4, %esp
#else
xor %edx, %edx /* Clean EDX */
pop %ecx /* Clean ECX and get ssf arg off the stack */
#endif
iret
_bad_syscall:
/* ESI had a bogus syscall value in it, replace with the bad syscall
* handler's ID, and put the bad ID as its first argument. This
* clobbers ESI but the bad syscall handler never returns
* anyway, it's going to generate a kernel oops
*/
mov %esi, %eax
mov $K_SYSCALL_BAD, %esi
jmp _id_ok
/* FUNC_NORETURN void _x86_userspace_enter(k_thread_entry_t user_entry,
* void *p1, void *p2, void *p3,

View file

@ -323,6 +323,17 @@ typedef struct nanoEsf {
unsigned int eflags;
} NANO_ESF;
/*
* Stack frame layout that the opaque "ssf" pointer handed to system call
* handlers is interpreted as (see _arch_syscall_oops()).
*/
struct _x86_syscall_stack_frame {
u32_t eip;
u32_t cs;
u32_t eflags;
/* These are only present if cs = USER_CODE_SEG */
u32_t esp;
u32_t ss;
};
/**
* @brief "interrupt stack frame" (ISF)
*
@ -545,6 +556,8 @@ extern struct task_state_segment _main_tss;
/* Syscall invocation macros. x86-specific machine constraints used to ensure
* args land in the proper registers, see implementation of
* _x86_syscall_entry_stub in userspace.S
*
* the entry stub clobbers EDX and ECX on IAMCU systems
*/
static inline u32_t _arch_syscall_invoke5(u32_t arg1, u32_t arg2, u32_t arg3,
@ -554,6 +567,9 @@ static inline u32_t _arch_syscall_invoke5(u32_t arg1, u32_t arg2, u32_t arg3,
__asm__ volatile("int $0x80"
: "=a" (ret)
#ifdef CONFIG_X86_IAMCU
, "=d" (arg2), "=c" (arg3)
#endif
: "S" (call_id), "a" (arg1), "d" (arg2),
"c" (arg3), "b" (arg4), "D" (arg5));
return ret;
@ -566,8 +582,11 @@ static inline u32_t _arch_syscall_invoke4(u32_t arg1, u32_t arg2, u32_t arg3,
__asm__ volatile("int $0x80"
: "=a" (ret)
: "S" (call_id), "a" (arg1), "d" (arg2),
"c" (arg3), "b" (arg4));
#ifdef CONFIG_X86_IAMCU
, "=d" (arg2), "=c" (arg3)
#endif
: "S" (call_id), "a" (arg1), "d" (arg2), "c" (arg3),
"b" (arg4));
return ret;
}
@ -578,6 +597,9 @@ static inline u32_t _arch_syscall_invoke3(u32_t arg1, u32_t arg2, u32_t arg3,
__asm__ volatile("int $0x80"
: "=a" (ret)
#ifdef CONFIG_X86_IAMCU
, "=d" (arg2), "=c" (arg3)
#endif
: "S" (call_id), "a" (arg1), "d" (arg2), "c" (arg3));
return ret;
}
@ -588,7 +610,14 @@ static inline u32_t _arch_syscall_invoke2(u32_t arg1, u32_t arg2, u32_t call_id)
__asm__ volatile("int $0x80"
: "=a" (ret)
: "S" (call_id), "a" (arg1), "d" (arg2));
#ifdef CONFIG_X86_IAMCU
, "=d" (arg2)
#endif
: "S" (call_id), "a" (arg1), "d" (arg2)
#ifdef CONFIG_X86_IAMCU
: "ecx"
#endif
);
return ret;
}
@ -598,7 +627,11 @@ static inline u32_t _arch_syscall_invoke1(u32_t arg1, u32_t call_id)
__asm__ volatile("int $0x80"
: "=a" (ret)
: "S" (call_id), "a" (arg1));
: "S" (call_id), "a" (arg1)
#ifdef CONFIG_X86_IAMCU
: "edx", "ecx"
#endif
);
return ret;
}
@ -608,7 +641,11 @@ static inline u32_t _arch_syscall_invoke0(u32_t call_id)
__asm__ volatile("int $0x80"
: "=a" (ret)
: "S" (call_id));
: "S" (call_id)
#ifdef CONFIG_X86_IAMCU
: "edx", "ecx"
#endif
);
return ret;
}

111
include/syscall.h Normal file
View file

@ -0,0 +1,111 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _ZEPHYR_SYSCALL_H_
#define _ZEPHYR_SYSCALL_H_
/* Fixed system call IDs. We use #defines instead of enumeration so that if
* system calls are retired it does not shift the IDs of other system calls.
*/
#define K_SYSCALL_BAD 0
#define K_SYSCALL_LIMIT 1
#ifndef _ASMLANGUAGE
#include <misc/printk.h>
/**
* @typedef _k_syscall_handler_t
* @brief System call handler function type
*
* These are kernel-side skeleton functions for system calls. They are
* necessary to sanitize the arguments passed into the system call:
*
* - Any kernel object or device pointers are validated with _SYSCALL_IS_OBJ()
* - Any memory buffers passed in are checked to ensure that the calling thread
* actually has access to them
* - Many kernel calls do no sanity checking of parameters other than
* assertions. The handler must check all of these conditions using
* _SYSCALL_VERIFY()
* - If the system call has more than 5 arguments, then arg5 will be a pointer
* to some struct containing arguments 5+. The struct itself needs to be
* validated like any other buffer passed in from userspace, and its members
* individually validated (if necessary) and then passed to the real
* implementation like normal arguments
*
* Even if the system call implementation has no return value, these always
* return something, even 0, to prevent register leakage to userspace.
*
* Once everything has been validated, the real implementation will be executed.
*
* @param arg1 system call argument 1
* @param arg2 system call argument 2
* @param arg3 system call argument 3
* @param arg4 system call argument 4
* @param arg5 system call argument 5
* @param ssf System call stack frame pointer. Used to generate kernel oops
* via _arch_syscall_oops(). Contents are arch-specific.
* @return system call return value, or 0 if the system call implementation
* returns void
*
*/
typedef u32_t (*_k_syscall_handler_t)(u32_t arg1, u32_t arg2, u32_t arg3,
u32_t arg4, u32_t arg5, void *ssf);
extern const _k_syscall_handler_t _k_syscall_table[K_SYSCALL_LIMIT];
/**
 * @brief Runtime expression check for system call arguments
 *
 * Used in handler functions to perform various runtime checks on arguments,
 * and generate a kernel oops if anything is not expected.
 *
 * The text of the failed expression is handed to printk() as a "%s"
 * argument rather than being spliced into the format string, so an
 * expression that contains '%' (for example a modulo test) is printed
 * verbatim instead of being parsed as a conversion specifier.
 *
 * @param expr Boolean expression to verify, a false result will trigger an
 *             oops
 * @param ssf Syscall stack frame argument passed to the handler function
 */
#define _SYSCALL_VERIFY(expr, ssf) \
	do { \
		if (!(expr)) { \
			printk("FATAL: syscall failed check: %s\n", #expr); \
			_arch_syscall_oops(ssf); \
		} \
	} while (0)
/**
 * @brief Runtime check that a pointer is a kernel object of expected type
 *
 * Passes along arguments to _k_object_validate() and triggers a kernel oops
 * (via _SYSCALL_VERIFY()) if that call returns nonzero, i.e. the object
 * wasn't valid or had incorrect permissions.
 *
 * The ptr argument is parenthesized before the cast so that an expression
 * such as "base + 1" is cast as a whole rather than as "((void *)base) + 1".
 *
 * @param ptr Untrusted kernel object pointer
 * @param type Expected kernel object type
 * @param init Whether this is an init function handler
 * @param ssf Syscall stack frame argument passed to the handler function
 */
#define _SYSCALL_IS_OBJ(ptr, type, init, ssf) \
	_SYSCALL_VERIFY(!_k_object_validate((void *)(ptr), type, init), ssf)
/* Convenience macros for handler implementations.
 *
 * _SYSCALL_ARGn goes in a handler that consumes only its first n arguments
 * and marks the remaining ones as unused. The macros refer to the
 * parameters by the names arg1..arg5, so unused handler parameters must
 * keep those names.
 */
#define _SYSCALL_ARG0	ARG_UNUSED(arg1); ARG_UNUSED(arg2); ARG_UNUSED(arg3); \
			ARG_UNUSED(arg4); ARG_UNUSED(arg5)
#define _SYSCALL_ARG1	ARG_UNUSED(arg2); ARG_UNUSED(arg3); ARG_UNUSED(arg4); \
			ARG_UNUSED(arg5)
#define _SYSCALL_ARG2	ARG_UNUSED(arg3); ARG_UNUSED(arg4); ARG_UNUSED(arg5)
#define _SYSCALL_ARG3	ARG_UNUSED(arg4); ARG_UNUSED(arg5)
#define _SYSCALL_ARG4	ARG_UNUSED(arg5)
#endif /* _ASMLANGUAGE */
#endif /* _ZEPHYR_SYSCALL_H_ */

View file

@ -133,8 +133,22 @@ extern FUNC_NORETURN
void _arch_user_mode_enter(k_thread_entry_t user_entry, void *p1, void *p2,
void *p3);
extern u32_t _k_syscall_entry(u32_t arg1, u32_t arg2, u32_t arg3, u32_t arg4,
u32_t arg5, u32_t call_id);
/**
* @brief Induce a kernel oops that appears to come from a specific location
*
* Normally, k_oops() generates an exception that appears to come from the
* call site of the k_oops() itself.
*
* However, when validating arguments to a system call, if there are problems
* we want the oops to appear to come from where the system call was invoked
* and not inside the validation function.
*
* @param ssf System call stack frame pointer. This gets passed as an argument
* to _k_syscall_handler_t functions and its contents are completely
* architecture specific.
*/
extern FUNC_NORETURN void _arch_syscall_oops(void *ssf);
#endif /* CONFIG_USERSPACE */
/* set and clear essential fiber/task flag */

View file

@ -11,6 +11,7 @@
#include <kernel_structs.h>
#include <sys_io.h>
#include <ksched.h>
#include <syscall.h>
/**
* Kernel object validation function
@ -181,16 +182,15 @@ void _k_object_init(void *object)
ko->flags |= K_OBJ_FLAG_INITIALIZED;
}
u32_t _k_syscall_entry(u32_t arg1, u32_t arg2, u32_t arg3, u32_t arg4,
u32_t arg5, u32_t call_id)
static u32_t _syscall_bad_handler(u32_t bad_id, u32_t arg2, u32_t arg3,
u32_t arg4, u32_t arg5, void *ssf)
{
/* A real implementation will figure out what function to call
* based on call_id, validate arguments, perform any other runtime
* checks needed, and call into the appropriate kernel function.
*/
__ASSERT(0, "system calls are unimplemented");
return 0;
printk("Bad system call id %u invoked\n", bad_id);
_arch_syscall_oops(ssf);
CODE_UNREACHABLE;
}
/* This table will eventually be generated by a script, placeholder for now.
* It is indexed by system call ID and holds K_SYSCALL_LIMIT entries; the
* only slot populated so far is K_SYSCALL_BAD, which maps to the handler
* that reports an invalid call ID.
*/
const _k_syscall_handler_t _k_syscall_table[K_SYSCALL_LIMIT] = {
[K_SYSCALL_BAD] = _syscall_bad_handler,
};