x86: implement user mode on 64-bit
- In early boot, enable the syscall instruction and set up necessary MSRs
- Add a hook to update page tables on context switch
- Properly initialize a thread based on whether it will start in user or
  supervisor mode
- Add landing function for system calls to execute the desired handler
- Implement arch_user_string_nlen()
- Implement logic for dropping a thread down to user mode
- Reserve per-CPU storage space for user and privilege elevation stack
  pointers, necessary for handling syscalls when no free registers are
  available
- Properly handle the GS register when transitioning privilege levels

Kernel page table isolation (KPTI) is not yet implemented.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
Parent: 07c278382a
Commit: 3d80208025
10 changed files with 406 additions and 15 deletions
@@ -17,14 +17,14 @@ config CPU_ATOM
 	bool
 	select CPU_HAS_FPU
 	select ARCH_HAS_STACK_PROTECTION if X86_MMU
-	select ARCH_HAS_USERSPACE if X86_MMU && !X86_64
+	select ARCH_HAS_USERSPACE if X86_MMU
 	help
 	  This option signifies the use of a CPU from the Atom family.

 config CPU_MINUTEIA
 	bool
 	select ARCH_HAS_STACK_PROTECTION if X86_MMU
-	select ARCH_HAS_USERSPACE if X86_MMU && !X86_64
+	select ARCH_HAS_USERSPACE if X86_MMU
 	help
 	  This option signifies the use of a CPU from the Minute IA family.
@@ -32,7 +32,7 @@ config CPU_APOLLO_LAKE
 	bool
 	select CPU_HAS_FPU
 	select ARCH_HAS_STACK_PROTECTION if X86_MMU
-	select ARCH_HAS_USERSPACE if X86_MMU && !X86_64
+	select ARCH_HAS_USERSPACE if X86_MMU
 	help
 	  This option signifies the use of a CPU from the Apollo Lake family.
@@ -269,6 +269,7 @@ config X86_KPTI
 	default y
 	depends on USERSPACE
 	depends on !X86_NO_MELTDOWN
+	depends on !X86_64
 	help
 	  Implements kernel page table isolation to mitigate Meltdown exploits
 	  to read Kernel RAM. Incurs a significant performance cost for
@@ -15,3 +15,5 @@ zephyr_library_sources(
 	intel64/thread.c
 	intel64/fatal.c
 )
+
+zephyr_library_sources_ifdef(CONFIG_USERSPACE intel64/userspace.S)
@@ -24,3 +24,14 @@ void z_x86_exception(z_arch_esf_t *esf)
 		CODE_UNREACHABLE;
 	}
 }
+
+#ifdef CONFIG_USERSPACE
+void arch_syscall_oops(void *ssf_ptr)
+{
+	struct x86_ssf *ssf = ssf_ptr;
+
+	LOG_ERR("Bad system call from RIP 0x%lx", ssf->rip);
+
+	z_x86_fatal_error(K_ERR_KERNEL_OOPS, NULL);
+}
+#endif /* CONFIG_USERSPACE */
@@ -115,9 +115,9 @@ go64:	movl %cr4, %eax		/* enable PAE and SSE */
 #endif
 	movl %eax, %cr3

-	movl $X86_EFER_MSR, %ecx	/* enable long mode and no-execute */
+	movl $X86_EFER_MSR, %ecx	/* enable long mode, no-execute, syscall */
 	rdmsr
-	orl $(X86_EFER_MSR_LME | X86_EFER_MSR_NXE), %eax
+	orl $(X86_EFER_MSR_LME | X86_EFER_MSR_NXE | X86_EFER_MSR_SCE), %eax
 	wrmsr

 	movl %cr0, %eax		/* enable paging */
@@ -169,6 +169,30 @@ go64:	movl %cr4, %eax		/* enable PAE and SSE */
 	call z_loapic_enable
 #endif

+#ifdef CONFIG_USERSPACE
+	/* Set landing site for system calls made with 'syscall' instruction */
+	movq $z_x86_syscall_entry_stub, %rax
+	movq %rax, %rdx
+	shrq $32, %rdx
+	movl $X86_LSTAR_MSR, %ecx
+	/* LSTAR set to 64-bit address denoted by EDX:EAX */
+	wrmsr
+
+	/* Set segment descriptors in STAR */
+	xorl %eax, %eax			/* Zero low bits, reserved */
+	movl $X86_STAR_UPPER, %edx
+	movl $X86_STAR_MSR, %ecx
+	wrmsr
+
+	/* Set EFLAGS mask applied when making system calls. Currently we
+	 * mask interrupts and clear direction flag.
+	 */
+	movl $0x600, %eax
+	xorl %edx, %edx
+	movl $X86_FMASK_MSR, %ecx
+	wrmsr
+#endif /* CONFIG_USERSPACE */
+
 	/* don't replace CALL with JMP; honor the ABI stack alignment! */

 	incl __x86_cpuboot_t_ready_OFFSET(%rbp)
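Note: WRMSR takes the MSR index in ECX and the 64-bit value split across EDX:EAX, which is why the stub address is split with shrq above. The same setup expressed in C, as a rough sketch only (the wrmsr64() wrapper and spelled-out MSR indices are for illustration; the commit uses Zephyr's X86_*_MSR definitions):

#include <stdint.h>

#define IA32_STAR	0xC0000081u	/* syscall/sysret segment selector bases */
#define IA32_LSTAR	0xC0000082u	/* 64-bit syscall entry point (RIP) */
#define IA32_FMASK	0xC0000084u	/* RFLAGS bits cleared on syscall entry */

static inline void wrmsr64(uint32_t msr, uint64_t val)
{
	/* WRMSR: index in ECX, value in EDX:EAX */
	__asm__ volatile("wrmsr" :: "c"(msr),
			 "a"((uint32_t)val), "d"((uint32_t)(val >> 32)));
}

static void syscall_msrs_setup(uint64_t entry_stub, uint64_t star_upper)
{
	wrmsr64(IA32_LSTAR, entry_stub);	/* where 'syscall' lands */
	wrmsr64(IA32_STAR, star_upper << 32);	/* selectors live in bits 63:32 */
	wrmsr64(IA32_FMASK, 0x600);		/* clear IF (0x200) and DF (0x400) on entry */
}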
@@ -222,7 +246,13 @@ z_x86_switch:
 	movq %r13, _thread_offset_to_r13(%rsi)
 	movq %r14, _thread_offset_to_r14(%rsi)
 	movq %r15, _thread_offset_to_r15(%rsi)
+#ifdef CONFIG_USERSPACE
+	/* We're always in supervisor mode if we get here, the other case
+	 * is when __resume is invoked from irq_dispatch
+	 */
+	movq $X86_KERNEL_CS, _thread_offset_to_cs(%rsi)
+	movq $X86_KERNEL_DS, _thread_offset_to_ss(%rsi)
+#endif
 	movq %gs:__x86_tss64_t_ist1_OFFSET, %rsp

 	/* fall through to __resume */
@@ -234,10 +264,23 @@ z_x86_switch:
 	 */

 __resume:
+#ifdef CONFIG_USERSPACE
+	pushq %rdi				/* Caller-saved, stash it */
+	call z_x86_swap_update_page_tables
+	popq %rdi
+
+	/* Set up exception return stack frame */
+	pushq _thread_offset_to_ss(%rdi)	/* SS */
+#else
 	pushq $X86_KERNEL_DS			/* SS */
+#endif
 	pushq _thread_offset_to_rsp(%rdi)	/* RSP */
 	pushq _thread_offset_to_rflags(%rdi)	/* RFLAGS */
+#ifdef CONFIG_USERSPACE
+	pushq _thread_offset_to_cs(%rdi)	/* CS */
+#else
 	pushq $X86_KERNEL_CS			/* CS */
+#endif
 	pushq _thread_offset_to_rip(%rdi)	/* RIP */

 	movq _thread_offset_to_rbx(%rdi), %rbx
@@ -261,6 +304,13 @@ __resume:
 	movq _thread_offset_to_r11(%rdi), %r11
 	movq _thread_offset_to_rdi(%rdi), %rdi	/* do last :-) */

+#ifdef CONFIG_USERSPACE
+	/* Swap GS register values if we are returning to user mode */
+	testb $0x3, 8(%rsp)
+	jz 1f
+	swapgs
+#endif /* CONFIG_USERSPACE */
+
 1:	iretq
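The testb $0x3 pattern used here and in the exception/IRQ paths below checks the requested privilege level (RPL) bits of the CS selector on the return frame: a nonzero RPL means the IRET will land in ring 3, so the kernel GS base has to be swapped back to the user value first. The differing offsets (8, 16, 24 from RSP) just account for how many extra quadwords sit above the hardware frame at each point. A minimal C rendering of the same test, illustrative only:

#include <stdbool.h>
#include <stdint.h>

/* Hardware interrupt/exception return frame, lowest address first */
struct hw_frame {
	uint64_t rip;
	uint64_t cs;
	uint64_t rflags;
	uint64_t rsp;
	uint64_t ss;
};

/* The low two bits of a selector are its RPL; user-mode selectors carry
 * RPL 3, so a nonzero value means we are about to return to ring 3 and
 * GS must be swapped back before IRET.
 */
static bool returning_to_user(const struct hw_frame *frame)
{
	return (frame->cs & 0x3) != 0;
}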
@@ -273,7 +323,13 @@ except: /*
 	 * finish struct NANO_ESF on stack. 'vector' .. 'ss' are
 	 * already there from hardware trap and EXCEPT_*() stub.
 	 */
+#ifdef CONFIG_USERSPACE
+	/* Swap GS register values if we came in from user mode */
+	testb $0x3, 24(%rsp)
+	jz 1f
+	swapgs
+1:
+#endif /* CONFIG_USERSPACE */
 	pushq %r15
 	subq $X86_FXSAVE_SIZE, %rsp
 	fxsave (%rsp)
@@ -323,6 +379,15 @@ except: /*
 	/* Drop the vector/err code pushed by the HW or EXCEPT_*() stub */
 	add $16, %rsp

+#ifdef CONFIG_USERSPACE
+	/* Swap GS register values if we are returning to user mode */
+	testb $0x3, 8(%rsp)
+	jz 1f
+	cli
+	swapgs
+1:
+#endif /* CONFIG_USERSPACE */
+
 	iretq

 EXCEPT ( 0); EXCEPT ( 1); EXCEPT ( 2); EXCEPT ( 3)
@@ -356,6 +421,13 @@ EXCEPT(Z_X86_OOPS_VECTOR);
 .globl x86_irq_args /* .. for these definitions */

 irq:
+#ifdef CONFIG_USERSPACE
+	/* Swap GS register values if we came in from user mode */
+	testb $0x3, 16(%rsp)
+	jz 1f
+	swapgs
+1:
+#endif /* CONFIG_USERSPACE */
 	pushq %rsi
 	movq %gs:__x86_tss64_t_cpu_OFFSET, %rsi
@@ -422,12 +494,18 @@ irq_enter_unnested: /* Not nested: dump state to thread struct for __resume */
 	popq %rcx			/* vector number */
 	popq %rax			/* RIP */
 	movq %rax, _thread_offset_to_rip(%rsi)
-	popq %rax			/* CS: discard */
+	popq %rax			/* CS */
+#ifdef CONFIG_USERSPACE
+	movq %rax, _thread_offset_to_cs(%rsi)
+#endif
 	popq %rax			/* RFLAGS */
 	movq %rax, _thread_offset_to_rflags(%rsi)
 	popq %rax			/* RSP */
 	movq %rax, _thread_offset_to_rsp(%rsi)
-	popq %rax			/* SS: discard */
+	popq %rax			/* SS */
+#ifdef CONFIG_USERSPACE
+	movq %rax, _thread_offset_to_ss(%rsi)
+#endif

 irq_dispatch:
 	movq x86_irq_funcs(,%rcx,8), %rbx
@@ -15,27 +15,36 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 		     void *parameter1, void *parameter2, void *parameter3,
 		     int priority, unsigned int options)
 {
-#if defined(CONFIG_X86_USERSPACE) || defined(CONFIG_X86_STACK_PROTECTION)
-	struct z_x86_thread_stack_header *header =
-		(struct z_x86_thread_stack_header *)stack;
-#endif
+	void *switch_entry;

 	Z_ASSERT_VALID_PRIO(priority, entry);
 	z_new_thread_init(thread, Z_THREAD_STACK_BUFFER(stack),
 			  stack_size, priority, options);

 #if CONFIG_X86_STACK_PROTECTION
+	struct z_x86_thread_stack_header *header =
+		(struct z_x86_thread_stack_header *)stack;
+
 	/* Set guard area to read-only to catch stack overflows */
 	z_x86_mmu_set_flags(&z_x86_kernel_ptables, &header->guard_page,
 			    MMU_PAGE_SIZE, MMU_ENTRY_READ, Z_X86_MMU_RW,
 			    true);
 #endif
+#ifdef CONFIG_USERSPACE
+	switch_entry = z_x86_userspace_prepare_thread(thread);
+	thread->arch.cs = X86_KERNEL_CS;
+	thread->arch.ss = X86_KERNEL_DS;
+#else
+	switch_entry = z_thread_entry;
+#endif
 	thread->callee_saved.rsp = (long) Z_THREAD_STACK_BUFFER(stack);
 	thread->callee_saved.rsp += (stack_size - 8); /* fake RIP for ABI */
-	thread->callee_saved.rip = (long) z_thread_entry;
+	thread->callee_saved.rip = (long) switch_entry;
 	thread->callee_saved.rflags = EFLAGS_INITIAL;
+
+	/* Parameters to entry point, which is populated in
+	 * thread->callee_saved.rip
+	 */
 	thread->arch.rdi = (long) entry;
 	thread->arch.rsi = (long) parameter1;
 	thread->arch.rdx = (long) parameter2;
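The stack_size - 8 adjustment is the "fake RIP": it leaves the new thread's RSP looking exactly as it would just after a CALL instruction, which is what the System V AMD64 ABI expects (RSP ≡ 8 mod 16 at a function's first instruction). A quick sanity check of that arithmetic, as a sketch only, under the assumption that the stack buffer address and size are both 16-byte aligned:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch: verifies the 'fake RIP for ABI' arithmetic. Assumes the stack
 * buffer address and size are 16-byte aligned (an assumption here, not a
 * statement about Zephyr's stack macros).
 */
static void check_initial_rsp(uintptr_t stack_buf, size_t stack_size)
{
	uintptr_t rsp = stack_buf + stack_size - 8;

	/* Just after a CALL, an 8-byte return address sits on a 16-byte
	 * aligned stack, so RSP % 16 == 8 at function entry. Starting the
	 * thread the same way keeps ABI-compiled code (e.g. movaps to
	 * stack temporaries) from faulting.
	 */
	assert(rsp % 16 == 8);
}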
arch/x86/core/intel64/userspace.S (new file, 234 lines)
@@ -0,0 +1,234 @@
/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <arch/cpu.h>
#include <offsets_short.h>
#include <syscall.h>


/* Landing site for 'syscall' instruction
 *
 * Call id is in RAX
 * Arguments are in RDI, RSI, RDX, R10, R8, R9
 * Return address stored by CPU in RCX
 * User RFLAGS store by CPU in R11
 * Current RFLAGS has been masked with ~X86_FMASK_MSR
 */
.global z_x86_syscall_entry_stub
z_x86_syscall_entry_stub:
	swapgs

	/* Switch to the privilege mode stack pointer stored in
	 * x86_tss64.psp and store the user mode stack pointer in
	 * x86_tss64.usp, immediately pushing it once the stack switch
	 * is done since this is a per-cpu and not per-thread area.
	 *
	 * This dance is necessary as upon entry we have no free registers
	 * nor a stack we can push to.
	 */
	movq %rsp, %gs:__x86_tss64_t_usp_OFFSET
	movq %gs:__x86_tss64_t_psp_OFFSET, %rsp
	pushq %gs:__x86_tss64_t_usp_OFFSET

	sti			/* re-enable interrupts */

	/* call_id is in RAX. bounds-check it, must be less than
	 * K_SYSCALL_LIMIT.
	 */
	cmp $K_SYSCALL_LIMIT, %rax
	jae _bad_syscall

_id_ok:
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
	/* Prevent speculation with bogus system call IDs */
	lfence
#endif

	/* Remaining registers not involved in the syscall operation are
	 * RBX, RBP, R12-R15, plus floating point / SIMD registers.
	 *
	 * We save caller-saved registers so we can restore to original values
	 * when we call 'sysretq' at the end.
	 */
	pushq %rdi
	subq $X86_FXSAVE_SIZE, %rsp
	fxsave (%rsp)
	pushq %rsi
	pushq %rdx
	pushq %r8
	pushq %r9
	pushq %r10
	pushq %r11	/* RFLAGS */
	pushq %rcx	/* Return address stored by 'syscall' */
	pushq %rsp	/* SSF parameter */

	/* All other args are in the right registers, except arg4 which
	 * we had to put in r10 instead of RCX
	 */
	movq %r10, %rcx

	/* from the call ID in RAX, load R10 with the actual function pointer
	 * to call by looking it up in the system call dispatch table
	 */
	xorq %r11, %r11
	movq _k_syscall_table(%r11, %rax, 8), %r10

	/* Run the marshal function, which is some entry in _k_syscall_table */
	call *%r10

	/* RAX now contains the return value
	 *
	 * Callee-saved registers are un-touched from original values per C
	 * calling convention, but sensitive data may lurk in caller-saved regs
	 * RDI, RSI, RDX, R8, R9, R10, XMM* after we have serviced the system
	 * call. We saved them earlier, restore their original values when
	 * the syscall was made. This also preserves these registers if they
	 * were not used as arguments.
	 *
	 * We also can't have RCX and R11 clobbered as we need the original
	 * values to successfully 'sysretq'.
	 */
	addq $8, %rsp		/* Discard ssf */
	popq %rcx		/* Restore return address for 'sysretq' */
	popq %r11		/* Restore RFLAGS for 'sysretq' */
	popq %r10
	popq %r9
	popq %r8
	popq %rdx
	popq %rsi
	fxrstor (%rsp)
	addq $X86_FXSAVE_SIZE, %rsp
	popq %rdi

	/* Restore user stack pointer */
	popq %rsp

	/* Return to user mode, locking interrupts as the normal interrupt
	 * handling path will get very confused if it occurs between
	 * 'swapgs' and 'sysretq'
	 */
	cli
	swapgs
	sysretq

_bad_syscall:
	/* RAX had a bogus syscall value in it, replace with the bad syscall
	 * handler's ID, and put the bad ID as its first argument.
	 *
	 * TODO: On this and all other arches, simply immediately return
	 * with -ENOSYS, once all syscalls have a return value
	 */
	movq %rax, %rdi
	movq $K_SYSCALL_BAD, %rax
	jmp _id_ok

/*
 * size_t arch_user_string_nlen(const char *s, size_t maxsize, int *err_arg)
 *                                    ^ RDI          ^ RSI         ^ RDX
 */
.global arch_user_string_nlen
arch_user_string_nlen:
	/* Initial error value, strlen_done adjusts this if we succeed */
	movl $-1, %r8d

	/* use RAX as our length count (this function's return value) */
	xor %rax, %rax

	/* This code might page fault */
strlen_loop:
.global z_x86_user_string_nlen_fault_start
z_x86_user_string_nlen_fault_start:
	cmpb $0x0, (%rdi, %rax, 1)	/* *(RDI + RAX) == 0? Could fault. */

.global z_x86_user_string_nlen_fault_end
z_x86_user_string_nlen_fault_end:
	je strlen_done
	cmp %rsi, %rax			/* Max length reached? */
	je strlen_done
	inc %rax			/* EAX++ and loop again */
	jmp strlen_loop

strlen_done:
	/* Set error value to 0 since we succeeded */
	xorl %r8d, %r8d

.global z_x86_user_string_nlen_fixup
z_x86_user_string_nlen_fixup:
	/* Write error value to 32-bit integer err pointer parameter */
	movl %r8d, (%rdx)
	retq

/*
 * Trampoline function to put the p3 parameter in the register expected
 * by the calling convention, we couldn't use RCX when we called 'sysret'
 */
z_x86_userspace_landing_site:
	/* Place argument 4 in the correct position */
	movq %r10, %rcx
	call z_thread_entry

/* FUNC_NORETURN void z_x86_userspace_enter(
 *		k_thread_entry_t user_entry,	<- RDI
 *		void *p1, void *p2, void *p3,	<- RSI, RDX, RCX
 *		uintptr_t stack_end,		<- R8
 *		uintptr_t stack_start)		<- R9
 *
 * A one-way trip to userspace.
 */
.global z_x86_userspace_enter
z_x86_userspace_enter:
	/* RCX is sysret return address, pass along p3 in r10,
	 * z_x86_userspace_landing_site will fix this up
	 */
	movq %rcx, %r10

	/* switch to privilege mode stack so we can erase thread stack buffer,
	 * the buffer is the page immediately before the thread stack
	 */
	movq %r9, %rsp

	/* Need RDI temporarily */
	pushq %rdi

	/* Compute size of user stack in 8-byte chunks and put in RCX */
	movq %r9, %rdi		/* Start address for rep stosq in RDI */
	movq %r8, %rcx		/* Ending address */
	subq %rdi, %rcx		/* Subtract starting address */
	shrq $3, %rcx		/* Divide by 8 */

	movq $0xAAAAAAAAAAAAAAAA, %rax	/* Fill value */
	/* Copy 8 bytes of memory at a time, starting at ES:RDI, with whatever
	 * is in RAX. Repeat this RCX times. Stack sizes are always at least
	 * 8-byte aligned.
	 */
	cld
	rep stosq

	popq %rdi

	/* Reset to the beginning of the user stack */
	movq %r8, %rsp

	/* set sysret entry point */
	movq $z_x86_userspace_landing_site, %rcx

	/* Copy RFLAGS into r11, required by sysret */
	pushfq
	movq (%rsp), %r11
	movq $0, (%rsp)	/* Now a debugger-friendly return address */

	/* cleanse other registers */
	xorq %rax, %rax
	xorq %rbx, %rbx
	xorq %rbp, %rbp
	xorq %r12, %r12
	xorq %r13, %r13
	xorq %r14, %r14
	xorq %r15, %r15

	cli
	swapgs
	sysretq
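For orientation, a user-side invocation matching the ABI documented at the top of the stub (id in RAX, arguments in RDI, RSI, RDX, R10, R8, R9, with RCX and R11 clobbered by the CPU) would look roughly like the sketch below. This is illustrative only; real Zephyr user code goes through its generated marshalling wrappers, and the function name here is made up.

#include <stdint.h>

/* Hypothetical hand-written 3-argument syscall wrapper matching the
 * register convention the entry stub expects.
 */
static inline uintptr_t demo_syscall3(uintptr_t id, uintptr_t arg1,
				      uintptr_t arg2, uintptr_t arg3)
{
	uintptr_t ret;

	__asm__ volatile("syscall"
			 : "=a"(ret)
			 : "a"(id), "D"(arg1), "S"(arg2), "d"(arg3)
			 : "rcx", "r11", "memory");	/* CPU clobbers RCX/R11 */
	return ret;
}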
@@ -23,9 +23,17 @@ GEN_OFFSET_SYM(_thread_arch_t, r9);
 GEN_OFFSET_SYM(_thread_arch_t, r10);
 GEN_OFFSET_SYM(_thread_arch_t, r11);
 GEN_OFFSET_SYM(_thread_arch_t, sse);
+#ifdef CONFIG_USERSPACE
+GEN_OFFSET_SYM(_thread_arch_t, ss);
+GEN_OFFSET_SYM(_thread_arch_t, cs);
+#endif

 GEN_OFFSET_SYM(x86_tss64_t, ist1);
 GEN_OFFSET_SYM(x86_tss64_t, cpu);
+#ifdef CONFIG_USERSPACE
+GEN_OFFSET_SYM(x86_tss64_t, psp);
+GEN_OFFSET_SYM(x86_tss64_t, usp);
+#endif
 GEN_ABSOLUTE_SYM(__X86_TSS64_SIZEOF, sizeof(x86_tss64_t));

 GEN_OFFSET_SYM(x86_cpuboot_t, ready);
@@ -65,4 +65,10 @@
 #define _thread_offset_to_sse \
 	(___thread_t_arch_OFFSET + ___thread_arch_t_sse_OFFSET)

+#define _thread_offset_to_ss \
+	(___thread_t_arch_OFFSET + ___thread_arch_t_ss_OFFSET)
+
+#define _thread_offset_to_cs \
+	(___thread_t_arch_OFFSET + ___thread_arch_t_cs_OFFSET)
+
 #endif /* ZEPHYR_ARCH_X86_INCLUDE_INTEL64_OFFSETS_SHORT_ARCH_H_ */
@@ -59,6 +59,19 @@ struct x86_esf {

 typedef struct x86_esf z_arch_esf_t;

+struct x86_ssf {
+	unsigned long rip;
+	unsigned long rflags;
+	unsigned long r10;
+	unsigned long r9;
+	unsigned long r8;
+	unsigned long rdx;
+	unsigned long rsi;
+	char fxsave[X86_FXSAVE_SIZE];
+	unsigned long rdi;
+	unsigned long rsp;
+};
+
 #define ARCH_EXCEPT(reason_p) do { \
 	__asm__ volatile( \
 		"movq %[reason], %%rax\n\t" \
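The field order in struct x86_ssf mirrors the frame built by z_x86_syscall_entry_stub, read from the final stack pointer upward: RIP (the saved RCX) was pushed last and so sits first, then RFLAGS (R11), R10, R9, R8, RDX, RSI, the FXSAVE area, RDI, and finally the user RSP pushed right after the stack switch. A small sketch of consuming it; the dump helper, printk usage, and header path are illustrative, not from the commit:

#include <sys/printk.h>	/* header path may differ across Zephyr versions */

static void dump_syscall_frame(const struct x86_ssf *ssf)
{
	/* Same fields arch_syscall_oops() relies on, plus the user RSP */
	printk("syscall from RIP 0x%lx, user RSP 0x%lx, RFLAGS 0x%lx\n",
	       ssf->rip, ssf->rsp, ssf->rflags);
}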
@@ -20,6 +20,11 @@
 #define X86_USER_DS	0x30	/* 64-bit user mode data */
 #define X86_USER_CS	0x38	/* 64-bit user mode code */

+/* Value programmed into bits 63:32 of STAR MSR with proper segment
+ * descriptors for implementing user mode with syscall/sysret
+ */
+#define X86_STAR_UPPER	((X86_USER_CS_32 << 16) | X86_KERNEL_CS)
+
 #define X86_KERNEL_CPU0_TR	0x40	/* 64-bit task state segment */
 #define X86_KERNEL_CPU1_TR	0x50	/* 64-bit task state segment */
 #define X86_KERNEL_CPU2_TR	0x60	/* 64-bit task state segment */
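For context on why STAR is laid out this way: 'syscall' loads CS from STAR[47:32] (here X86_KERNEL_CS) and SS from that value + 8, while 64-bit 'sysret' loads CS from STAR[63:48] + 16 and SS from STAR[63:48] + 8, forcing RPL to 3. Given X86_USER_DS at 0x30 and X86_USER_CS at 0x38 above, X86_USER_CS_32 would need to be 0x28 for that arithmetic to land on the right descriptors; the check below spells this out as a sketch (the 0x28 value is an assumption here, not shown in the hunk):

#include <assert.h>

#define DEMO_USER_DS	0x30	/* X86_USER_DS from this header */
#define DEMO_USER_CS	0x38	/* X86_USER_CS from this header */
#define DEMO_USER_CS_32	0x28	/* assumed 32-bit user code selector */

int main(void)
{
	/* 64-bit sysret: CS = STAR[63:48] + 16, SS = STAR[63:48] + 8
	 * (hardware forces RPL to 3 on both selectors)
	 */
	assert(DEMO_USER_CS_32 + 16 == DEMO_USER_CS);
	assert(DEMO_USER_CS_32 + 8 == DEMO_USER_DS);
	return 0;
}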
@@ -73,6 +78,13 @@ struct x86_tss64 {
 	 */
 	struct _cpu *cpu;
+#ifdef CONFIG_USERSPACE
+	/* Privilege mode stack pointer value when doing a system call */
+	char *psp;
+
+	/* Storage area for user mode stack pointer when doing a syscall */
+	char *usp;
+#endif
 } __packed __aligned(8);

 typedef struct x86_tss64 x86_tss64_t;
@@ -101,6 +113,23 @@ typedef struct _callee_saved _callee_saved_t;
 struct _thread_arch {
 	u8_t flags;

+#ifdef CONFIG_USERSPACE
+	/* Pointer to page tables used by this thread. Supervisor threads
+	 * always use the kernel's page table, user thread use per-thread
+	 * tables stored in the stack object
+	 */
+	struct x86_page_tables *ptables;
+
+	/* Initial privilege mode stack pointer when doing a system call.
+	 * Un-set for supervisor threads.
+	 */
+	char *psp;
+
+	/* SS and CS selectors for this thread when restoring context */
+	u64_t ss;
+	u64_t cs;
+#endif
+
 	u64_t rax;
 	u64_t rcx;
 	u64_t rdx;