/* * Copyright (c) 2010-2015 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 */ /** * @file * @brief Kernel swapper code for IA-32 * * This module implements the arch_swap() routine for the IA-32 architecture. */ #include #include #include #include #include /* exports (internal APIs) */ GTEXT(arch_swap) GTEXT(z_x86_thread_entry_wrapper) GTEXT(_x86_user_thread_entry_wrapper) /* externs */ #if !defined(CONFIG_X86_KPTI) && defined(CONFIG_X86_USERSPACE) GTEXT(z_x86_swap_update_page_tables) #endif GDATA(_k_neg_eagain) /* * Given that arch_swap() is called to effect a cooperative context switch, * only the non-volatile integer registers need to be saved in the TCS of the * outgoing thread. The restoration of the integer registers of the incoming * thread depends on whether that thread was preemptively context switched out. * The X86_THREAD_FLAG_INT and _EXC bits in the k_thread->arch.flags field will * signify that the thread was preemptively context switched out, and thus both * the volatile and non-volatile integer registers need to be restored. * * The volatile registers need to be scrubbed to ensure they contain no * sensitive information that could compromise system security. This is to * make sure that information will not be leaked from one application to * another via these volatile registers. * * Here, the integer registers (EAX, ECX, EDX) have been scrubbed. Any changes * to this routine that alter the values of these registers MUST be reviewed * for potential security impacts. * * Floating point registers are handled using a lazy save/restore mechanism * since it's expected relatively few threads will be created with the * K_FP_REGS or K_SSE_REGS option bits. The kernel data structure maintains a * 'current_fp' field to keep track of the thread that "owns" the floating * point registers. Floating point registers consist of ST0->ST7 (x87 FPU and * MMX registers) and XMM0 -> XMM7. 
* * All floating point registers are considered 'volatile' thus they will only * be saved/restored when a preemptive context switch occurs. * * Floating point registers are currently NOT scrubbed, and are subject to * potential security leaks. * * C function prototype: * * unsigned int arch_swap (unsigned int eflags); */ SECTION_FUNC(PINNED_TEXT, arch_swap) #if defined(CONFIG_INSTRUMENT_THREAD_SWITCHING) pushl %eax call z_thread_mark_switched_out popl %eax #endif /* * Push all non-volatile registers onto the stack; do not copy * any of these registers into the k_thread. Only the 'esp' register * after all the pushes have been performed) will be stored in the * k_thread. */ pushl %edi movl $_kernel, %edi pushl %esi pushl %ebx pushl %ebp /* * Carve space for the return value. Setting it to a default of * -EAGAIN eliminates the need for the timeout code to set it. * If another value is ever needed, it can be modified with * arch_thread_return_value_set(). */ pushl _k_neg_eagain /* save esp into k_thread structure */ movl _kernel_offset_to_current(%edi), %edx movl %esp, _thread_offset_to_esp(%edx) movl _kernel_offset_to_ready_q_cache(%edi), %eax /* * At this point, the %eax register contains the 'k_thread *' of the * thread to be swapped in, and %edi still contains &_kernel. %edx * has the pointer to the outgoing thread. */ #if defined(CONFIG_X86_USERSPACE) && !defined(CONFIG_X86_KPTI) push %eax call z_x86_swap_update_page_tables pop %eax /* Page tables updated. All memory access after this point needs to be * to memory that has the same mappings and access attributes wrt * supervisor mode! */ #endif #ifdef CONFIG_EAGER_FPU_SHARING /* Eager floating point state restore logic * * Addresses CVE-2018-3665 * Used as an alternate to CONFIG_LAZY_FPU_SHARING if there is any * sensitive data in the floating point/SIMD registers in a system * with untrusted threads. * * Unconditionally save/restore floating point registers on context * switch. 
*/ /* Save outgpoing thread context */ #ifdef CONFIG_X86_SSE fxsave _thread_offset_to_preempFloatReg(%edx) fninit #else fnsave _thread_offset_to_preempFloatReg(%edx) #endif /* Restore incoming thread context */ #ifdef CONFIG_X86_SSE fxrstor _thread_offset_to_preempFloatReg(%eax) #else frstor _thread_offset_to_preempFloatReg(%eax) #endif /* CONFIG_X86_SSE */ #elif defined(CONFIG_LAZY_FPU_SHARING) /* * Clear the CR0[TS] bit (in the event the current thread * doesn't have floating point enabled) to prevent the "device not * available" exception when executing the subsequent fxsave/fnsave * and/or fxrstor/frstor instructions. * * Indeed, it's possible that none of the aforementioned instructions * need to be executed, for example, the incoming thread doesn't * utilize floating point operations. However, the code responsible * for setting the CR0[TS] bit appropriately for the incoming thread * (just after the 'restoreContext_NoFloatSwap' label) will leverage * the fact that the following 'clts' was performed already. */ clts /* * Determine whether the incoming thread utilizes floating point regs * _and_ whether the thread was context switched out preemptively. */ testb $_FP_USER_MASK, _thread_offset_to_user_options(%eax) je restoreContext_NoFloatSwap /* * The incoming thread uses floating point registers: * Was it the last thread to use floating point registers? * If so, there there is no need to restore the floating point context. */ movl _kernel_offset_to_current_fp(%edi), %ebx cmpl %ebx, %eax je restoreContext_NoFloatSwap /* * The incoming thread uses floating point registers and it was _not_ * the last thread to use those registers: * Check whether the current FP context actually needs to be saved * before swapping in the context of the incoming thread. */ testl %ebx, %ebx jz restoreContext_NoFloatSave /* * The incoming thread uses floating point registers and it was _not_ * the last thread to use those registers _and_ the current FP context * needs to be saved. 
* * Given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are all * 'volatile', only save the registers if the "current FP context" * was preemptively context switched. */ testb $X86_THREAD_FLAG_ALL, _thread_offset_to_flags(%ebx) je restoreContext_NoFloatSave #ifdef CONFIG_X86_SSE testb $K_SSE_REGS, _thread_offset_to_user_options(%ebx) je x87FloatSave /* * 'fxsave' does NOT perform an implicit 'fninit', therefore issue an * 'fninit' to ensure a "clean" FPU state for the incoming thread * (for the case when the fxrstor is not executed). */ fxsave _thread_offset_to_preempFloatReg(%ebx) fninit jmp floatSaveDone x87FloatSave: #endif /* CONFIG_X86_SSE */ /* 'fnsave' performs an implicit 'fninit' after saving state! */ fnsave _thread_offset_to_preempFloatReg(%ebx) /* fall through to 'floatSaveDone' */ floatSaveDone: restoreContext_NoFloatSave: /********************************************************* * Restore floating point context of the incoming thread. *********************************************************/ /* * Again, given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are * all 'volatile', only restore the registers if the incoming thread * was previously preemptively context switched out. */ testb $X86_THREAD_FLAG_ALL, _thread_offset_to_flags(%eax) je restoreContext_NoFloatRestore #ifdef CONFIG_X86_SSE testb $K_SSE_REGS, _thread_offset_to_user_options(%eax) je x87FloatRestore fxrstor _thread_offset_to_preempFloatReg(%eax) jmp floatRestoreDone x87FloatRestore: #endif /* CONFIG_X86_SSE */ frstor _thread_offset_to_preempFloatReg(%eax) /* fall through to 'floatRestoreDone' */ floatRestoreDone: restoreContext_NoFloatRestore: /* record that the incoming thread "owns" the floating point registers */ movl %eax, _kernel_offset_to_current_fp(%edi) /* * Branch point when none of the floating point registers need to be * swapped because: a) the incoming thread does not use them OR * b) the incoming thread is the last thread that used those registers. 
*/ restoreContext_NoFloatSwap: /* * Leave CR0[TS] clear if incoming thread utilizes the floating point * registers */ testb $_FP_USER_MASK, _thread_offset_to_user_options(%eax) jne CROHandlingDone /* * The incoming thread does NOT currently utilize the floating point * registers, so set CR0[TS] to ensure the "device not available" * exception occurs on the first attempt to access a x87 FPU, MMX, * or XMM register. */ movl %cr0, %edx orl $0x8, %edx movl %edx, %cr0 CROHandlingDone: #endif /* CONFIG_LAZY_FPU_SHARING */ /* update _kernel.current to reflect incoming thread */ movl %eax, _kernel_offset_to_current(%edi) #if defined(CONFIG_X86_USE_THREAD_LOCAL_STORAGE) pushl %eax call z_x86_tls_update_gdt /* Since segment descriptor has changed, need to reload */ movw $GS_TLS_SEG, %ax movw %ax, %gs popl %eax #endif /* recover thread stack pointer from k_thread */ movl _thread_offset_to_esp(%eax), %esp /* load return value from a possible arch_thread_return_value_set() */ popl %eax /* pop the non-volatile registers from the stack */ popl %ebp popl %ebx popl %esi popl %edi /* * %eax may contain one of these values: * * - the return value for arch_swap() that was set up by a call to * arch_thread_return_value_set() * - -EINVAL */ /* Utilize the 'eflags' parameter to arch_swap() */ pushl 4(%esp) popfl #if defined(CONFIG_INSTRUMENT_THREAD_SWITCHING) pushl %eax call z_thread_mark_switched_in popl %eax #endif ret #ifdef _THREAD_WRAPPER_REQUIRED /** * * @brief Adjust stack/parameters before invoking thread entry function * * This function adjusts the initial stack frame created by arch_new_thread() * such that the GDB stack frame unwinders recognize it as the outermost frame * in the thread's stack. * * GDB normally stops unwinding a stack when it detects that it has * reached a function called main(). Kernel threads, however, do not have * a main() function, and there does not appear to be a simple way of stopping * the unwinding of the stack. 
 *
 * Given the initial thread created by arch_new_thread(), GDB expects to find
 * a return address on the stack immediately above the thread entry routine
 * z_thread_entry, in the location occupied by the initial EFLAGS. GDB
 * attempts to examine the memory at this return address, which typically
 * results in an invalid access to page 0 of memory.
 *
 * This function overwrites the initial EFLAGS with zero. When GDB subsequently
 * attempts to examine memory at address zero, the PeekPoke driver detects
 * an invalid access to address zero and returns an error, which causes the
 * GDB stack unwinder to stop somewhat gracefully.
 *
 * The initial EFLAGS cannot be overwritten until after z_swap() has swapped in
 * the new thread for the first time. This routine is called by z_swap() the
 * first time that the new thread is swapped in, and it jumps to
 * z_thread_entry after it has done its work.
 *
 *       __________________
 *      |      param3      |   <------ Top of the stack
 *      |__________________|
 *      |      param2      |           Stack Grows Down
 *      |__________________|                  |
 *      |      param1      |                  V
 *      |__________________|
 *      |      pEntry      |
 *      |__________________|
 *      | initial EFLAGS   |  <----   ESP when invoked by z_swap()
 *      |__________________|             (Zeroed by this routine)
 *
 * The address of the thread entry function needs to be in %edi when this is
 * invoked. It will either be z_thread_entry, or if userspace is enabled,
 * _arch_drop_to_user_mode if this is a user thread.
 *
 * @return this routine does NOT return.
 */

SECTION_FUNC(PINNED_TEXT, z_x86_thread_entry_wrapper)
	/*
	 * Store zero in the 32-bit slot at the top of the stack (the
	 * "initial EFLAGS" slot in the diagram above). 'movl' is used so
	 * that the processor flags are left untouched.
	 */
	movl	$0, (%esp)
	/*
	 * Transfer control to the routine whose address is in %edi. This is
	 * a jump, not a call: no return address is pushed, so the zeroed
	 * slot remains at the top of the stack.
	 */
	jmp	*%edi
#endif /* _THREAD_WRAPPER_REQUIRED */