arch/x86: add SSE floating-point to Intel64 subarch
This is a naive implementation that does "eager" context switching for the floating-point context: the full SSE state is saved and restored on every switch, which of course raises performance concerns. Other approaches have security concerns and SMP implications, and they affect the x86 arch and the Zephyr project as a whole. Discussion is needed, so we punt and go with the straightforward solution for now.

Signed-off-by: Charles E. Youse <charles.youse@intel.com>
commit a5eea17dda
parent 2e788040d8
7 changed files with 67 additions and 3 deletions
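For readers who want the idea behind "eager" switching spelled out: on every context switch the entire FXSAVE image of the outgoing thread is saved and that of the incoming thread is restored, whether or not either thread ever touched the FPU or SSE registers. Below is a minimal C-flavored sketch of that idea; the struct and function names are hypothetical, and only the per-thread save area corresponds to the arch.sse field this commit adds.

/* Illustrative sketch, not code from this commit. */

struct fxsave_area {
	unsigned char data[512];		/* FXSAVE/FXRSTOR image */
} __attribute__((aligned(16)));			/* the instructions demand 16-byte alignment */

struct fake_thread {
	struct fxsave_area sse;			/* analogous to thread->arch.sse */
	/* ... general-purpose register state, etc. ... */
};

/* Eager switch: always pay for FXSAVE + FXRSTOR, even if neither thread
 * uses floating point.  Simple and safe (no lazy-#NM trickery, no chance
 * of leaking another thread's FP registers), but a fixed per-switch cost.
 */
static inline void eager_fp_switch(struct fake_thread *out, struct fake_thread *in)
{
	__asm__ volatile("fxsave %0"  : "=m"(out->sse));
	__asm__ volatile("fxrstor %0" : : "m"(in->sse));
}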
@@ -53,11 +53,12 @@ __start:
 	movl $(exception_stack + CONFIG_EXCEPTION_STACK_SIZE), %esp

-	/* transition to long mode, by the book. */
+	/* transition to long mode. along the way, we enable SSE. */

-	movl %cr4, %eax		/* enable PAE */
-	orl $CR4_PAE, %eax
+	movl %cr4, %eax		/* enable PAE and SSE */
+	orl $(CR4_PAE | CR4_OSFXSR), %eax
 	movl %eax, %cr4
+	clts

 	movl $pml4, %eax	/* load page base */
 	movl %eax, %cr3
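As an aside, the two boot-path additions above can also be expressed from C with inline assembly. The sketch below is purely illustrative (the function name is made up): set CR4.OSFXSR so FXSAVE/FXRSTOR and SSE instructions are allowed, then CLTS to clear CR0.TS so the first floating-point instruction does not fault with #NM.

/* Illustrative sketch, not code from this commit. */

#define CR4_OSFXSR_BIT	(1UL << 9)	/* 0x200, same bit as the CR4_OSFXSR define later in this diff */

static inline void enable_sse_from_c(void)
{
	unsigned long cr4;

	__asm__ volatile("mov %%cr4, %0" : "=r"(cr4));
	cr4 |= CR4_OSFXSR_BIT;			/* allow FXSAVE/FXRSTOR and SSE insns */
	__asm__ volatile("mov %0, %%cr4" : : "r"(cr4));

	__asm__ volatile("clts");		/* CR0.TS = 0: no #NM on first FP use */
}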
@@ -105,12 +106,38 @@ __start:
 1:
 #endif /* CONFIG_X86_MULTIBOOT_INFO */

+	/*
+	 * set up SSE in case something uses the floating-point unit during
+	 * early initialization (either directly, or if GCC gets clever)
+	 */
+
+	xorl %edi, %edi
+	call x86_sse_init
+
 	/* don't replace CALL with JMP; honor the ABI stack alignment! */

 	call z_cstart

 stop:	jmp stop

+/*
+ * void x86_sse_init(struct k_thread *thread);
+ *
+ * Initialize floating-point state to something sane. If 'thread' is
+ * not NULL, then the resulting FP state is saved to thread->arch.sse.
+ */
+
+.global x86_sse_init
+x86_sse_init:
+	fninit
+	ldmxcsr mxcsr
+	testq %rdi, %rdi
+	jz 1f
+	fxsave _thread_offset_to_sse(%rdi)
+1:	retq
+
+mxcsr:	.long X86_MXCSR_SANE

 /*
  * FIXME: The multiboot header is identical (for obvious reasons) to the
  * version in ia32/crt0.S. They should be refactored into a common file.
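The comment block above is the whole contract of x86_sse_init(): FNINIT plus a sane MXCSR, and optionally an FXSAVE of that clean state into a thread's save area. For clarity, here is a hedged C sketch of the two ways it ends up being called (the example function names are invented; the real call sites are the __start change above and the z_new_thread() hunk further down):

/* Illustrative sketch, not code from this commit. */

struct k_thread;					/* opaque here */
extern void x86_sse_init(struct k_thread *thread);	/* implemented in assembly above */

static void example_boot_cpu_init(void)
{
	x86_sse_init(NULL);		/* just put the FPU/SSE unit in a sane state */
}

static void example_thread_create(struct k_thread *thread)
{
	x86_sse_init(thread);		/* same, plus FXSAVE into thread->arch.sse */
}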
@@ -188,6 +215,7 @@ __resume:
 	testb $_THREAD_SWAPPED, _thread_offset_to_thread_state(%rsi)
 	jnz 1f

+	fxrstor _thread_offset_to_sse(%rsi)
 	movq _thread_offset_to_rcx(%rsi), %rcx
 	movq _thread_offset_to_rdx(%rsi), %rdx
 	movq _thread_offset_to_rdi(%rsi), %rdi
@@ -403,6 +431,18 @@ irq:
 1:	cmpl $1, _kernel_offset_to_nested(%rsi)
 	je irq_enter_unnested

+	/*
+	 * if we're a nested interrupt, we have to dump the state to the
+	 * stack. we play some games here to re-arrange the stack thusly:
+	 *
+	 * SS RSP RFLAGS CS RIP RAX RSI RCX RDX RBX
+	 * RDI RBP R8 R9 R10 R11 R12 R13 R14 R15
+	 * X86_FXSAVE_SIZE bytes of SSE data  <-- RSP points here
+	 *
+	 * note that the final value of RSP must be 16-byte aligned here,
+	 * both to satisfy FXSAVE/FXRSTOR but also to honor the C ABI.
+	 */
+
 irq_enter_nested: /* Nested IRQ: dump register state to stack. */
 	pushq %rcx
 	movq 16(%rsp), %rcx	/* RCX = vector */
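The layout in the comment above can also be pictured as a C struct laid out from the lowest address (where RSP ends up) to the highest (the CPU-pushed frame). This struct is purely illustrative, does not exist in the source, and assumes the registers are saved in exactly the order the comment lists:

/* Illustrative only: what memory at RSP looks like once a nested IRQ
 * has finished saving state (lowest address first).
 */
struct nested_irq_frame {
	unsigned char sse[512] __attribute__((aligned(16)));	/* FXSAVE area; RSP points here */
	unsigned long r15, r14, r13, r12, r11, r10, r9, r8;
	unsigned long rbp, rdi, rbx, rdx, rcx, rsi, rax;
	unsigned long rip, cs, rflags, rsp, ss;			/* pushed by the CPU */
};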
@@ -419,11 +459,14 @@ irq_enter_nested: /* Nested IRQ: dump register state to stack. */
 	pushq %r13
 	pushq %r14
 	pushq %r15
+	subq $X86_FXSAVE_SIZE, %rsp
+	fxsave (%rsp)
 	jmp irq_dispatch

 irq_enter_unnested: /* Not nested: dump state to thread struct for __resume */
 	movq _kernel_offset_to_current(%rsi), %rsi
 	andb $(~_THREAD_SWAPPED), _thread_offset_to_thread_state(%rsi)
+	fxsave _thread_offset_to_sse(%rsi)
 	movq %rbx, _thread_offset_to_rbx(%rsi)
 	movq %rbp, _thread_offset_to_rbp(%rsi)
 	movq %r12, _thread_offset_to_r12(%rsi)
@@ -469,6 +512,8 @@ irq_dispatch:
 	jz __resume	/* not nested, just __resume (might change threads) */

 irq_exit_nested:
+	fxrstor (%rsp)
+	addq $X86_FXSAVE_SIZE, %rsp
 	popq %r15
 	popq %r14
 	popq %r13
@@ -8,6 +8,8 @@
 #include <kernel_structs.h>
 #include <kernel_internal.h>

+extern void x86_sse_init(struct k_thread *); /* in locore.S */
+
 void z_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 		  size_t stack_size, k_thread_entry_t entry,
 		  void *parameter1, void *parameter2, void *parameter3,
@@ -26,4 +28,6 @@ void z_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 	thread->arch.rsi = (long) parameter1;
 	thread->arch.rdx = (long) parameter2;
 	thread->arch.rcx = (long) parameter3;
+
+	x86_sse_init(thread);
 }
@@ -26,3 +26,4 @@ GEN_OFFSET_SYM(_thread_arch_t, r8);
 GEN_OFFSET_SYM(_thread_arch_t, r9);
 GEN_OFFSET_SYM(_thread_arch_t, r10);
 GEN_OFFSET_SYM(_thread_arch_t, r11);
+GEN_OFFSET_SYM(_thread_arch_t, sse);
@@ -6,6 +6,14 @@
 #ifndef ZEPHYR_ARCH_X86_INCLUDE_INTEL64_KERNEL_ARCH_DATA_H_
 #define ZEPHYR_ARCH_X86_INCLUDE_INTEL64_KERNEL_ARCH_DATA_H_

+/*
+ * Some SSE definitions. Ideally these will ultimately be shared with 32-bit.
+ */
+
+#define X86_FXSAVE_SIZE		512	/* size and alignment of buffer ... */
+#define X86_FXSAVE_ALIGN	16	/* ... for FXSAVE/FXRSTOR ops */
+#define X86_MXCSR_SANE		0x1dc0	/* enable division-by-zero exception */
+
 /*
  * A flag for k_thread.thread_state to tell __resume that the thread
  * voluntarily switched itself out, so only a portion of the register
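One value above is worth decoding. Against the power-on MXCSR default of 0x1f80 (all SIMD exceptions masked), X86_MXCSR_SANE (0x1dc0) leaves the zero-divide mask ZM clear, which is what the "enable division-by-zero exception" comment refers to, and also sets DAZ (denormals-are-zero). The bit names below follow the standard MXCSR layout and are not definitions from this commit:

/* Illustrative decode of X86_MXCSR_SANE (0x1dc0); not part of the commit. */
#define MXCSR_DAZ	(1 << 6)	/* denormals-are-zero */
#define MXCSR_IM	(1 << 7)	/* invalid-operation exception mask */
#define MXCSR_DM	(1 << 8)	/* denormal-operand exception mask */
#define MXCSR_ZM	(1 << 9)	/* zero-divide exception mask -- left CLEAR */
#define MXCSR_OM	(1 << 10)	/* overflow exception mask */
#define MXCSR_UM	(1 << 11)	/* underflow exception mask */
#define MXCSR_PM	(1 << 12)	/* precision exception mask */

/* 0x1dc0 == MXCSR_DAZ | MXCSR_IM | MXCSR_DM | MXCSR_OM | MXCSR_UM | MXCSR_PM:
 * everything masked except zero-divide, so SIMD division by zero traps.
 */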
@@ -9,6 +9,7 @@
 #ifndef _ASMLANGUAGE

 #include <zephyr/types.h>
+#include <kernel_arch_data.h>

 /*
  * The _callee_saved registers are unconditionally saved/restored across
@@ -41,6 +42,7 @@ struct _thread_arch {
 	u64_t r9;
 	u64_t r10;
 	u64_t r11;
+	char __aligned(X86_FXSAVE_ALIGN) sse[X86_FXSAVE_SIZE];
 };

 typedef struct _thread_arch _thread_arch_t;
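Since FXSAVE/FXRSTOR fault on a misaligned operand, the new member is declared __aligned(X86_FXSAVE_ALIGN), which also raises the alignment of the containing struct so properly allocated k_thread objects keep the area on a 16-byte boundary. A compile-time check along the following lines (hypothetical, not in the commit) would catch the member ever drifting off that offset:

/* Hypothetical sanity check, not part of the commit. */
#include <stddef.h>

_Static_assert((offsetof(struct _thread_arch, sse) % X86_FXSAVE_ALIGN) == 0,
	       "sse save area must be 16-byte aligned for FXSAVE/FXRSTOR");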
@@ -62,4 +62,7 @@
 #define _thread_offset_to_r11 \
 	(___thread_t_arch_OFFSET + ___thread_arch_t_r11_OFFSET)

+#define _thread_offset_to_sse \
+	(___thread_t_arch_OFFSET + ___thread_arch_t_sse_OFFSET)
+
 #endif /* ZEPHYR_ARCH_X86_INCLUDE_INTEL64_OFFSETS_SHORT_ARCH_H_ */
@@ -49,6 +49,7 @@
 #define CR0_PG		0x80000000	/* enable paging */
 #define CR0_WP		0x00010000	/* honor W bit even when supervisor */
 #define CR4_PAE		0x00000020	/* enable PAE */
+#define CR4_OSFXSR	0x00000200	/* enable SSE (OS FXSAVE/RSTOR) */

 #ifdef CONFIG_X86_LONGMODE
 #include <intel64/kernel_arch_data.h>