x86: properly align initial dummy thread
x86-32 thread objects require special alignment because they contain a buffer that is passed to the fxsave/fxrstor instructions. This failed when the dummy thread was created in a stack frame, where that alignment is not guaranteed. Implement a custom swap to main for x86 which still uses a dummy thread, but places it in an unused, properly aligned part of the interrupt stack. Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
This commit is contained in:
parent
468efadd47
commit
d149909b03
3 changed files with 35 additions and 14 deletions
|
@ -38,6 +38,7 @@ config X86
|
||||||
select ARCH_IS_SET
|
select ARCH_IS_SET
|
||||||
select ATOMIC_OPERATIONS_BUILTIN
|
select ATOMIC_OPERATIONS_BUILTIN
|
||||||
select HAS_DTS
|
select HAS_DTS
|
||||||
|
select ARCH_HAS_CUSTOM_SWAP_TO_MAIN if !X86_64
|
||||||
help
|
help
|
||||||
x86 architecture
|
x86 architecture
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#include <kernel.h>
|
#include <kernel.h>
|
||||||
#include <ksched.h>
|
#include <ksched.h>
|
||||||
#include <arch/x86/mmustructs.h>
|
#include <arch/x86/mmustructs.h>
|
||||||
|
#include <kswap.h>
|
||||||
|
|
||||||
/* forward declaration */
|
/* forward declaration */
|
||||||
|
|
||||||
|
@ -115,3 +116,29 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
|
||||||
#endif /* CONFIG_LAZY_FPU_SHARING */
|
#endif /* CONFIG_LAZY_FPU_SHARING */
|
||||||
thread->arch.flags = 0;
|
thread->arch.flags = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The core kernel code puts the dummy thread on the stack, which unfortunately
|
||||||
|
* doesn't work for 32-bit x86 as k_thread objects must be aligned due to the
|
||||||
|
* buffer within them fed to fxsave/fxrstor.
|
||||||
|
*
|
||||||
|
* Use some sufficiently aligned bytes in the lower memory of the interrupt
|
||||||
|
* stack instead; otherwise the logic is more or less the same.
|
||||||
|
*/
|
||||||
|
/*
 * x86-32 implementation of the switch to the main thread.
 *
 * Builds a dummy thread object inside the buffer of interrupt stack 0
 * (rounded up to FP_REG_SET_ALIGN), initializes it, and then swaps away
 * from it. Control is not expected to return to this function.
 *
 * main_thread, main_stack, main_stack_size and _main are not referenced
 * in this body.
 * NOTE(review): presumably the main thread has already been made runnable
 * by the caller so that the swap below selects it -- confirm against the
 * core kernel init code.
 */
void arch_switch_to_main_thread(struct k_thread *main_thread,
|
||||||
|
k_thread_stack_t *main_stack,
|
||||||
|
size_t main_stack_size,
|
||||||
|
k_thread_entry_t _main)
|
||||||
|
{
|
||||||
|
/* Place the dummy thread at the first FP_REG_SET_ALIGN-aligned address
 * within the buffer of interrupt stack 0.
 */
struct k_thread *dummy_thread = (struct k_thread *)
|
||||||
|
ROUND_UP(Z_THREAD_STACK_BUFFER(z_interrupt_stacks[0]),
|
||||||
|
FP_REG_SET_ALIGN);
|
||||||
|
|
||||||
|
/* Rounding up the base address only aligns the object itself; check that
 * the floating point save area member inside it also lands on an
 * FP_REG_SET_ALIGN boundary.
 */
__ASSERT(((uintptr_t)(&dummy_thread->arch.preempFloatReg) %
|
||||||
|
FP_REG_SET_ALIGN) == 0,
|
||||||
|
"unaligned dummy thread %p float member %p",
|
||||||
|
dummy_thread, &dummy_thread->arch.preempFloatReg);
|
||||||
|
|
||||||
|
/* Initialize the dummy thread, then swap away from it. */
z_dummy_thread_init(dummy_thread);
|
||||||
|
z_swap_unlocked();
|
||||||
|
/* The swap above is not expected to return here. */
CODE_UNREACHABLE;
|
||||||
|
}
|
||||||
|
|
|
@ -26,12 +26,18 @@
|
||||||
* since the 'fxsave' and 'fxrstor' instructions require this. In all other
|
* since the 'fxsave' and 'fxrstor' instructions require this. In all other
|
||||||
* cases a 4 byte boundary is sufficient.
|
* cases a 4 byte boundary is sufficient.
|
||||||
*/
|
*/
|
||||||
|
#if defined(CONFIG_EAGER_FPU_SHARING) || defined(CONFIG_LAZY_FPU_SHARING)
|
||||||
#ifdef CONFIG_SSE
|
#ifdef CONFIG_SSE
|
||||||
#define FP_REG_SET_ALIGN 16
|
#define FP_REG_SET_ALIGN 16
|
||||||
#else
|
#else
|
||||||
#define FP_REG_SET_ALIGN 4
|
#define FP_REG_SET_ALIGN 4
|
||||||
#endif
|
#endif
|
||||||
|
#else
|
||||||
|
/* Unused, no special alignment requirements, use default alignment for
|
||||||
|
* char buffers on this arch
|
||||||
|
*/
|
||||||
|
#define FP_REG_SET_ALIGN 1
|
||||||
|
#endif /* CONFIG_*_FPU_SHARING */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Bits for _thread_arch.flags, see their use in intstub.S et al.
|
* Bits for _thread_arch.flags, see their use in intstub.S et al.
|
||||||
|
@ -230,19 +236,6 @@ struct _thread_arch {
|
||||||
unsigned excNestCount; /* nested exception count */
|
unsigned excNestCount; /* nested exception count */
|
||||||
#endif /* CONFIG_LAZY_FPU_SHARING */
|
#endif /* CONFIG_LAZY_FPU_SHARING */
|
||||||
|
|
||||||
/*
|
|
||||||
* The location of all floating point related structures/fields MUST be
|
|
||||||
* located at the end of struct k_thread. This way only the
|
|
||||||
* threads that actually utilize non-integer capabilities need to
|
|
||||||
* account for the increased memory required for storing FP state when
|
|
||||||
* sizing stacks.
|
|
||||||
*
|
|
||||||
* Given that stacks "grow down" on IA-32, and the TCS is located
|
|
||||||
* at the start of a thread's "workspace" memory, the stacks of
|
|
||||||
* threads that do not utilize floating point instruction can
|
|
||||||
* effectively consume the memory occupied by the 'tPreempFloatReg'
|
|
||||||
* struct without ill effect.
|
|
||||||
*/
|
|
||||||
tPreempFloatReg preempFloatReg; /* volatile float register storage */
|
tPreempFloatReg preempFloatReg; /* volatile float register storage */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue