arm64: improved arch_switch() implementation
Make it optimal without the need for an SVC/exception roundtrip on
every context switch.

Performance numbers from tests/benchmarks/sched:

Before: unpend  85 ready  58 switch 258 pend 231 tot 632 (avg 699)
After:  unpend  85 ready  59 switch 115 pend 138 tot 397 (avg 478)

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>

This commit is contained in:
parent a5b33f89b7
commit 8affac64a7

10 changed files with 63 additions and 49 deletions

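For reference, the quoted numbers amount to roughly a 55% reduction in switch cost and a 37% reduction in total. A minimal standalone check of that arithmetic (the figures are copied from the commit message above; the program itself is purely illustrative):

```c
#include <stdio.h>

int main(void)
{
	/* Cycle counts copied from the commit message (tests/benchmarks/sched) */
	const double switch_before = 258.0, switch_after = 115.0;
	const double tot_before = 632.0, tot_after = 397.0;

	/* Prints approximately: switch: -55.4%  tot: -37.2% */
	printf("switch: -%.1f%%\n",
	       100.0 * (switch_before - switch_after) / switch_before);
	printf("tot:    -%.1f%%\n",
	       100.0 * (tot_before - tot_after) / tot_before);
	return 0;
}
```
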
@@ -268,20 +268,19 @@ void z_arm64_fpu_trap(z_arch_esf_t *esf)
 /*
  * Perform lazy FPU context switching by simply granting or denying
  * access to FP regs based on FPU ownership before leaving the last
- * exception level. If current thread doesn't own the FP regs then
- * it will trap on its first access and then the actual FPU context
- * switching will occur.
- *
- * This is called on every exception exit except for z_arm64_fpu_trap().
+ * exception level in case of exceptions, or during a thread context
+ * switch with the exception level of the new thread being 0.
+ * If current thread doesn't own the FP regs then it will trap on its
+ * first access and then the actual FPU context switching will occur.
  */
-void z_arm64_fpu_exit_exc(void)
+static void fpu_access_update(unsigned int exc_update_level)
 {
 	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
 
 	uint64_t cpacr = read_cpacr_el1();
 
-	if (arch_exception_depth() == 1) {
-		/* We're about to leave exception mode */
+	if (arch_exception_depth() == exc_update_level) {
+		/* We're about to execute non-exception code */
 		if (_current_cpu->arch.fpu_owner == _current) {
 			/* turn on FPU access */
 			write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
@@ -291,14 +290,34 @@ void z_arm64_fpu_exit_exc(void)
 		}
 	} else {
 		/*
-		 * Shallower exception levels should always trap on FPU
+		 * Any new exception level should always trap on FPU
 		 * access as we want to make sure IRQs are disabled before
-		 * granting them access.
+		 * granting it access (see z_arm64_fpu_trap() documentation).
 		 */
 		write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
 	}
 }
 
+/*
+ * This is called on every exception exit except for z_arm64_fpu_trap().
+ * In that case the exception level of interest is 1 (soon to be 0).
+ */
+void z_arm64_fpu_exit_exc(void)
+{
+	fpu_access_update(1);
+}
+
+/*
+ * This is called from z_arm64_context_switch(). FPU access may be granted
+ * only if exception level is 0. If we switch to a thread that is still in
+ * some exception context then FPU access would be re-evaluated at exception
+ * exit time via z_arm64_fpu_exit_exc().
+ */
+void z_arm64_fpu_thread_context_switch(void)
+{
+	fpu_access_update(0);
+}
+
 int arch_float_disable(struct k_thread *thread)
 {
 	if (thread != NULL) {

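The refactored fpu_access_update() above grants direct FP register access only when the code about to run is non-exception code (exception depth equal to the update level) and the scheduled thread already owns the FPU; in every other case access stays trapped so the real FPU context switch happens lazily in z_arm64_fpu_trap(). A minimal user-space model of just that decision, assuming nothing beyond the logic visible in the hunk (the fpu_access_allowed() helper is hypothetical and does not exist in the tree):

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical helper modeling the decision made by fpu_access_update(). */
static bool fpu_access_allowed(unsigned int exception_depth,
			       unsigned int exc_update_level,
			       bool current_owns_fpu)
{
	/* Grant direct FP register access only when we are about to run
	 * non-exception code (depth == update level) and the scheduled
	 * thread already owns the FPU context; otherwise keep access
	 * trapped so the actual FPU switch happens lazily on first use. */
	return exception_depth == exc_update_level && current_owns_fpu;
}

int main(void)
{
	/* exception exit (z_arm64_fpu_exit_exc): update level is 1 */
	printf("%d\n", fpu_access_allowed(1, 1, true));   /* 1: access granted */
	/* thread switch (z_arm64_fpu_thread_context_switch): update level is 0 */
	printf("%d\n", fpu_access_allowed(1, 0, true));   /* 0: re-evaluated at exception exit */
	/* non-owner thread: first FP access will trap */
	printf("%d\n", fpu_access_allowed(0, 0, false));  /* 0: lazy trap */
	return 0;
}
```
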
@@ -38,7 +38,7 @@ GEN_NAMED_OFFSET_SYM(_callee_saved_t, x23, x23_x24);
 GEN_NAMED_OFFSET_SYM(_callee_saved_t, x25, x25_x26);
 GEN_NAMED_OFFSET_SYM(_callee_saved_t, x27, x27_x28);
 GEN_NAMED_OFFSET_SYM(_callee_saved_t, x29, x29_sp_el0);
-GEN_NAMED_OFFSET_SYM(_callee_saved_t, sp_elx, sp_elx);
+GEN_NAMED_OFFSET_SYM(_callee_saved_t, sp_elx, sp_elx_lr);
 
 GEN_ABSOLUTE_SYM(___callee_saved_t_SIZEOF, sizeof(struct _callee_saved));
 

@@ -165,6 +165,9 @@ switch_el:
 	/* EL1 init */
 	bl	z_arm64_el1_init
 
+	/* We want to use SP_ELx from now on */
+	msr	SPSel, #1
+
 	/* Enable SError interrupts */
 	msr	DAIFClr, #(DAIFCLR_ABT_BIT)
 	isb

@@ -24,7 +24,9 @@ _ASM_FILE_PROLOGUE
  * Routine to handle context switches
  *
  * This function is directly called either by _isr_wrapper() in case of
- * preemption, or z_arm64_sync_exc() in case of cooperative switching.
+ * preemption, or arch_switch() in case of cooperative switching.
+ *
+ * void z_arm64_context_switch(struct k_thread *new, struct k_thread *old);
  */
 
 GTEXT(z_arm64_context_switch)
@@ -40,9 +42,9 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
 	stp	x27, x28, [x1, #_thread_offset_to_callee_saved_x27_x28]
 	stp	x29, x4,  [x1, #_thread_offset_to_callee_saved_x29_sp_el0]
 
-	/* Save the current SP_ELx */
+	/* Save the current SP_ELx and return address */
 	mov	x4, sp
-	str	x4, [x1, #_thread_offset_to_callee_saved_sp_elx]
+	stp	x4, lr, [x1, #_thread_offset_to_callee_saved_sp_elx_lr]
 
 	/* save current thread's exception depth */
 	mrs	x4, tpidrro_el0
@@ -55,6 +57,17 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
 	orr	x4, x4, x2, lsl #TPIDRROEL0_EXC_SHIFT
 	msr	tpidrro_el0, x4
 
+#ifdef CONFIG_FPU_SHARING
+	/*
+	 * Do this after tpidrro_el0 is updated with the new exception
+	 * depth value, and before old->switch_handle is updated (making
+	 * it available for grab by another CPU) as we still use its stack.
+	 */
+	stp	x0, x1, [sp, #-16]!
+	bl	z_arm64_fpu_thread_context_switch
+	ldp	x0, x1, [sp], #16
+#endif
+
 #ifdef CONFIG_SMP
 	/* save old thread into switch handle which is required by
 	 * wait_for_switch
@@ -83,8 +96,8 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
 	/* Restore SP_EL0 */
 	msr	sp_el0, x4
 
-	/* Restore SP_EL1 */
-	ldr	x4, [x0, #_thread_offset_to_callee_saved_sp_elx]
+	/* Restore SP_EL1 and return address */
+	ldp	x4, lr, [x0, #_thread_offset_to_callee_saved_sp_elx_lr]
 	mov	sp, x4
 
 #ifdef CONFIG_USERSPACE
@@ -99,7 +112,7 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
 	ldp	xzr, x30, [sp], #16
 #endif
 
-	/* Return to z_arm64_sync_exc() or _isr_wrapper() */
+	/* Return to arch_switch() or _isr_wrapper() */
 	ret
 
 /*
@@ -131,9 +144,6 @@ SECTION_FUNC(TEXT, z_arm64_sync_exc)
 	/* Demux the SVC call */
 	and	x1, x0, #0xff
 
-	cmp	x1, #_SVC_CALL_CONTEXT_SWITCH
-	beq	context_switch
-
 	cmp	x1, #_SVC_CALL_RUNTIME_EXCEPT
 	beq	oops
 
@@ -179,22 +189,6 @@ oops:
 	mov	x0, sp
 	b	z_arm64_do_kernel_oops
 
-context_switch:
-	/*
-	 * Retrieve x0 and x1 from the stack:
-	 *
-	 *  - x0 = new_thread->switch_handle = switch_to thread
-	 *  - x1 = &old_thread->switch_handle = current thread
-	 */
-	ldp	x0, x1, [sp, ___esf_t_x0_x1_OFFSET]
-
-	/* Get old thread from x1 */
-	sub	x1, x1, ___thread_t_switch_handle_OFFSET
-
-	/* Switch thread */
-	bl	z_arm64_context_switch
-	b	z_arm64_exit_exc
-
 inv:
 	mov	x0, #0 /* K_ERR_CPU_EXCEPTION */
 	mov	x1, sp
@@ -202,8 +196,3 @@ inv:
 
 	/* Return here only in case of recoverable error */
 	b	z_arm64_exit_exc
-
-GTEXT(z_arm64_call_svc)
-SECTION_FUNC(TEXT, z_arm64_call_svc)
-	svc	#_SVC_CALL_CONTEXT_SWITCH
-	ret

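With the SVC path gone, z_arm64_context_switch() is entered as an ordinary function call: lr and sp_elx of the outgoing thread are stashed in its callee-saved area, and the final `ldp x4, lr` plus `ret` resumes the incoming thread at its own saved call site. A rough user-space analogy of that save/restore-and-return mechanism, using only standard C setjmp()/longjmp() (an analogy only, not how the kernel implements it):

```c
#include <setjmp.h>
#include <stdio.h>

static jmp_buf saved_context;

int main(void)
{
	/* setjmp() stashes the callee-saved registers plus the return
	 * address, much like the stp of x19..x29, sp_elx and lr above. */
	if (setjmp(saved_context) == 0) {
		printf("context saved, 'switching'...\n");
		/* longjmp() restores them and resumes after the save point,
		 * the way ldp x4, lr + ret resumes the incoming thread. */
		longjmp(saved_context, 1);
	}
	printf("resumed at the saved return point\n");
	return 0;
}
```
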
@@ -70,6 +70,7 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 		     char *stack_ptr, k_thread_entry_t entry,
 		     void *p1, void *p2, void *p3)
 {
+	extern void z_arm64_exit_exc(void);
 	z_arch_esf_t *pInitCtx;
 
 	/*
@@ -118,6 +119,7 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 	 * z_arm64_userspace_enter() (see comments there)
 	 */
 	thread->callee_saved.sp_elx = (uint64_t)pInitCtx;
+	thread->callee_saved.lr = (uint64_t)z_arm64_exit_exc;
 
 	thread->switch_handle = thread;
 }

@@ -31,9 +31,6 @@ static ALWAYS_INLINE bool arch_is_in_isr(void)
 	return arch_curr_cpu()->nested != 0U;
 }
 
-
-extern void z_arm64_call_svc(void *switch_to, void **switched_from);
-
 #ifdef __cplusplus
 }
 #endif

@@ -34,9 +34,13 @@ static ALWAYS_INLINE void arch_kernel_init(void)
 
 static inline void arch_switch(void *switch_to, void **switched_from)
 {
-	z_arm64_call_svc(switch_to, switched_from);
+	extern void z_arm64_context_switch(struct k_thread *new,
+					   struct k_thread *old);
+	struct k_thread *new = switch_to;
+	struct k_thread *old = CONTAINER_OF(switched_from, struct k_thread,
+					    switch_handle);
 
-	return;
+	z_arm64_context_switch(new, old);
 }
 
 extern void z_arm64_fatal_error(z_arch_esf_t *esf, unsigned int reason);

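arch_switch() above receives &old->switch_handle from the scheduler and recovers the enclosing thread with CONTAINER_OF(). A tiny standalone demonstration of that pointer arithmetic, using a simplified macro and a fake_thread stand-in instead of the real struct k_thread:

```c
#include <stddef.h>
#include <stdio.h>

/* Simplified CONTAINER_OF() for this demo (offsetof-based). */
#define CONTAINER_OF(ptr, type, field) \
	((type *)(((char *)(ptr)) - offsetof(type, field)))

/* Stand-in for struct k_thread; only the fields the demo needs. */
struct fake_thread {
	int priority;
	void *switch_handle;
};

int main(void)
{
	struct fake_thread t = { .priority = 7, .switch_handle = NULL };

	/* The scheduler hands arch_switch() a pointer to the outgoing
	 * thread's switch_handle field... */
	void **switched_from = &t.switch_handle;

	/* ...and CONTAINER_OF() recovers the enclosing thread object. */
	struct fake_thread *old =
		CONTAINER_OF(switched_from, struct fake_thread, switch_handle);

	printf("recovered priority: %d (expect 7)\n", old->priority);
	return 0;
}
```
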
@@ -24,7 +24,7 @@
 	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_x27_x28_OFFSET)
 #define _thread_offset_to_callee_saved_x29_sp_el0 \
 	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_x29_sp_el0_OFFSET)
-#define _thread_offset_to_callee_saved_sp_elx \
-	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_sp_elx_OFFSET)
+#define _thread_offset_to_callee_saved_sp_elx_lr \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_sp_elx_lr_OFFSET)
 
 #endif /* ZEPHYR_ARCH_ARM64_INCLUDE_OFFSETS_SHORT_ARCH_H_ */

@@ -16,7 +16,6 @@
 #ifndef ZEPHYR_INCLUDE_ARCH_ARM64_SYSCALL_H_
 #define ZEPHYR_INCLUDE_ARCH_ARM64_SYSCALL_H_
 
-#define _SVC_CALL_CONTEXT_SWITCH	0
 #define _SVC_CALL_IRQ_OFFLOAD		1
 #define _SVC_CALL_RUNTIME_EXCEPT	2
 #define _SVC_CALL_SYSTEM_CALL		3

@@ -36,6 +36,7 @@ struct _callee_saved {
 	uint64_t x29;
 	uint64_t sp_el0;
 	uint64_t sp_elx;
+	uint64_t lr;
 };
 
 typedef struct _callee_saved _callee_saved_t;

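The new lr slot is also what makes the direct call work for a freshly created thread: arch_new_thread() (see the hunk further up) seeds callee_saved.lr with z_arm64_exit_exc, so the very first `ldp x4, lr` / `ret` in z_arm64_context_switch() "returns" into the exception-exit path that unwinds the initial ESF. A small sketch of that idea with a plain function pointer standing in for the saved x30 (the fake_* names are illustrative, not from the tree):

```c
#include <stdio.h>

/* Simplified stand-in for the relevant part of struct _callee_saved. */
struct fake_callee_saved {
	unsigned long sp_elx;
	void (*lr)(void);	/* the newly added slot, holding "x30" */
};

/* Stand-in for z_arm64_exit_exc: in the real code it unwinds the
 * synthetic ESF built by arch_new_thread() and enters the thread. */
static void fake_exit_exc(void)
{
	printf("first switch-in: returning through the exception exit path\n");
}

int main(void)
{
	/* arch_new_thread() seeds lr so a brand-new thread can be entered
	 * by a plain ret from z_arm64_context_switch(). */
	struct fake_callee_saved cs = { .sp_elx = 0, .lr = fake_exit_exc };

	cs.lr();	/* what the asm's final ret effectively does */
	return 0;
}
```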