arch: arm: aarch32: cortex_a_r: Add shared FPU support

This adds lazy floating point context switching.  On svc/irq entrance,
the VFP is disabled and a pointer to the exception stack frame is saved
away.  If the esf pointer is still valid on exception exit, then no
other context used the VFP so the context is still valid and nothing
needs to be restored.  If the esf pointer is NULL on exception exit,
then some other context used the VFP and the floating point context is
restored from the esf.

The undefined instruction handler is responsible for saving away the
floating point context if needed.  If the handler is in the first
irq/svc context and the current thread uses the VFP, then the float
context needs to be saved.  Also, if the handler is in a nested context
and the previous context was using the VFP, save the float context.

Signed-off-by: Bradley Bolen <bbolen@lexmark.com>
This commit is contained in:
Bradley Bolen 2020-12-10 08:45:20 -05:00 committed by Stephanos Ioannidis
commit 88ba97fea4
9 changed files with 363 additions and 10 deletions

View file

@ -30,6 +30,9 @@
_ASM_FILE_PROLOGUE
#if defined(CONFIG_FPU_SHARING)
GTEXT(z_arm_fault_undef_instruction_fp)
#endif
GTEXT(z_arm_fault_undef_instruction)
GTEXT(z_arm_fault_prefetch)
GTEXT(z_arm_fault_data)
@ -47,6 +50,19 @@ GTEXT(z_arm_data_abort)
stmfd sp, {r0-r3, r12, lr}^
sub sp, #24
#if defined(CONFIG_FPU_SHARING)
sub sp, #___fpu_t_SIZEOF
vmrs r1, fpexc
mov r0, #FPEXC_EN
vmsr fpexc, r0
vmrs r0, fpscr
mov r2, sp
vstmia r2!, {s0-s15}
stm r2, {r0, r1}
#endif
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
/* Pointer to extra esf info */
sub sp, #___extra_esf_info_t_SIZEOF
@ -100,7 +116,56 @@ SECTION_SUBSEC_FUNC(TEXT, __exc, z_arm_undef_instruction)
subne lr, #2 /* Thumb (T_BIT) */
pop {r0}
exception_entry MODE_UND
/*
* Store r0-r3, r12, lr, lr_und and spsr_und into the stack to
* construct an exception stack frame.
*/
srsdb sp!, #MODE_UND
stmfd sp, {r0-r3, r12, lr}^
sub sp, #24
/* Increment exception nesting count */
ldr r2, =_kernel
ldr r1, [r2, #_kernel_offset_to_nested]
add r1, r1, #1
str r1, [r2, #_kernel_offset_to_nested]
#if defined(CONFIG_FPU_SHARING)
sub sp, #___fpu_t_SIZEOF
bl z_arm_fault_undef_instruction_fp
cmp r0, #0
beq z_arm_exc_exit
vmrs r1, fpexc
mov r0, #FPEXC_EN
vmsr fpexc, r0
vmrs r0, fpscr
mov r2, sp
vstmia r2!, {s0-s15}
stm r2, {r0, r1}
#endif
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
/* Pointer to extra esf info */
sub sp, #___extra_esf_info_t_SIZEOF
mov r0, #0
str r0, [sp, #4]
str r0, [sp, #8]
sub r1, sp, #___callee_saved_t_SIZEOF
str r1, [sp]
cps #MODE_SYS
stm r1, {r4-r11, sp}
cps #MODE_UND
mov r0, sp
mov sp, r1
#else
mov r0, sp
#endif
bl z_arm_fault_undef_instruction
exception_exit
@ -125,6 +190,12 @@ SECTION_SUBSEC_FUNC(TEXT, __exc, z_arm_prefetch_abort)
b z_arm_exc_exit
#if defined(CONFIG_FPU_SHARING)
#define FPU_SF_SIZE ___fpu_t_SIZEOF
#else
#define FPU_SF_SIZE 0
#endif
/**
* @brief Data abort exception handler
*
@ -148,10 +219,10 @@ SECTION_SUBSEC_FUNC(TEXT, __exc, z_arm_data_abort)
* the true esf from the one passed to z_arm_fault_data.
*/
cmp r0, #0
ldreq r1, [sp, #24]
ldreq r1, [sp, #24 + FPU_SF_SIZE]
exception_exit
streq r1, [sp, #24]
streq r1, [sp, #24 + FPU_SF_SIZE]
b z_arm_exc_exit

View file

@ -62,6 +62,45 @@ system_thread_exit\@:
#endif
.endm
/*
 * Lazily restore the floating point context on exception exit.
 *
 * NOTE(review): r0 appears to hold the remaining exception nesting count
 * (0 when leaving the last nested context) -- inferred from the
 * surrounding exception-exit code; confirm against all call sites.
 * Clobbers r1-r3 and pops the FPU area off the exception stack frame.
 */
.macro fpu_exc_exit
#if defined(CONFIG_FPU_SHARING)
/*
 * If the floating point context pointer is null, then a context was
 * saved so restore the float context from the exception stack frame.
 */
ldr r2, =_kernel
ldr r1, [r2, #_kernel_offset_to_fp_ctx]
cmp r1, #0
beq vfp_restore\@
/*
 * fp_ctx is still non-null: no other context used the VFP while this
 * exception was active, so the live VFP registers are still valid and
 * nothing needs to be restored.
 *
 * If leaving the last interrupt context, remove the floating point
 * context pointer.
 */
cmp r0, #0
moveq r1, #0
streq r1, [r2, #_kernel_offset_to_fp_ctx]
b vfp_exit\@
vfp_restore\@:
/* r1 = saved fpscr, r2 = saved fpexc, loaded from the esf FPU area */
add r3, sp, #___fpu_sf_t_fpscr_OFFSET
ldm r3, {r1, r2}
/* Only restore if the VFP was enabled when the frame was captured */
tst r2, #FPEXC_EN
beq vfp_exit\@
vmsr fpexc, r2
vmsr fpscr, r1
vldmia sp, {s0-s15}
vfp_exit\@:
/* Leave the VFP disabled when leaving */
mov r1, #0
vmsr fpexc, r1
/* Pop the FPU area of the exception stack frame */
add sp, sp, #___fpu_t_SIZEOF
#endif
.endm
/**
* @brief Kernel housekeeping when exiting interrupt handler installed directly
* in the vector table
@ -133,6 +172,11 @@ __EXIT_INT:
* out or they are the args to _new_thread for a new thread.
*/
cps #MODE_SYS
#if defined(CONFIG_FPU_SHARING)
fpu_exc_exit
#endif
pop {r0-r3, r12, lr}
userspace_exc_exit
rfeia sp!
@ -173,6 +217,9 @@ SECTION_SUBSEC_FUNC(TEXT, _HandlerModeExit, z_arm_exc_exit)
*/
/* Clean up exception stack frame */
#if defined(CONFIG_FPU_SHARING)
add sp, sp, #___fpu_t_SIZEOF
#endif
add sp, #32
/*
@ -193,6 +240,9 @@ SECTION_SUBSEC_FUNC(TEXT, _HandlerModeExit, z_arm_exc_exit)
/* Return to the switched thread */
cps #MODE_SYS
#if defined(CONFIG_FPU_SHARING)
fpu_exc_exit
#endif
pop {r0-r3, r12, lr}
userspace_exc_exit
rfeia sp!
@ -203,6 +253,9 @@ __EXIT_EXC:
sub r0, r0, #1
str r0, [r3, #_kernel_offset_to_nested]
#if defined(CONFIG_FPU_SHARING)
add sp, sp, #___fpu_t_SIZEOF
#endif
/*
* Restore r0-r3, r12, lr, lr_und and spsr_und from the exception stack
* and return to the current thread.

View file

@ -86,6 +86,80 @@ static void dump_fault(uint32_t status, uint32_t addr)
}
#endif
#if defined(CONFIG_FPU_SHARING)
/**
 * @brief FPU undefined instruction fault handler
 *
 * Part of the lazy FPU context switching scheme: the VFP is disabled on
 * irq/svc entry, so the first FP instruction executed afterwards traps
 * here.  If the VFP was disabled, enable it, spill the previous owner's
 * caller-saved FP context if needed, and report "not a real fault" so
 * the instruction can be retried.
 *
 * @return true if the FPU was already enabled, implying a genuine
 *         undefined instruction; false if the FPU was disabled and has
 *         now been enabled (the faulting instruction should be retried).
 */
bool z_arm_fault_undef_instruction_fp(void)
{
	/*
	 * Assume this is a floating point instruction that faulted because
	 * the FP unit was disabled.  Enable the FP unit and try again.  If
	 * the FP was already enabled then this was an actual undefined
	 * instruction.
	 */
	if (__get_FPEXC() & FPEXC_EN) {
		return true;
	}

	__set_FPEXC(FPEXC_EN);

	if (_kernel.cpus[0].nested > 1) {
		/*
		 * If the nested count is greater than 1, the undefined
		 * instruction exception came from an irq/svc context.  (The
		 * irq/svc handler would have the nested count at 1 and then
		 * the undef exception would increment it to 2).
		 */
		struct __fpu_sf *spill_esf =
			(struct __fpu_sf *)_kernel.cpus[0].fp_ctx;

		/* No pending lazy-save location: nothing to spill */
		if (spill_esf == NULL) {
			return false;
		}

		/* Consume the pointer so the context is saved only once */
		_kernel.cpus[0].fp_ctx = NULL;

		/*
		 * If the nested count is 2 and the current thread has used the
		 * VFP (whether or not it was actually using the VFP before the
		 * current exception) OR if the nested count is greater than 2
		 * and the VFP was enabled on the irq/svc entrance for the
		 * saved exception stack frame, then save the floating point
		 * context because it is about to be overwritten.
		 */
		if (((_kernel.cpus[0].nested == 2)
		     && (_current->base.user_options & K_FP_REGS))
		    || ((_kernel.cpus[0].nested > 2)
			&& (spill_esf->undefined & FPEXC_EN))) {
			/*
			 * Spill VFP registers to specified exception stack
			 * frame
			 */
			spill_esf->undefined |= FPEXC_EN;
			spill_esf->fpscr = __get_FPSCR();
			__asm__ volatile (
				"vstmia %0, {s0-s15};\n"
				: : "r" (&spill_esf->s[0])
				: "memory"
				);
		}
	} else {
		/*
		 * If the nested count is one, a thread was the faulting
		 * context.  Just flag that this thread uses the VFP.  This
		 * means that a thread that uses the VFP does not have to,
		 * but should, set K_FP_REGS on thread creation.
		 */
		_current->base.user_options |= K_FP_REGS;
	}

	return false;
}
#endif
/**
* @brief Undefined instruction fault handler
*
@ -93,6 +167,20 @@ static void dump_fault(uint32_t status, uint32_t addr)
*/
bool z_arm_fault_undef_instruction(z_arch_esf_t *esf)
{
#if defined(CONFIG_FPU_SHARING)
/*
* This is a true undefined instruction and we will be crashing
* so save away the VFP registers.
*/
esf->fpu.undefined = __get_FPEXC();
esf->fpu.fpscr = __get_FPSCR();
__asm__ volatile (
"vstmia %0, {s0-s15};\n"
: : "r" (&esf->fpu.s[0])
: "memory"
);
#endif
/* Print fault information */
LOG_ERR("***** UNDEFINED INSTRUCTION ABORT *****");

View file

@ -88,6 +88,31 @@ isr_system_thread:
cps #MODE_SYS
push {r0-r3, r12, lr}
#if defined(CONFIG_FPU_SHARING)
sub sp, sp, #___fpu_t_SIZEOF
/*
* Note that this handler was entered with the VFP unit enabled.
* The undefined instruction handler uses this to know that it
* needs to save the current floating context.
*/
vmrs r0, fpexc
str r0, [sp, #___fpu_t_SIZEOF - 4]
/* Disable VFP */
mov r0, #0
vmsr fpexc, r0
/*
* Mark where to store the floating context for the undefined
* instruction handler
*/
ldr r2, =_kernel
ldr r0, [r2, #_kernel_offset_to_fp_ctx]
cmp r0, #0
streq sp, [r2, #_kernel_offset_to_fp_ctx]
#endif /* CONFIG_FPU_SHARING */
/*
* Use SVC mode stack for predictable interrupt behaviour; running ISRs
* in the SYS/USR mode stack (i.e. interrupted thread stack) leaves the

View file

@ -19,6 +19,7 @@
#include <offsets_short.h>
#include <arch/cpu.h>
#include <syscall.h>
#include <kernel.h>
#if defined(CONFIG_CPU_CORTEX_M)
#include <arch/arm/aarch32/cortex_m/cpu.h>
@ -126,6 +127,45 @@ out_fp_endif:
cps #MODE_SYS
stm r0, {r4-r11, sp}
cps #MODE_SVC
#if defined(CONFIG_FPU_SHARING)
ldrb r0, [r2, #_thread_offset_to_user_options]
tst r0, #K_FP_REGS /* _current->base.user_options & K_FP_REGS */
beq out_fp_inactive
mov ip, #FPEXC_EN
vmsr fpexc, ip
/*
* If the float context pointer is not null, then the VFP has not been
* used since this thread has used it. Consequently, the caller-saved
* float registers have not been saved away, so write them to the
* exception stack frame.
*/
ldr r0, [r1, #_kernel_offset_to_fp_ctx]
cmp r0, #0
beq out_store_thread_context
vstmia r0!, {s0-s15}
vmrs r3, fpscr
stm r0, {r3, ip}
out_store_thread_context:
/* Store s16-s31 to thread context */
add r0, r2, #_thread_offset_to_preempt_float
vstmia r0, {s16-s31}
mov ip, #0
vmsr fpexc, ip
out_fp_inactive:
/*
* The floating context has now been saved to the exception stack
* frame, so zero out the global pointer to note this.
*/
mov r0, #0
str r0, [r1, #_kernel_offset_to_fp_ctx]
#endif /* CONFIG_FPU_SHARING */
#else
#error Unknown ARM architecture
#endif /* CONFIG_ARMV6_M_ARMV8_M_BASELINE */
@ -362,6 +402,24 @@ _thread_irq_disabled:
ldm r0, {r4-r11, sp}
cps #MODE_SVC
#if defined(CONFIG_FPU_SHARING)
ldrb r0, [r2, #_thread_offset_to_user_options]
tst r0, #K_FP_REGS /* _current->base.user_options & K_FP_REGS */
beq in_fp_inactive
mov r3, #FPEXC_EN
vmsr fpexc, r3
/* Restore s16-s31 from thread context */
add r0, r2, #_thread_offset_to_preempt_float
vldmia r0, {s16-s31}
mov r3, #0
vmsr fpexc, r3
in_fp_inactive:
#endif /* CONFIG_FPU_SHARING */
#if defined (CONFIG_ARM_MPU)
/* r2 contains k_thread */
mov r0, r2
@ -608,6 +666,12 @@ valid_syscall_id:
#elif defined(CONFIG_ARMV7_R) || defined(CONFIG_AARCH32_ARMV8_R) \
|| defined(CONFIG_ARMV7_A)
#if defined(CONFIG_FPU_SHARING)
#define FPU_SF_SIZE ___fpu_t_SIZEOF
#else
#define FPU_SF_SIZE 0
#endif
/**
*
* @brief Service call handler
@ -650,7 +714,34 @@ svc_system_thread:
srsdb #MODE_SYS!
cps #MODE_SYS
push {r0-r3, r12, lr}
#if defined(CONFIG_FPU_SHARING)
sub sp, sp, #___fpu_t_SIZEOF
/*
* Note that this handler was entered with the VFP unit enabled.
* The undefined instruction handler uses this to know that it
* needs to save the current floating context.
*/
vmrs r0, fpexc
str r0, [sp, #___fpu_t_SIZEOF - 4]
/* Disable VFP */
mov r0, #0
vmsr fpexc, r0
/*
* Mark where to store the floating context for the undefined
* instruction handler
*/
ldr r2, =_kernel
ldr r0, [r2, #_kernel_offset_to_fp_ctx]
cmp r0, #0
streq sp, [r2, #_kernel_offset_to_fp_ctx]
#endif /* CONFIG_FPU_SHARING */
mov ip, sp
cps #MODE_SVC
/*
@ -735,7 +826,7 @@ _oops:
* the SVC.
*
* On SVC exception, the USER/SYSTEM stack looks like the following:
* r0 - r1 - r2 - r3 - r12 - LR - { possible FPU space } - PC - SPSR
* { possible FPU space } - r0 - r1 - r2 - r3 - r12 - LR - PC - SPSR
*
* Registers look like:
* r0 - arg1
@ -748,10 +839,11 @@ _oops:
* r8 - saved link register
*/
_do_syscall:
ldr r8, [ip, #24] /* grab address of LR from stack frame */
/* grab address of LR from stack frame */
ldr r8, [ip, #(FPU_SF_SIZE + ___basic_sf_t_pc_OFFSET)]
/* Make the exception return to system state */
ldr r1, [ip, #28]
ldr r1, [ip, #(FPU_SF_SIZE + ___basic_sf_t_xpsr_OFFSET)]
/* If leaving thumb mode, set the return address to thumb mode */
tst r1, #T_BIT
@ -759,14 +851,14 @@ _do_syscall:
bic r1, #(MODE_MASK | T_BIT)
orr r1, r1, #MODE_SYS
str r1, [ip, #28]
str r1, [ip, #(FPU_SF_SIZE + ___basic_sf_t_xpsr_OFFSET)]
/*
* Store the address of z_arm_do_syscall for the exit so the exception
* return goes there in system state.
*/
ldr r1, =z_arm_do_syscall
str r1, [ip, #24] /* overwrite the LR to point to z_arm_do_syscall */
str r1, [ip, #(FPU_SF_SIZE + ___basic_sf_t_pc_OFFSET)]
/* validate syscall limit, only set priv mode if valid */
ldr ip, =K_SYSCALL_LIMIT

View file

@ -112,6 +112,13 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
#endif /* CONFIG_COMPILER_ISA_THUMB2 */
#endif /* CONFIG_CPU_CORTEX_M */
#if !defined(CONFIG_CPU_CORTEX_M) \
&& defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
iframe = (struct __basic_sf *)
((uintptr_t)iframe - sizeof(struct __fpu_sf));
memset(iframe, 0, sizeof(struct __fpu_sf));
#endif
thread->callee_saved.psp = (uint32_t)iframe;
thread->arch.basepri = 0;
@ -470,7 +477,11 @@ int arch_float_disable(struct k_thread *thread)
thread->base.user_options &= ~K_FP_REGS;
#if defined(CONFIG_CPU_CORTEX_M)
__set_CONTROL(__get_CONTROL() & (~CONTROL_FPCA_Msk));
#else
__set_FPEXC(0);
#endif
/* No need to add an ISB barrier after setting the CONTROL
* register; arch_irq_unlock() already adds one.
@ -483,7 +494,7 @@ int arch_float_disable(struct k_thread *thread)
int arch_float_enable(struct k_thread *thread, unsigned int options)
{
/* This is not supported in Cortex-M and Cortex-R does not have FPU */
/* This is not supported in Cortex-M */
return -ENOTSUP;
}
#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
@ -508,7 +519,7 @@ static void z_arm_prepare_switch_to_main(void)
#else
__set_FPSCR(0);
#endif
#if defined(CONFIG_FPU_SHARING)
#if defined(CONFIG_CPU_CORTEX_M) && defined(CONFIG_FPU_SHARING)
/* In Sharing mode clearing FPSCR may set the CONTROL.FPCA flag. */
__set_CONTROL(__get_CONTROL() & (~(CONTROL_FPCA_Msk)));
__ISB();

View file

@ -126,6 +126,10 @@ struct _cpu {
uint8_t id;
#if defined(CONFIG_FPU_SHARING)
void *fp_ctx;
#endif
#ifdef CONFIG_SMP
/* True when _current is allowed to context switch */
uint8_t swap_ok;

View file

@ -34,6 +34,10 @@ GEN_ABSOLUTE_SYM(___cpu_t_SIZEOF, sizeof(struct _cpu));
GEN_OFFSET_SYM(_kernel_t, cpus);
#if defined(CONFIG_FPU_SHARING)
GEN_OFFSET_SYM(_cpu_t, fp_ctx);
#endif
#if defined(CONFIG_THREAD_MONITOR)
GEN_OFFSET_SYM(_kernel_t, threads);
#endif

View file

@ -24,6 +24,11 @@
#define _kernel_offset_to_current \
(___cpu_t_current_OFFSET)
#if defined(CONFIG_FPU_SHARING)
#define _kernel_offset_to_fp_ctx \
(___cpu_t_fp_ctx_OFFSET)
#endif /* CONFIG_FPU_SHARING */
#endif /* CONFIG_SMP */
#define _kernel_offset_to_idle \