diff --git a/arch/arm/core/aarch32/cortex_a_r/exc.S b/arch/arm/core/aarch32/cortex_a_r/exc.S index ee407ed7166..8017d0c855d 100644 --- a/arch/arm/core/aarch32/cortex_a_r/exc.S +++ b/arch/arm/core/aarch32/cortex_a_r/exc.S @@ -30,6 +30,9 @@ _ASM_FILE_PROLOGUE +#if defined(CONFIG_FPU_SHARING) +GTEXT(z_arm_fault_undef_instruction_fp) +#endif GTEXT(z_arm_fault_undef_instruction) GTEXT(z_arm_fault_prefetch) GTEXT(z_arm_fault_data) @@ -47,6 +50,19 @@ GTEXT(z_arm_data_abort) stmfd sp, {r0-r3, r12, lr}^ sub sp, #24 +#if defined(CONFIG_FPU_SHARING) + sub sp, #___fpu_t_SIZEOF + + vmrs r1, fpexc + mov r0, #FPEXC_EN + vmsr fpexc, r0 + vmrs r0, fpscr + + mov r2, sp + vstmia r2!, {s0-s15} + stm r2, {r0, r1} +#endif + #if defined(CONFIG_EXTRA_EXCEPTION_INFO) /* Pointer to extra esf info */ sub sp, #___extra_esf_info_t_SIZEOF @@ -100,7 +116,56 @@ SECTION_SUBSEC_FUNC(TEXT, __exc, z_arm_undef_instruction) subne lr, #2 /* Thumb (T_BIT) */ pop {r0} - exception_entry MODE_UND + /* + * Store r0-r3, r12, lr, lr_und and spsr_und into the stack to + * construct an exception stack frame. 
+ */ + srsdb sp!, #MODE_UND + stmfd sp, {r0-r3, r12, lr}^ + sub sp, #24 + + /* Increment exception nesting count */ + ldr r2, =_kernel + ldr r1, [r2, #_kernel_offset_to_nested] + add r1, r1, #1 + str r1, [r2, #_kernel_offset_to_nested] + +#if defined(CONFIG_FPU_SHARING) + sub sp, #___fpu_t_SIZEOF + + bl z_arm_fault_undef_instruction_fp + cmp r0, #0 + beq z_arm_exc_exit + + vmrs r1, fpexc + mov r0, #FPEXC_EN + vmsr fpexc, r0 + vmrs r0, fpscr + + mov r2, sp + vstmia r2!, {s0-s15} + stm r2, {r0, r1} +#endif + +#if defined(CONFIG_EXTRA_EXCEPTION_INFO) + /* Pointer to extra esf info */ + sub sp, #___extra_esf_info_t_SIZEOF + mov r0, #0 + str r0, [sp, #4] + str r0, [sp, #8] + + sub r1, sp, #___callee_saved_t_SIZEOF + str r1, [sp] + cps #MODE_SYS + stm r1, {r4-r11, sp} + cps #MODE_UND + + mov r0, sp + mov sp, r1 +#else + mov r0, sp +#endif + bl z_arm_fault_undef_instruction exception_exit @@ -125,6 +190,12 @@ SECTION_SUBSEC_FUNC(TEXT, __exc, z_arm_prefetch_abort) b z_arm_exc_exit +#if defined(CONFIG_FPU_SHARING) +#define FPU_SF_SIZE ___fpu_t_SIZEOF +#else +#define FPU_SF_SIZE 0 +#endif + /** * @brief Data abort exception handler * @@ -148,10 +219,10 @@ SECTION_SUBSEC_FUNC(TEXT, __exc, z_arm_data_abort) * the true esf from the one passed to z_arm_fault_data. */ cmp r0, #0 - ldreq r1, [sp, #24] + ldreq r1, [sp, #24 + FPU_SF_SIZE] exception_exit - streq r1, [sp, #24] + streq r1, [sp, #24 + FPU_SF_SIZE] b z_arm_exc_exit diff --git a/arch/arm/core/aarch32/cortex_a_r/exc_exit.S b/arch/arm/core/aarch32/cortex_a_r/exc_exit.S index 438f763be95..43d4183ee58 100644 --- a/arch/arm/core/aarch32/cortex_a_r/exc_exit.S +++ b/arch/arm/core/aarch32/cortex_a_r/exc_exit.S @@ -62,6 +62,45 @@ system_thread_exit\@: #endif .endm +.macro fpu_exc_exit /* restore VFP state if it was saved, then pop the FPU frame */ +#if defined(CONFIG_FPU_SHARING) + /* + * If the floating point context pointer is null, then a context was + * saved so restore the float context from the exception stack frame. 
+ */ + ldr r2, =_kernel + ldr r1, [r2, #_kernel_offset_to_fp_ctx] + cmp r1, #0 + beq vfp_restore\@ + + /* + * If leaving the last interrupt context (r0 == 0), remove the floating + * point context pointer. + */ + cmp r0, #0 + moveq r1, #0 + streq r1, [r2, #_kernel_offset_to_fp_ctx] + b vfp_exit\@ + +vfp_restore\@: + add r3, sp, #___fpu_sf_t_fpscr_OFFSET + ldm r3, {r1, r2} /* r1 = saved fpscr, r2 = saved fpexc */ + tst r2, #FPEXC_EN + beq vfp_exit\@ /* saved fpexc has EN clear: skip restore */ + + vmsr fpexc, r2 + vmsr fpscr, r1 + vldmia sp, {s0-s15} + +vfp_exit\@: + /* Always exit this macro with the VFP disabled */ + mov r1, #0 + vmsr fpexc, r1 + + add sp, sp, #___fpu_t_SIZEOF /* pop the FPU stack frame */ +#endif +.endm + /** * @brief Kernel housekeeping when exiting interrupt handler installed directly * in the vector table @@ -133,6 +172,11 @@ __EXIT_INT: * out or they are the args to _new_thread for a new thread. */ cps #MODE_SYS + +#if defined(CONFIG_FPU_SHARING) + fpu_exc_exit +#endif + pop {r0-r3, r12, lr} userspace_exc_exit rfeia sp! @@ -173,6 +217,9 @@ SECTION_SUBSEC_FUNC(TEXT, _HandlerModeExit, z_arm_exc_exit) */ /* Clean up exception stack frame */ +#if defined(CONFIG_FPU_SHARING) + add sp, sp, #___fpu_t_SIZEOF +#endif add sp, #32 /* @@ -193,6 +240,9 @@ SECTION_SUBSEC_FUNC(TEXT, _HandlerModeExit, z_arm_exc_exit) /* Return to the switched thread */ cps #MODE_SYS +#if defined(CONFIG_FPU_SHARING) + fpu_exc_exit +#endif pop {r0-r3, r12, lr} userspace_exc_exit rfeia sp! @@ -203,6 +253,9 @@ __EXIT_EXC: sub r0, r0, #1 str r0, [r3, #_kernel_offset_to_nested] +#if defined(CONFIG_FPU_SHARING) + add sp, sp, #___fpu_t_SIZEOF +#endif /* * Restore r0-r3, r12, lr, lr_und and spsr_und from the exception stack * and return to the current thread. 
+ */ diff --git a/arch/arm/core/aarch32/cortex_a_r/fault.c b/arch/arm/core/aarch32/cortex_a_r/fault.c index f4a77fe8bed..ae037f75d56 100644 --- a/arch/arm/core/aarch32/cortex_a_r/fault.c +++ b/arch/arm/core/aarch32/cortex_a_r/fault.c @@ -86,6 +86,82 @@ static void dump_fault(uint32_t status, uint32_t addr) } #endif +#if defined(CONFIG_FPU_SHARING) +/** + * @brief FPU undefined instruction fault handler + * + * @retval true the FPU was already enabled, so the fault is a genuine + * undefined instruction + * @retval false the FPU was disabled and has now been enabled + */ +bool z_arm_fault_undef_instruction_fp(void) +{ + /* + * Assume this is a floating point instruction that faulted because + * the FP unit was disabled. Enable the FP unit and try again. If + * the FP was already enabled then this was an actual undefined + * instruction. + */ + if (__get_FPEXC() & FPEXC_EN) { + return true; + } + + __set_FPEXC(FPEXC_EN); + + if (_kernel.cpus[0].nested > 1) { + /* + * If the nested count is greater than 1, the undefined + * instruction exception came from an irq/svc context. (The + * irq/svc handler would have the nested count at 1 and then + * the undef exception would increment it to 2). + */ + struct __fpu_sf *spill_esf = + (struct __fpu_sf *)_kernel.cpus[0].fp_ctx; + + if (spill_esf == NULL) { + return false; + } + + _kernel.cpus[0].fp_ctx = NULL; + + /* + * If the nested count is 2 and the current thread has used the + * VFP (whether or not it was actually using the VFP before the + * current exception) OR if the nested count is greater than 2 + * and the VFP was enabled on the irq/svc entrance for the + * saved exception stack frame, then save the floating point + * context because it is about to be overwritten. 
+ */ + if (((_kernel.cpus[0].nested == 2) + && (_current->base.user_options & K_FP_REGS)) + || ((_kernel.cpus[0].nested > 2) + && (spill_esf->undefined & FPEXC_EN))) { + /* + * Spill VFP registers to specified exception stack + * frame + */ + spill_esf->undefined |= FPEXC_EN; + spill_esf->fpscr = __get_FPSCR(); + __asm__ volatile ( + "vstmia %0, {s0-s15};\n" + : : "r" (&spill_esf->s[0]) + : "memory" + ); + } + } else { + /* + * If the nested count is one, a thread was the faulting + * context. Just flag that this thread uses the VFP. This + * means that a thread that uses the VFP does not have to, + * but should, set K_FP_REGS on thread creation. + */ + _current->base.user_options |= K_FP_REGS; + } + + return false; +} +#endif + /** * @brief Undefined instruction fault handler * @@ -93,6 +169,20 @@ static void dump_fault(uint32_t status, uint32_t addr) */ bool z_arm_fault_undef_instruction(z_arch_esf_t *esf) { +#if defined(CONFIG_FPU_SHARING) + /* + * This is a true undefined instruction and we will be crashing + * so save away the VFP registers. + */ + esf->fpu.undefined = __get_FPEXC(); + esf->fpu.fpscr = __get_FPSCR(); + __asm__ volatile ( + "vstmia %0, {s0-s15};\n" + : : "r" (&esf->fpu.s[0]) + : "memory" + ); +#endif + /* Print fault information */ LOG_ERR("***** UNDEFINED INSTRUCTION ABORT *****"); diff --git a/arch/arm/core/aarch32/isr_wrapper.S b/arch/arm/core/aarch32/isr_wrapper.S index 89012709915..1c7027c80ec 100644 --- a/arch/arm/core/aarch32/isr_wrapper.S +++ b/arch/arm/core/aarch32/isr_wrapper.S @@ -88,6 +88,31 @@ isr_system_thread: cps #MODE_SYS push {r0-r3, r12, lr} +#if defined(CONFIG_FPU_SHARING) + sub sp, sp, #___fpu_t_SIZEOF + + /* + * Note that this handler was entered with the VFP unit enabled. + * The undefined instruction handler uses this to know that it + * needs to save the current floating context. 
+ */ + vmrs r0, fpexc + str r0, [sp, #___fpu_t_SIZEOF - 4] + + /* Disable VFP */ + mov r0, #0 + vmsr fpexc, r0 + + /* + * Mark where to store the floating context for the undefined + * instruction handler + */ + ldr r2, =_kernel + ldr r0, [r2, #_kernel_offset_to_fp_ctx] + cmp r0, #0 + streq sp, [r2, #_kernel_offset_to_fp_ctx] +#endif /* CONFIG_FPU_SHARING */ + /* * Use SVC mode stack for predictable interrupt behaviour; running ISRs * in the SYS/USR mode stack (i.e. interrupted thread stack) leaves the diff --git a/arch/arm/core/aarch32/swap_helper.S b/arch/arm/core/aarch32/swap_helper.S index 2f9518af265..7cf61b4d565 100644 --- a/arch/arm/core/aarch32/swap_helper.S +++ b/arch/arm/core/aarch32/swap_helper.S @@ -19,6 +19,7 @@ #include #include #include +#include #if defined(CONFIG_CPU_CORTEX_M) #include @@ -126,6 +127,45 @@ out_fp_endif: cps #MODE_SYS stm r0, {r4-r11, sp} cps #MODE_SVC + +#if defined(CONFIG_FPU_SHARING) + ldrb r0, [r2, #_thread_offset_to_user_options] + tst r0, #K_FP_REGS /* _current->base.user_options & K_FP_REGS */ + beq out_fp_inactive + + mov ip, #FPEXC_EN + vmsr fpexc, ip + + /* + * If the float context pointer is not null, then the VFP has not been + * used since this thread has used it. Consequently, the caller-saved + * float registers have not been saved away, so write them to the + * exception stack frame. + */ + ldr r0, [r1, #_kernel_offset_to_fp_ctx] + cmp r0, #0 + beq out_store_thread_context + + vstmia r0!, {s0-s15} + vmrs r3, fpscr + stm r0, {r3, ip} + +out_store_thread_context: + /* Store s16-s31 to thread context */ + add r0, r2, #_thread_offset_to_preempt_float + vstmia r0, {s16-s31} + + mov ip, #0 + vmsr fpexc, ip + +out_fp_inactive: + /* + * The floating context has now been saved to the exception stack + * frame, so zero out the global pointer to note this. 
+ */ + mov r0, #0 + str r0, [r1, #_kernel_offset_to_fp_ctx] +#endif /* CONFIG_FPU_SHARING */ #else #error Unknown ARM architecture #endif /* CONFIG_ARMV6_M_ARMV8_M_BASELINE */ @@ -362,6 +402,24 @@ _thread_irq_disabled: ldm r0, {r4-r11, sp} cps #MODE_SVC +#if defined(CONFIG_FPU_SHARING) + ldrb r0, [r2, #_thread_offset_to_user_options] + tst r0, #K_FP_REGS /* _current->base.user_options & K_FP_REGS */ + beq in_fp_inactive + + mov r3, #FPEXC_EN + vmsr fpexc, r3 + + /* Restore s16-s31 from thread context */ + add r0, r2, #_thread_offset_to_preempt_float + vldmia r0, {s16-s31} + + mov r3, #0 + vmsr fpexc, r3 + +in_fp_inactive: +#endif /* CONFIG_FPU_SHARING */ + #if defined (CONFIG_ARM_MPU) /* r2 contains k_thread */ mov r0, r2 @@ -608,6 +666,12 @@ valid_syscall_id: #elif defined(CONFIG_ARMV7_R) || defined(CONFIG_AARCH32_ARMV8_R) \ || defined(CONFIG_ARMV7_A) +#if defined(CONFIG_FPU_SHARING) +#define FPU_SF_SIZE ___fpu_t_SIZEOF +#else +#define FPU_SF_SIZE 0 +#endif + /** * * @brief Service call handler @@ -650,7 +714,34 @@ svc_system_thread: srsdb #MODE_SYS! cps #MODE_SYS push {r0-r3, r12, lr} + +#if defined(CONFIG_FPU_SHARING) + sub sp, sp, #___fpu_t_SIZEOF + + /* + * Note that this handler was entered with the VFP unit enabled. + * The undefined instruction handler uses this to know that it + * needs to save the current floating context. + */ + vmrs r0, fpexc + str r0, [sp, #___fpu_t_SIZEOF - 4] + + /* Disable VFP */ + mov r0, #0 + vmsr fpexc, r0 + + /* + * Mark where to store the floating context for the undefined + * instruction handler + */ + ldr r2, =_kernel + ldr r0, [r2, #_kernel_offset_to_fp_ctx] + cmp r0, #0 + streq sp, [r2, #_kernel_offset_to_fp_ctx] +#endif /* CONFIG_FPU_SHARING */ + mov ip, sp + cps #MODE_SVC /* @@ -735,7 +826,7 @@ _oops: * the SVC. 
* * On SVC exception, the USER/SYSTEM stack looks like the following: - * r0 - r1 - r2 - r3 - r12 - LR - { possible FPU space } - PC - SPSR + * { possible FPU space } - r0 - r1 - r2 - r3 - r12 - LR - PC - SPSR * * Registers look like: * r0 - arg1 @@ -748,10 +839,11 @@ _oops: * r8 - saved link register */ _do_syscall: - ldr r8, [ip, #24] /* grab address of LR from stack frame */ + /* grab address of LR from stack frame */ + ldr r8, [ip, #(FPU_SF_SIZE + ___basic_sf_t_pc_OFFSET)] /* Make the exception return to system state */ - ldr r1, [ip, #28] + ldr r1, [ip, #(FPU_SF_SIZE + ___basic_sf_t_xpsr_OFFSET)] /* If leaving thumb mode, set the return address to thumb mode */ tst r1, #T_BIT @@ -759,14 +851,14 @@ _do_syscall: bic r1, #(MODE_MASK | T_BIT) orr r1, r1, #MODE_SYS - str r1, [ip, #28] + str r1, [ip, #(FPU_SF_SIZE + ___basic_sf_t_xpsr_OFFSET)] /* * Store the address of z_arm_do_syscall for the exit so the exception * return goes there in system state. */ ldr r1, =z_arm_do_syscall - str r1, [ip, #24] /* overwrite the LR to point to z_arm_do_syscall */ + str r1, [ip, #(FPU_SF_SIZE + ___basic_sf_t_pc_OFFSET)] /* validate syscall limit, only set priv mode if valid */ ldr ip, =K_SYSCALL_LIMIT diff --git a/arch/arm/core/aarch32/thread.c b/arch/arm/core/aarch32/thread.c index 68032314bd2..0f127f47a64 100644 --- a/arch/arm/core/aarch32/thread.c +++ b/arch/arm/core/aarch32/thread.c @@ -112,6 +112,13 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack, #endif /* CONFIG_COMPILER_ISA_THUMB2 */ #endif /* CONFIG_CPU_CORTEX_M */ +#if !defined(CONFIG_CPU_CORTEX_M) \ + && defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) + iframe = (struct __basic_sf *) + ((uintptr_t)iframe - sizeof(struct __fpu_sf)); + memset(iframe, 0, sizeof(struct __fpu_sf)); +#endif + thread->callee_saved.psp = (uint32_t)iframe; thread->arch.basepri = 0; @@ -470,7 +477,11 @@ int arch_float_disable(struct k_thread *thread) thread->base.user_options &= ~K_FP_REGS; +#if 
defined(CONFIG_CPU_CORTEX_M) __set_CONTROL(__get_CONTROL() & (~CONTROL_FPCA_Msk)); +#else + __set_FPEXC(0); +#endif /* No need to add an ISB barrier after setting the CONTROL * register; arch_irq_unlock() already adds one. @@ -483,7 +494,7 @@ int arch_float_disable(struct k_thread *thread) int arch_float_enable(struct k_thread *thread, unsigned int options) { - /* This is not supported in Cortex-M and Cortex-R does not have FPU */ + /* This is not supported in Cortex-M */ return -ENOTSUP; } #endif /* CONFIG_FPU && CONFIG_FPU_SHARING */ @@ -508,7 +519,7 @@ static void z_arm_prepare_switch_to_main(void) #else __set_FPSCR(0); #endif -#if defined(CONFIG_FPU_SHARING) +#if defined(CONFIG_CPU_CORTEX_M) && defined(CONFIG_FPU_SHARING) /* In Sharing mode clearing FPSCR may set the CONTROL.FPCA flag. */ __set_CONTROL(__get_CONTROL() & (~(CONTROL_FPCA_Msk))); __ISB(); diff --git a/include/zephyr/kernel_structs.h b/include/zephyr/kernel_structs.h index 52f630ba399..cd2e13689bc 100644 --- a/include/zephyr/kernel_structs.h +++ b/include/zephyr/kernel_structs.h @@ -126,6 +126,10 @@ struct _cpu { uint8_t id; +#if defined(CONFIG_FPU_SHARING) + void *fp_ctx; +#endif + #ifdef CONFIG_SMP /* True when _current is allowed to context switch */ uint8_t swap_ok; diff --git a/kernel/include/kernel_offsets.h b/kernel/include/kernel_offsets.h index d7298863c38..df83112c136 100644 --- a/kernel/include/kernel_offsets.h +++ b/kernel/include/kernel_offsets.h @@ -34,6 +34,10 @@ GEN_ABSOLUTE_SYM(___cpu_t_SIZEOF, sizeof(struct _cpu)); GEN_OFFSET_SYM(_kernel_t, cpus); +#if defined(CONFIG_FPU_SHARING) +GEN_OFFSET_SYM(_cpu_t, fp_ctx); +#endif + #if defined(CONFIG_THREAD_MONITOR) GEN_OFFSET_SYM(_kernel_t, threads); #endif diff --git a/kernel/include/offsets_short.h b/kernel/include/offsets_short.h index 4e4125760fe..f9fb901db6b 100644 --- a/kernel/include/offsets_short.h +++ b/kernel/include/offsets_short.h @@ -24,6 +24,11 @@ #define _kernel_offset_to_current \ (___cpu_t_current_OFFSET) + +#if 
defined(CONFIG_FPU_SHARING) +#define _kernel_offset_to_fp_ctx \ + (___cpu_t_fp_ctx_OFFSET) +#endif /* CONFIG_FPU_SHARING */ #endif /* CONFIG_SMP */ #define _kernel_offset_to_idle \