diff --git a/arch/riscv/core/CMakeLists.txt b/arch/riscv/core/CMakeLists.txt index fd297bbed0a..1d8daac060c 100644 --- a/arch/riscv/core/CMakeLists.txt +++ b/arch/riscv/core/CMakeLists.txt @@ -18,6 +18,7 @@ if ((CONFIG_MP_MAX_NUM_CPUS GREATER 1) OR (CONFIG_SMP)) zephyr_library_sources(smp.c) endif () +zephyr_library_sources_ifdef(CONFIG_FPU_SHARING fpu.c fpu.S) zephyr_library_sources_ifdef(CONFIG_DEBUG_COREDUMP coredump.c) zephyr_library_sources_ifdef(CONFIG_IRQ_OFFLOAD irq_offload.c) zephyr_library_sources_ifdef(CONFIG_RISCV_PMP pmp.c pmp.S) diff --git a/arch/riscv/core/isr.S b/arch/riscv/core/isr.S index 886e692960b..188aadca814 100644 --- a/arch/riscv/core/isr.S +++ b/arch/riscv/core/isr.S @@ -21,30 +21,7 @@ #include #endif -/* Convenience macros for loading/storing register states. */ - -#define DO_FP_CALLER_SAVED(op, reg) \ - op ft0, __z_arch_esf_t_ft0_OFFSET(reg) ;\ - op ft1, __z_arch_esf_t_ft1_OFFSET(reg) ;\ - op ft2, __z_arch_esf_t_ft2_OFFSET(reg) ;\ - op ft3, __z_arch_esf_t_ft3_OFFSET(reg) ;\ - op ft4, __z_arch_esf_t_ft4_OFFSET(reg) ;\ - op ft5, __z_arch_esf_t_ft5_OFFSET(reg) ;\ - op ft6, __z_arch_esf_t_ft6_OFFSET(reg) ;\ - op ft7, __z_arch_esf_t_ft7_OFFSET(reg) ;\ - op ft8, __z_arch_esf_t_ft8_OFFSET(reg) ;\ - op ft9, __z_arch_esf_t_ft9_OFFSET(reg) ;\ - op ft10, __z_arch_esf_t_ft10_OFFSET(reg) ;\ - op ft11, __z_arch_esf_t_ft11_OFFSET(reg) ;\ - op fa0, __z_arch_esf_t_fa0_OFFSET(reg) ;\ - op fa1, __z_arch_esf_t_fa1_OFFSET(reg) ;\ - op fa2, __z_arch_esf_t_fa2_OFFSET(reg) ;\ - op fa3, __z_arch_esf_t_fa3_OFFSET(reg) ;\ - op fa4, __z_arch_esf_t_fa4_OFFSET(reg) ;\ - op fa5, __z_arch_esf_t_fa5_OFFSET(reg) ;\ - op fa6, __z_arch_esf_t_fa6_OFFSET(reg) ;\ - op fa7, __z_arch_esf_t_fa7_OFFSET(reg) ; - +/* Convenience macro for loading/storing register states. 
*/ #define DO_CALLER_SAVED(op) \ RV_E( op t0, __z_arch_esf_t_t0_OFFSET(sp) );\ RV_E( op t1, __z_arch_esf_t_t1_OFFSET(sp) );\ @@ -186,14 +163,67 @@ SECTION_FUNC(exception.entry, _isr_wrapper) csrr t2, mstatus sr t2, __z_arch_esf_t_mstatus_OFFSET(sp) -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) - /* Assess whether floating-point registers need to be saved. */ - li t1, MSTATUS_FS_INIT - and t0, t2, t1 - beqz t0, skip_store_fp_caller_saved - DO_FP_CALLER_SAVED(fsr, sp) -skip_store_fp_caller_saved: -#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */ +#if defined(CONFIG_FPU_SHARING) + /* determine if this is an Illegal Instruction exception */ + csrr t0, mcause + li t1, 2 /* 2 = illegal instruction */ + bne t0, t1, no_fp + /* determine if FPU access was disabled */ + csrr t0, mstatus + li t1, MSTATUS_FS + and t0, t0, t1 + bnez t0, no_fp + /* determine if we trapped on an FP instruction. */ + csrr t2, mtval /* get faulting instruction */ + andi t0, t2, 0x7f /* keep only the opcode bits */ + xori t1, t0, 0b1010011 /* OP-FP */ + beqz t1, is_fp + ori t0, t0, 0b0100000 + xori t1, t0, 0b0100111 /* LOAD-FP / STORE-FP */ +#if !defined(CONFIG_RISCV_ISA_EXT_C) + bnez t1, no_fp +#else + beqz t1, is_fp + /* remaining non RVC (0b11) and RVC with 0b01 are not FP instructions */ + andi t1, t0, 1 + bnez t1, no_fp + /* + * 001...........00 = C.FLD RV32/64 (RV128 = C.LQ) + * 001...........10 = C.FLDSP RV32/64 (RV128 = C.LQSP) + * 011...........00 = C.FLW RV32 (RV64/128 = C.LD) + * 011...........10 = C.FLWSPP RV32 (RV64/128 = C.LDSP) + * 101...........00 = C.FSD RV32/64 (RV128 = C.SQ) + * 101...........10 = C.FSDSP RV32/64 (RV128 = C.SQSP) + * 111...........00 = C.FSW RV32 (RV64/128 = C.SD) + * 111...........10 = C.FSWSP RV32 (RV64/128 = C.SDSP) + * + * so must be .01............. on RV64 and ..1............. on RV32. 
+ */ + srli t0, t2, 8 +#if defined(CONFIG_64BIT) + andi t1, t0, 0b01100000 + xori t1, t1, 0b00100000 + bnez t1, no_fp +#else + andi t1, t0, 0b00100000 + beqz t1, no_fp +#endif +#endif /* CONFIG_RISCV_ISA_EXT_C */ + +is_fp: /* Process the FP trap and quickly return from exception */ + la ra, fp_trap_exit + mv a0, sp + tail z_riscv_fpu_trap + +no_fp: /* increment _current->arch.exception_depth */ + lr t0, ___cpu_t_current_OFFSET(s0) + lb t1, _thread_offset_to_exception_depth(t0) + add t1, t1, 1 + sb t1, _thread_offset_to_exception_depth(t0) + + /* configure the FPU for exception mode */ + call z_riscv_fpu_enter_exc +#endif #ifdef CONFIG_RISCV_SOC_CONTEXT_SAVE /* Handle context saving at SOC level. */ @@ -528,10 +558,8 @@ reschedule: z_riscv_thread_start: might_have_rescheduled: -#ifdef CONFIG_SMP - /* reload s0 with &_current_cpu as it might have changed */ + /* reload s0 with &_current_cpu as it might have changed or be unset */ get_current_cpu s0 -#endif no_reschedule: @@ -541,32 +569,24 @@ no_reschedule: jal ra, __soc_restore_context #endif /* CONFIG_RISCV_SOC_CONTEXT_SAVE */ - /* Restore MEPC register */ +#if defined(CONFIG_FPU_SHARING) + /* FPU handling upon exception mode exit */ + mv a0, sp + call z_riscv_fpu_exit_exc + + /* decrement _current->arch.exception_depth */ + lr t0, ___cpu_t_current_OFFSET(s0) + lb t1, _thread_offset_to_exception_depth(t0) + add t1, t1, -1 + sb t1, _thread_offset_to_exception_depth(t0) +fp_trap_exit: +#endif + + /* Restore MEPC and MSTATUS registers */ lr t0, __z_arch_esf_t_mepc_OFFSET(sp) - csrw mepc, t0 - - /* Restore MSTATUS register */ lr t2, __z_arch_esf_t_mstatus_OFFSET(sp) - csrrw t0, mstatus, t2 - -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) - /* - * Determine if we need to restore FP regs based on the previous - * (before the csr above) mstatus value available in t0. 
- */ - li t1, MSTATUS_FS_INIT - and t0, t0, t1 - beqz t0, no_fp - - /* make sure FP is enabled in the restored mstatus */ - csrs mstatus, t1 - DO_FP_CALLER_SAVED(flr, sp) - j 1f - -no_fp: /* make sure this is reflected in the restored mstatus */ - csrc mstatus, t1 -1: -#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */ + csrw mepc, t0 + csrw mstatus, t2 #ifdef CONFIG_USERSPACE /* diff --git a/arch/riscv/core/offsets/offsets.c b/arch/riscv/core/offsets/offsets.c index 320fc55faae..7730138a376 100644 --- a/arch/riscv/core/offsets/offsets.c +++ b/arch/riscv/core/offsets/offsets.c @@ -43,21 +43,48 @@ GEN_OFFSET_SYM(_callee_saved_t, s10); GEN_OFFSET_SYM(_callee_saved_t, s11); #endif /* !CONFIG_RISCV_ISA_RV32E */ -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) -GEN_OFFSET_SYM(_callee_saved_t, fcsr); -GEN_OFFSET_SYM(_callee_saved_t, fs0); -GEN_OFFSET_SYM(_callee_saved_t, fs1); -GEN_OFFSET_SYM(_callee_saved_t, fs2); -GEN_OFFSET_SYM(_callee_saved_t, fs3); -GEN_OFFSET_SYM(_callee_saved_t, fs4); -GEN_OFFSET_SYM(_callee_saved_t, fs5); -GEN_OFFSET_SYM(_callee_saved_t, fs6); -GEN_OFFSET_SYM(_callee_saved_t, fs7); -GEN_OFFSET_SYM(_callee_saved_t, fs8); -GEN_OFFSET_SYM(_callee_saved_t, fs9); -GEN_OFFSET_SYM(_callee_saved_t, fs10); -GEN_OFFSET_SYM(_callee_saved_t, fs11); -#endif +#if defined(CONFIG_FPU_SHARING) + +GEN_OFFSET_SYM(z_riscv_fp_context_t, fa0); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fa1); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fa2); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fa3); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fa4); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fa5); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fa6); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fa7); + +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft0); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft1); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft2); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft3); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft4); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft5); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft6); 
+GEN_OFFSET_SYM(z_riscv_fp_context_t, ft7); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft8); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft9); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft10); +GEN_OFFSET_SYM(z_riscv_fp_context_t, ft11); + +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs0); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs1); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs2); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs3); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs4); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs5); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs6); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs7); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs8); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs9); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs10); +GEN_OFFSET_SYM(z_riscv_fp_context_t, fs11); + +GEN_OFFSET_SYM(z_riscv_fp_context_t, fcsr); + +GEN_OFFSET_SYM(_thread_arch_t, exception_depth); + +#endif /* CONFIG_FPU_SHARING */ /* esf member offsets */ GEN_OFFSET_SYM(z_arch_esf_t, ra); @@ -89,29 +116,6 @@ GEN_OFFSET_SYM(z_arch_esf_t, s0); GEN_OFFSET_SYM(z_arch_esf_t, sp); #endif -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) -GEN_OFFSET_SYM(z_arch_esf_t, ft0); -GEN_OFFSET_SYM(z_arch_esf_t, ft1); -GEN_OFFSET_SYM(z_arch_esf_t, ft2); -GEN_OFFSET_SYM(z_arch_esf_t, ft3); -GEN_OFFSET_SYM(z_arch_esf_t, ft4); -GEN_OFFSET_SYM(z_arch_esf_t, ft5); -GEN_OFFSET_SYM(z_arch_esf_t, ft6); -GEN_OFFSET_SYM(z_arch_esf_t, ft7); -GEN_OFFSET_SYM(z_arch_esf_t, ft8); -GEN_OFFSET_SYM(z_arch_esf_t, ft9); -GEN_OFFSET_SYM(z_arch_esf_t, ft10); -GEN_OFFSET_SYM(z_arch_esf_t, ft11); -GEN_OFFSET_SYM(z_arch_esf_t, fa0); -GEN_OFFSET_SYM(z_arch_esf_t, fa1); -GEN_OFFSET_SYM(z_arch_esf_t, fa2); -GEN_OFFSET_SYM(z_arch_esf_t, fa3); -GEN_OFFSET_SYM(z_arch_esf_t, fa4); -GEN_OFFSET_SYM(z_arch_esf_t, fa5); -GEN_OFFSET_SYM(z_arch_esf_t, fa6); -GEN_OFFSET_SYM(z_arch_esf_t, fa7); -#endif - #if defined(CONFIG_RISCV_SOC_CONTEXT_SAVE) GEN_OFFSET_SYM(z_arch_esf_t, soc_context); #endif diff --git a/arch/riscv/core/smp.c b/arch/riscv/core/smp.c index 
8b5e6919327..ac9e5f60c47 100644 --- a/arch/riscv/core/smp.c +++ b/arch/riscv/core/smp.c @@ -60,6 +60,7 @@ void z_riscv_secondary_cpu_init(int cpu_num) static atomic_val_t cpu_pending_ipi[CONFIG_MP_MAX_NUM_CPUS]; #define IPI_SCHED BIT(0) +#define IPI_FPU_FLUSH BIT(1) void arch_sched_ipi(void) { @@ -77,6 +78,14 @@ void arch_sched_ipi(void) arch_irq_unlock(key); } +#ifdef CONFIG_FPU_SHARING +void z_riscv_flush_fpu_ipi(unsigned int cpu) +{ + atomic_or(&cpu_pending_ipi[cpu], IPI_FPU_FLUSH); + MSIP(_kernel.cpus[cpu].arch.hartid) = 1; +} +#endif + static void ipi_handler(const void *unused) { ARG_UNUSED(unused); @@ -88,6 +97,18 @@ static void ipi_handler(const void *unused) if (pending_ipi & IPI_SCHED) { z_sched_ipi(); } +#ifdef CONFIG_FPU_SHARING + if (pending_ipi & IPI_FPU_FLUSH) { + /* disable IRQs */ + csr_clear(mstatus, MSTATUS_IEN); + /* perform the flush */ + z_riscv_flush_local_fpu(); + /* + * No need to re-enable IRQs here as long as + * this remains the last case. + */ + } +#endif } static int riscv_smp_init(const struct device *dev) diff --git a/arch/riscv/core/switch.S b/arch/riscv/core/switch.S index 375fc25f4c5..d177d92c848 100644 --- a/arch/riscv/core/switch.S +++ b/arch/riscv/core/switch.S @@ -29,23 +29,10 @@ RV_I( op s10, _thread_offset_to_s10(reg) );\ RV_I( op s11, _thread_offset_to_s11(reg) ) -#define DO_FP_CALLEE_SAVED(op, reg) \ - op fs0, _thread_offset_to_fs0(reg) ;\ - op fs1, _thread_offset_to_fs1(reg) ;\ - op fs2, _thread_offset_to_fs2(reg) ;\ - op fs3, _thread_offset_to_fs3(reg) ;\ - op fs4, _thread_offset_to_fs4(reg) ;\ - op fs5, _thread_offset_to_fs5(reg) ;\ - op fs6, _thread_offset_to_fs6(reg) ;\ - op fs7, _thread_offset_to_fs7(reg) ;\ - op fs8, _thread_offset_to_fs8(reg) ;\ - op fs9, _thread_offset_to_fs9(reg) ;\ - op fs10, _thread_offset_to_fs10(reg) ;\ - op fs11, _thread_offset_to_fs11(reg) - GTEXT(z_riscv_switch) GTEXT(z_thread_mark_switched_in) GTEXT(z_riscv_configure_stack_guard) +GTEXT(z_riscv_fpu_thread_context_switch) /* void 
z_riscv_switch(k_thread_t *switch_to, k_thread_t *switch_from) */ SECTION_FUNC(TEXT, z_riscv_switch) @@ -53,18 +40,6 @@ SECTION_FUNC(TEXT, z_riscv_switch) /* Save the old thread's callee-saved registers */ DO_CALLEE_SAVED(sr, a1) -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) - /* Assess whether floating-point registers need to be saved. */ - lb t0, _thread_offset_to_user_options(a1) - andi t0, t0, K_FP_REGS - beqz t0, skip_store_fp_callee_saved - - frcsr t0 - sw t0, _thread_offset_to_fcsr(a1) - DO_FP_CALLEE_SAVED(fsr, a1) -skip_store_fp_callee_saved: -#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */ - /* Save the old thread's stack pointer */ sr sp, _thread_offset_to_sp(a1) @@ -79,11 +54,15 @@ skip_store_fp_callee_saved: lr tp, _thread_offset_to_tls(a0) #endif +#if defined(CONFIG_FPU_SHARING) + /* Preserve a0 across following call. s0 is not yet restored. */ + mv s0, a0 + call z_riscv_fpu_thread_context_switch + mv a0, s0 +#endif + #if defined(CONFIG_PMP_STACK_GUARD) - /* - * Stack guard has priority over user space for PMP usage. - * Preserve a0 across following call. s0 is not yet restored. - */ + /* Stack guard has priority over user space for PMP usage. */ mv s0, a0 call z_riscv_pmp_stackguard_enable mv a0, s0 @@ -111,27 +90,5 @@ not_user_task: /* Restore the new thread's callee-saved registers */ DO_CALLEE_SAVED(lr, a0) -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) - /* Determine if we need to restore floating-point registers. 
*/ - lb t0, _thread_offset_to_user_options(a0) - li t1, MSTATUS_FS_INIT - andi t0, t0, K_FP_REGS - beqz t0, no_fp - - /* Enable floating point access */ - csrs mstatus, t1 - - /* Restore FP regs */ - lw t1, _thread_offset_to_fcsr(a0) - fscsr t1 - DO_FP_CALLEE_SAVED(flr, a0) - j 1f - -no_fp: - /* Disable floating point access */ - csrc mstatus, t1 -1: -#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */ - /* Return to arch_switch() or _irq_wrapper() */ ret diff --git a/arch/riscv/core/thread.c b/arch/riscv/core/thread.c index 7ef3c6a67dd..dd15617748f 100644 --- a/arch/riscv/core/thread.c +++ b/arch/riscv/core/thread.c @@ -65,12 +65,9 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack, */ stack_init->mstatus = MSTATUS_DEF_RESTORE; -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) - /* Shared FP mode: enable FPU of threads with K_FP_REGS. */ - if ((thread->base.user_options & K_FP_REGS) != 0) { - stack_init->mstatus |= MSTATUS_FS_INIT; - } - thread->callee_saved.fcsr = 0; +#if defined(CONFIG_FPU_SHARING) + /* thread birth happens through the exception return path */ + thread->arch.exception_depth = 1; #elif defined(CONFIG_FPU) /* Unshared FP mode: enable FPU of each thread. */ stack_init->mstatus |= MSTATUS_FS_INIT; @@ -118,72 +115,6 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack, thread->switch_handle = thread; } -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) -int arch_float_disable(struct k_thread *thread) -{ - unsigned int key; - - if (thread != _current) { - return -EINVAL; - } - - if (arch_is_in_isr()) { - return -EINVAL; - } - - /* Ensure a preemptive context switch does not occur */ - key = irq_lock(); - - /* Disable all floating point capabilities for the thread */ - thread->base.user_options &= ~K_FP_REGS; - - /* Clear the FS bits to disable the FPU. 
*/ - __asm__ volatile ( - "mv t0, %0\n" - "csrrc x0, mstatus, t0\n" - : - : "r" (MSTATUS_FS_MASK) - ); - - irq_unlock(key); - - return 0; -} - - -int arch_float_enable(struct k_thread *thread, unsigned int options) -{ - unsigned int key; - - if (thread != _current) { - return -EINVAL; - } - - if (arch_is_in_isr()) { - return -EINVAL; - } - - /* Ensure a preemptive context switch does not occur */ - key = irq_lock(); - - /* Enable all floating point capabilities for the thread. */ - thread->base.user_options |= K_FP_REGS; - - /* Set the FS bits to Initial and clear the fcsr to enable the FPU. */ - __asm__ volatile ( - "mv t0, %0\n" - "csrrs x0, mstatus, t0\n" - "fscsr x0, x0\n" - : - : "r" (MSTATUS_FS_INIT) - ); - - irq_unlock(key); - - return 0; -} -#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */ - #ifdef CONFIG_USERSPACE /* diff --git a/arch/riscv/include/offsets_short_arch.h b/arch/riscv/include/offsets_short_arch.h index 8edb73f884d..18933413b69 100644 --- a/arch/riscv/include/offsets_short_arch.h +++ b/arch/riscv/include/offsets_short_arch.h @@ -57,48 +57,12 @@ #define _thread_offset_to_swap_return_value \ (___thread_t_arch_OFFSET + ___thread_arch_t_swap_return_value_OFFSET) -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) +#if defined(CONFIG_FPU_SHARING) -#define _thread_offset_to_fcsr \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fcsr_OFFSET) +#define _thread_offset_to_exception_depth \ + (___thread_t_arch_OFFSET + ___thread_arch_t_exception_depth_OFFSET) -#define _thread_offset_to_fs0 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs0_OFFSET) - -#define _thread_offset_to_fs1 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs1_OFFSET) - -#define _thread_offset_to_fs2 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs2_OFFSET) - -#define _thread_offset_to_fs3 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs3_OFFSET) - -#define _thread_offset_to_fs4 \ - (___thread_t_callee_saved_OFFSET + 
___callee_saved_t_fs4_OFFSET) - -#define _thread_offset_to_fs5 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs5_OFFSET) - -#define _thread_offset_to_fs6 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs6_OFFSET) - -#define _thread_offset_to_fs7 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs7_OFFSET) - -#define _thread_offset_to_fs8 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs8_OFFSET) - -#define _thread_offset_to_fs9 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs9_OFFSET) - -#define _thread_offset_to_fs10 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs10_OFFSET) - -#define _thread_offset_to_fs11 \ - (___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs11_OFFSET) - -#endif /* defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) */ +#endif #ifdef CONFIG_USERSPACE diff --git a/doc/kernel/services/other/float.rst b/doc/kernel/services/other/float.rst index 902295a70d2..8a649e66369 100644 --- a/doc/kernel/services/other/float.rst +++ b/doc/kernel/services/other/float.rst @@ -190,34 +190,25 @@ of stack space is required to load and store floating point registers. RISC-V architecture ------------------- -On the RISC-V architecture, the kernel treats each thread as a non-user -or FPU user and the thread must be tagged by one of the -following techniques: +On the RISC-V architecture the kernel treats each thread as an FPU +user on a case-by-case basis with the FPU access allocated on demand. +A "lazy save" algorithm is used during context switching which updates +the floating point registers only when it is absolutely necessary. +For example, the FPU registers are *not* saved when switching from an +FPU user to a non-user thread (or an FPU user that doesn't touch the FPU +during its scheduling slot), and then back to the original FPU user. -* A statically-created RISC-V thread can be tagged by passing the - :c:macro:`K_FP_REGS` option to :c:macro:`K_THREAD_DEFINE`. 
+FPU register usage by ISRs is supported although not recommended. When an +ISR uses floating point or SIMD registers, then the access is trapped, the +current FPU user context is saved in the thread object and the ISR is resumed +with interrupts disabled so as to prevent another IRQ from interrupting the ISR +and potentially requesting FPU usage. Because ISRs don't have a persistent +register context, there is no provision for saving an ISR's FPU context +either, hence the IRQ disabling. -* A dynamically-created RISC-V thread can be tagged by passing the - :c:macro:`K_FP_REGS` to :c:func:`k_thread_create`. - -* A running RISC-V thread can be tagged by calling :c:func:`k_float_enable`. - This function can only be called from the thread itself. - -If a RISC-V thread no longer requires the use of the floating point registers, -it can call :c:func:`k_float_disable`. This instructs the kernel not to -save or restore its FP context during thread context switching. This function -can only be called from the thread itself. - -During thread context switching the RISC-V kernel saves the *callee-saved* -floating point registers, if the switched-out thread is tagged with -:c:macro:`K_FP_REGS`. Additionally, the *caller-saved* floating point -registers are saved on the thread's stack. If the switched-in thread has been -tagged with :c:macro:`K_FP_REGS`, then the kernel restores the *callee-saved* -FP registers of the switched-in thread and the *caller-saved* FP context is -restored from the thread's stack. Thus, the kernel does not save or restore the -FP context of threads that are not using the FP registers. An extra 84 bytes -(single floating point hardware) or 164 bytes (double floating point hardware) -of stack space is required to load and store floating point registers. +Each thread object becomes 136 bytes (single-precision floating point +hardware) or 264 bytes (double-precision floating point hardware) larger +when Shared FP registers mode is enabled. 
SPARC architecture ------------------ diff --git a/include/zephyr/arch/riscv/arch.h b/include/zephyr/arch/riscv/arch.h index aafe6b00e5f..da636a55fe4 100644 --- a/include/zephyr/arch/riscv/arch.h +++ b/include/zephyr/arch/riscv/arch.h @@ -149,9 +149,11 @@ #define MSTATUS_IEN (1UL << 3) #define MSTATUS_MPP_M (3UL << 11) #define MSTATUS_MPIE_EN (1UL << 7) -#define MSTATUS_FS_INIT (1UL << 13) -#define MSTATUS_FS_MASK ((1UL << 13) | (1UL << 14)) +#define MSTATUS_FS_OFF (0UL << 13) +#define MSTATUS_FS_INIT (1UL << 13) +#define MSTATUS_FS_CLEAN (2UL << 13) +#define MSTATUS_FS_DIRTY (3UL << 13) /* This comes from openisa_rv32m1, but doesn't seem to hurt on other * platforms: diff --git a/include/zephyr/arch/riscv/exp.h b/include/zephyr/arch/riscv/exp.h index ad53af7e287..e661e5aa86e 100644 --- a/include/zephyr/arch/riscv/exp.h +++ b/include/zephyr/arch/riscv/exp.h @@ -45,14 +45,6 @@ struct soc_esf { }; #endif -#if !defined(RV_FP_TYPE) && defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) -#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION -#define RV_FP_TYPE uint64_t -#else -#define RV_FP_TYPE uint32_t -#endif -#endif - #if defined(CONFIG_RISCV_SOC_HAS_ISR_STACKING) SOC_ISR_STACKING_ESF_DECLARE; #else @@ -89,29 +81,6 @@ struct __esf { unsigned long sp; /* preserved (user or kernel) stack pointer */ #endif -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) - RV_FP_TYPE ft0; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft1; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft2; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft3; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft4; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft5; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft6; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft7; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft8; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft9; /* Caller-saved temporary 
floating register */ - RV_FP_TYPE ft10; /* Caller-saved temporary floating register */ - RV_FP_TYPE ft11; /* Caller-saved temporary floating register */ - RV_FP_TYPE fa0; /* function argument/return value */ - RV_FP_TYPE fa1; /* function argument/return value */ - RV_FP_TYPE fa2; /* function argument */ - RV_FP_TYPE fa3; /* function argument */ - RV_FP_TYPE fa4; /* function argument */ - RV_FP_TYPE fa5; /* function argument */ - RV_FP_TYPE fa6; /* function argument */ - RV_FP_TYPE fa7; /* function argument */ -#endif - #ifdef CONFIG_RISCV_SOC_CONTEXT_SAVE struct soc_esf soc_context; #endif diff --git a/include/zephyr/arch/riscv/structs.h b/include/zephyr/arch/riscv/structs.h index 4a1260ba525..60d99f94ec5 100644 --- a/include/zephyr/arch/riscv/structs.h +++ b/include/zephyr/arch/riscv/structs.h @@ -18,6 +18,10 @@ struct _cpu_arch { unsigned long hartid; bool online; #endif +#ifdef CONFIG_FPU_SHARING + atomic_ptr_val_t fpu_owner; + uint32_t fpu_state; +#endif }; #endif /* ZEPHYR_INCLUDE_RISCV_STRUCTS_H_ */ diff --git a/include/zephyr/arch/riscv/thread.h b/include/zephyr/arch/riscv/thread.h index d0ff1294045..6ad48803511 100644 --- a/include/zephyr/arch/riscv/thread.h +++ b/include/zephyr/arch/riscv/thread.h @@ -22,14 +22,6 @@ #ifndef _ASMLANGUAGE #include -#if !defined(RV_FP_TYPE) && defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) -#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION -#define RV_FP_TYPE uint64_t -#else -#define RV_FP_TYPE uint32_t -#endif -#endif - /* * The following structure defines the list of registers that need to be * saved/restored when a context switch occurs. 
@@ -52,28 +44,32 @@ struct _callee_saved { unsigned long s10; /* saved register */ unsigned long s11; /* saved register */ #endif - -#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) - uint32_t fcsr; /* Control and status register */ - RV_FP_TYPE fs0; /* saved floating-point register */ - RV_FP_TYPE fs1; /* saved floating-point register */ - RV_FP_TYPE fs2; /* saved floating-point register */ - RV_FP_TYPE fs3; /* saved floating-point register */ - RV_FP_TYPE fs4; /* saved floating-point register */ - RV_FP_TYPE fs5; /* saved floating-point register */ - RV_FP_TYPE fs6; /* saved floating-point register */ - RV_FP_TYPE fs7; /* saved floating-point register */ - RV_FP_TYPE fs8; /* saved floating-point register */ - RV_FP_TYPE fs9; /* saved floating-point register */ - RV_FP_TYPE fs10; /* saved floating-point register */ - RV_FP_TYPE fs11; /* saved floating-point register */ -#endif }; typedef struct _callee_saved _callee_saved_t; +#if !defined(RV_FP_TYPE) +#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION +#define RV_FP_TYPE uint64_t +#else +#define RV_FP_TYPE uint32_t +#endif +#endif + +struct z_riscv_fp_context { + RV_FP_TYPE fa0, fa1, fa2, fa3, fa4, fa5, fa6, fa7; + RV_FP_TYPE ft0, ft1, ft2, ft3, ft4, ft5, ft6, ft7, ft8, ft9, ft10, ft11; + RV_FP_TYPE fs0, fs1, fs2, fs3, fs4, fs5, fs6, fs7, fs8, fs9, fs10, fs11; + uint32_t fcsr; +}; +typedef struct z_riscv_fp_context z_riscv_fp_context_t; + #define PMP_M_MODE_SLOTS 8 /* 8 is plenty enough for m-mode */ struct _thread_arch { +#ifdef CONFIG_FPU_SHARING + struct z_riscv_fp_context saved_fp_context; + uint8_t exception_depth; +#endif #ifdef CONFIG_USERSPACE unsigned long priv_stack_start; unsigned long u_mode_pmpaddr_regs[CONFIG_PMP_SLOTS];