riscv: integrate the new FPU context switching support
FPU context switching is always performed on demand through the FPU access exception handler. Actual task switching only grants or denies FPU access depending on the current FPU owner. Because RISC-V doesn't have a dedicated FPU access exception, we must catch the Illegal Instruction exception and look for actual FP opcodes. There is no longer a need to allocate FPU storage on the stack for every exception, making the esf smaller and stack overflows less likely. Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
This commit is contained in:
parent
cb4c0f6c94
commit
ff07da6ff1
12 changed files with 204 additions and 344 deletions
|
@ -18,6 +18,7 @@ if ((CONFIG_MP_MAX_NUM_CPUS GREATER 1) OR (CONFIG_SMP))
|
|||
zephyr_library_sources(smp.c)
|
||||
endif ()
|
||||
|
||||
zephyr_library_sources_ifdef(CONFIG_FPU_SHARING fpu.c fpu.S)
|
||||
zephyr_library_sources_ifdef(CONFIG_DEBUG_COREDUMP coredump.c)
|
||||
zephyr_library_sources_ifdef(CONFIG_IRQ_OFFLOAD irq_offload.c)
|
||||
zephyr_library_sources_ifdef(CONFIG_RISCV_PMP pmp.c pmp.S)
|
||||
|
|
|
@ -21,30 +21,7 @@
|
|||
#include <soc_isr_stacking.h>
|
||||
#endif
|
||||
|
||||
/* Convenience macros for loading/storing register states. */
|
||||
|
||||
#define DO_FP_CALLER_SAVED(op, reg) \
|
||||
op ft0, __z_arch_esf_t_ft0_OFFSET(reg) ;\
|
||||
op ft1, __z_arch_esf_t_ft1_OFFSET(reg) ;\
|
||||
op ft2, __z_arch_esf_t_ft2_OFFSET(reg) ;\
|
||||
op ft3, __z_arch_esf_t_ft3_OFFSET(reg) ;\
|
||||
op ft4, __z_arch_esf_t_ft4_OFFSET(reg) ;\
|
||||
op ft5, __z_arch_esf_t_ft5_OFFSET(reg) ;\
|
||||
op ft6, __z_arch_esf_t_ft6_OFFSET(reg) ;\
|
||||
op ft7, __z_arch_esf_t_ft7_OFFSET(reg) ;\
|
||||
op ft8, __z_arch_esf_t_ft8_OFFSET(reg) ;\
|
||||
op ft9, __z_arch_esf_t_ft9_OFFSET(reg) ;\
|
||||
op ft10, __z_arch_esf_t_ft10_OFFSET(reg) ;\
|
||||
op ft11, __z_arch_esf_t_ft11_OFFSET(reg) ;\
|
||||
op fa0, __z_arch_esf_t_fa0_OFFSET(reg) ;\
|
||||
op fa1, __z_arch_esf_t_fa1_OFFSET(reg) ;\
|
||||
op fa2, __z_arch_esf_t_fa2_OFFSET(reg) ;\
|
||||
op fa3, __z_arch_esf_t_fa3_OFFSET(reg) ;\
|
||||
op fa4, __z_arch_esf_t_fa4_OFFSET(reg) ;\
|
||||
op fa5, __z_arch_esf_t_fa5_OFFSET(reg) ;\
|
||||
op fa6, __z_arch_esf_t_fa6_OFFSET(reg) ;\
|
||||
op fa7, __z_arch_esf_t_fa7_OFFSET(reg) ;
|
||||
|
||||
/* Convenience macro for loading/storing register states. */
|
||||
#define DO_CALLER_SAVED(op) \
|
||||
RV_E( op t0, __z_arch_esf_t_t0_OFFSET(sp) );\
|
||||
RV_E( op t1, __z_arch_esf_t_t1_OFFSET(sp) );\
|
||||
|
@ -186,14 +163,67 @@ SECTION_FUNC(exception.entry, _isr_wrapper)
|
|||
csrr t2, mstatus
|
||||
sr t2, __z_arch_esf_t_mstatus_OFFSET(sp)
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
/* Assess whether floating-point registers need to be saved. */
|
||||
li t1, MSTATUS_FS_INIT
|
||||
and t0, t2, t1
|
||||
beqz t0, skip_store_fp_caller_saved
|
||||
DO_FP_CALLER_SAVED(fsr, sp)
|
||||
skip_store_fp_caller_saved:
|
||||
#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
|
||||
#if defined(CONFIG_FPU_SHARING)
|
||||
/* determine if this is an Illegal Instruction exception */
|
||||
csrr t0, mcause
|
||||
li t1, 2 /* 2 = illegal instruction */
|
||||
bne t0, t1, no_fp
|
||||
/* determine if FPU access was disabled */
|
||||
csrr t0, mstatus
|
||||
li t1, MSTATUS_FS
|
||||
and t0, t0, t1
|
||||
bnez t0, no_fp
|
||||
/* determine if we trapped on an FP instruction. */
|
||||
csrr t2, mtval /* get faulting instruction */
|
||||
andi t0, t2, 0x7f /* keep only the opcode bits */
|
||||
xori t1, t0, 0b1010011 /* OP-FP */
|
||||
beqz t1, is_fp
|
||||
ori t0, t0, 0b0100000
|
||||
xori t1, t0, 0b0100111 /* LOAD-FP / STORE-FP */
|
||||
#if !defined(CONFIG_RISCV_ISA_EXT_C)
|
||||
bnez t1, no_fp
|
||||
#else
|
||||
beqz t1, is_fp
|
||||
/* remaining non RVC (0b11) and RVC with 0b01 are not FP instructions */
|
||||
andi t1, t0, 1
|
||||
bnez t1, no_fp
|
||||
/*
|
||||
* 001...........00 = C.FLD RV32/64 (RV128 = C.LQ)
|
||||
* 001...........10 = C.FLDSP RV32/64 (RV128 = C.LQSP)
|
||||
* 011...........00 = C.FLW RV32 (RV64/128 = C.LD)
|
||||
 * 011...........10 = C.FLWSP  RV32 (RV64/128 = C.LDSP)
|
||||
* 101...........00 = C.FSD RV32/64 (RV128 = C.SQ)
|
||||
* 101...........10 = C.FSDSP RV32/64 (RV128 = C.SQSP)
|
||||
* 111...........00 = C.FSW RV32 (RV64/128 = C.SD)
|
||||
* 111...........10 = C.FSWSP RV32 (RV64/128 = C.SDSP)
|
||||
*
|
||||
* so must be .01............. on RV64 and ..1............. on RV32.
|
||||
*/
|
||||
srli t0, t2, 8
|
||||
#if defined(CONFIG_64BIT)
|
||||
andi t1, t0, 0b01100000
|
||||
xori t1, t1, 0b00100000
|
||||
bnez t1, no_fp
|
||||
#else
|
||||
andi t1, t0, 0b00100000
|
||||
beqz t1, no_fp
|
||||
#endif
|
||||
#endif /* CONFIG_RISCV_ISA_EXT_C */
|
||||
|
||||
is_fp: /* Process the FP trap and quickly return from exception */
|
||||
la ra, fp_trap_exit
|
||||
mv a0, sp
|
||||
tail z_riscv_fpu_trap
|
||||
|
||||
no_fp: /* increment _current->arch.exception_depth */
|
||||
lr t0, ___cpu_t_current_OFFSET(s0)
|
||||
lb t1, _thread_offset_to_exception_depth(t0)
|
||||
add t1, t1, 1
|
||||
sb t1, _thread_offset_to_exception_depth(t0)
|
||||
|
||||
/* configure the FPU for exception mode */
|
||||
call z_riscv_fpu_enter_exc
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RISCV_SOC_CONTEXT_SAVE
|
||||
/* Handle context saving at SOC level. */
|
||||
|
@ -528,10 +558,8 @@ reschedule:
|
|||
|
||||
z_riscv_thread_start:
|
||||
might_have_rescheduled:
|
||||
#ifdef CONFIG_SMP
|
||||
/* reload s0 with &_current_cpu as it might have changed */
|
||||
/* reload s0 with &_current_cpu as it might have changed or be unset */
|
||||
get_current_cpu s0
|
||||
#endif
|
||||
|
||||
no_reschedule:
|
||||
|
||||
|
@ -541,32 +569,24 @@ no_reschedule:
|
|||
jal ra, __soc_restore_context
|
||||
#endif /* CONFIG_RISCV_SOC_CONTEXT_SAVE */
|
||||
|
||||
/* Restore MEPC register */
|
||||
#if defined(CONFIG_FPU_SHARING)
|
||||
/* FPU handling upon exception mode exit */
|
||||
mv a0, sp
|
||||
call z_riscv_fpu_exit_exc
|
||||
|
||||
/* decrement _current->arch.exception_depth */
|
||||
lr t0, ___cpu_t_current_OFFSET(s0)
|
||||
lb t1, _thread_offset_to_exception_depth(t0)
|
||||
add t1, t1, -1
|
||||
sb t1, _thread_offset_to_exception_depth(t0)
|
||||
fp_trap_exit:
|
||||
#endif
|
||||
|
||||
/* Restore MEPC and MSTATUS registers */
|
||||
lr t0, __z_arch_esf_t_mepc_OFFSET(sp)
|
||||
csrw mepc, t0
|
||||
|
||||
/* Restore MSTATUS register */
|
||||
lr t2, __z_arch_esf_t_mstatus_OFFSET(sp)
|
||||
csrrw t0, mstatus, t2
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
/*
|
||||
* Determine if we need to restore FP regs based on the previous
|
||||
* (before the csr above) mstatus value available in t0.
|
||||
*/
|
||||
li t1, MSTATUS_FS_INIT
|
||||
and t0, t0, t1
|
||||
beqz t0, no_fp
|
||||
|
||||
/* make sure FP is enabled in the restored mstatus */
|
||||
csrs mstatus, t1
|
||||
DO_FP_CALLER_SAVED(flr, sp)
|
||||
j 1f
|
||||
|
||||
no_fp: /* make sure this is reflected in the restored mstatus */
|
||||
csrc mstatus, t1
|
||||
1:
|
||||
#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
|
||||
csrw mepc, t0
|
||||
csrw mstatus, t2
|
||||
|
||||
#ifdef CONFIG_USERSPACE
|
||||
/*
|
||||
|
|
|
@ -43,21 +43,48 @@ GEN_OFFSET_SYM(_callee_saved_t, s10);
|
|||
GEN_OFFSET_SYM(_callee_saved_t, s11);
|
||||
#endif /* !CONFIG_RISCV_ISA_RV32E */
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fcsr);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs0);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs1);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs2);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs3);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs4);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs5);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs6);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs7);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs8);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs9);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs10);
|
||||
GEN_OFFSET_SYM(_callee_saved_t, fs11);
|
||||
#endif
|
||||
#if defined(CONFIG_FPU_SHARING)
|
||||
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fa0);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fa1);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fa2);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fa3);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fa4);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fa5);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fa6);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fa7);
|
||||
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft0);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft1);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft2);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft3);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft4);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft5);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft6);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft7);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft8);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft9);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft10);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, ft11);
|
||||
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs0);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs1);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs2);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs3);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs4);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs5);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs6);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs7);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs8);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs9);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs10);
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fs11);
|
||||
|
||||
GEN_OFFSET_SYM(z_riscv_fp_context_t, fcsr);
|
||||
|
||||
GEN_OFFSET_SYM(_thread_arch_t, exception_depth);
|
||||
|
||||
#endif /* CONFIG_FPU_SHARING */
|
||||
|
||||
/* esf member offsets */
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ra);
|
||||
|
@ -89,29 +116,6 @@ GEN_OFFSET_SYM(z_arch_esf_t, s0);
|
|||
GEN_OFFSET_SYM(z_arch_esf_t, sp);
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft0);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft1);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft2);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft3);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft4);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft5);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft6);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft7);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft8);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft9);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft10);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, ft11);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, fa0);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, fa1);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, fa2);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, fa3);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, fa4);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, fa5);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, fa6);
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, fa7);
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_RISCV_SOC_CONTEXT_SAVE)
|
||||
GEN_OFFSET_SYM(z_arch_esf_t, soc_context);
|
||||
#endif
|
||||
|
|
|
@ -60,6 +60,7 @@ void z_riscv_secondary_cpu_init(int cpu_num)
|
|||
|
||||
static atomic_val_t cpu_pending_ipi[CONFIG_MP_MAX_NUM_CPUS];
|
||||
#define IPI_SCHED BIT(0)
|
||||
#define IPI_FPU_FLUSH BIT(1)
|
||||
|
||||
void arch_sched_ipi(void)
|
||||
{
|
||||
|
@ -77,6 +78,14 @@ void arch_sched_ipi(void)
|
|||
arch_irq_unlock(key);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FPU_SHARING
|
||||
void z_riscv_flush_fpu_ipi(unsigned int cpu)
|
||||
{
|
||||
atomic_or(&cpu_pending_ipi[cpu], IPI_FPU_FLUSH);
|
||||
MSIP(_kernel.cpus[cpu].arch.hartid) = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void ipi_handler(const void *unused)
|
||||
{
|
||||
ARG_UNUSED(unused);
|
||||
|
@ -88,6 +97,18 @@ static void ipi_handler(const void *unused)
|
|||
if (pending_ipi & IPI_SCHED) {
|
||||
z_sched_ipi();
|
||||
}
|
||||
#ifdef CONFIG_FPU_SHARING
|
||||
if (pending_ipi & IPI_FPU_FLUSH) {
|
||||
/* disable IRQs */
|
||||
csr_clear(mstatus, MSTATUS_IEN);
|
||||
/* perform the flush */
|
||||
z_riscv_flush_local_fpu();
|
||||
/*
|
||||
* No need to re-enable IRQs here as long as
|
||||
* this remains the last case.
|
||||
*/
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static int riscv_smp_init(const struct device *dev)
|
||||
|
|
|
@ -29,23 +29,10 @@
|
|||
RV_I( op s10, _thread_offset_to_s10(reg) );\
|
||||
RV_I( op s11, _thread_offset_to_s11(reg) )
|
||||
|
||||
#define DO_FP_CALLEE_SAVED(op, reg) \
|
||||
op fs0, _thread_offset_to_fs0(reg) ;\
|
||||
op fs1, _thread_offset_to_fs1(reg) ;\
|
||||
op fs2, _thread_offset_to_fs2(reg) ;\
|
||||
op fs3, _thread_offset_to_fs3(reg) ;\
|
||||
op fs4, _thread_offset_to_fs4(reg) ;\
|
||||
op fs5, _thread_offset_to_fs5(reg) ;\
|
||||
op fs6, _thread_offset_to_fs6(reg) ;\
|
||||
op fs7, _thread_offset_to_fs7(reg) ;\
|
||||
op fs8, _thread_offset_to_fs8(reg) ;\
|
||||
op fs9, _thread_offset_to_fs9(reg) ;\
|
||||
op fs10, _thread_offset_to_fs10(reg) ;\
|
||||
op fs11, _thread_offset_to_fs11(reg)
|
||||
|
||||
GTEXT(z_riscv_switch)
|
||||
GTEXT(z_thread_mark_switched_in)
|
||||
GTEXT(z_riscv_configure_stack_guard)
|
||||
GTEXT(z_riscv_fpu_thread_context_switch)
|
||||
|
||||
/* void z_riscv_switch(k_thread_t *switch_to, k_thread_t *switch_from) */
|
||||
SECTION_FUNC(TEXT, z_riscv_switch)
|
||||
|
@ -53,18 +40,6 @@ SECTION_FUNC(TEXT, z_riscv_switch)
|
|||
/* Save the old thread's callee-saved registers */
|
||||
DO_CALLEE_SAVED(sr, a1)
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
/* Assess whether floating-point registers need to be saved. */
|
||||
lb t0, _thread_offset_to_user_options(a1)
|
||||
andi t0, t0, K_FP_REGS
|
||||
beqz t0, skip_store_fp_callee_saved
|
||||
|
||||
frcsr t0
|
||||
sw t0, _thread_offset_to_fcsr(a1)
|
||||
DO_FP_CALLEE_SAVED(fsr, a1)
|
||||
skip_store_fp_callee_saved:
|
||||
#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
|
||||
|
||||
/* Save the old thread's stack pointer */
|
||||
sr sp, _thread_offset_to_sp(a1)
|
||||
|
||||
|
@ -79,11 +54,15 @@ skip_store_fp_callee_saved:
|
|||
lr tp, _thread_offset_to_tls(a0)
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_FPU_SHARING)
|
||||
/* Preserve a0 across following call. s0 is not yet restored. */
|
||||
mv s0, a0
|
||||
call z_riscv_fpu_thread_context_switch
|
||||
mv a0, s0
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PMP_STACK_GUARD)
|
||||
/*
|
||||
* Stack guard has priority over user space for PMP usage.
|
||||
* Preserve a0 across following call. s0 is not yet restored.
|
||||
*/
|
||||
/* Stack guard has priority over user space for PMP usage. */
|
||||
mv s0, a0
|
||||
call z_riscv_pmp_stackguard_enable
|
||||
mv a0, s0
|
||||
|
@ -111,27 +90,5 @@ not_user_task:
|
|||
/* Restore the new thread's callee-saved registers */
|
||||
DO_CALLEE_SAVED(lr, a0)
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
/* Determine if we need to restore floating-point registers. */
|
||||
lb t0, _thread_offset_to_user_options(a0)
|
||||
li t1, MSTATUS_FS_INIT
|
||||
andi t0, t0, K_FP_REGS
|
||||
beqz t0, no_fp
|
||||
|
||||
/* Enable floating point access */
|
||||
csrs mstatus, t1
|
||||
|
||||
/* Restore FP regs */
|
||||
lw t1, _thread_offset_to_fcsr(a0)
|
||||
fscsr t1
|
||||
DO_FP_CALLEE_SAVED(flr, a0)
|
||||
j 1f
|
||||
|
||||
no_fp:
|
||||
/* Disable floating point access */
|
||||
csrc mstatus, t1
|
||||
1:
|
||||
#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
|
||||
|
||||
/* Return to arch_switch() or _irq_wrapper() */
|
||||
ret
|
||||
|
|
|
@ -65,12 +65,9 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
|
|||
*/
|
||||
stack_init->mstatus = MSTATUS_DEF_RESTORE;
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
/* Shared FP mode: enable FPU of threads with K_FP_REGS. */
|
||||
if ((thread->base.user_options & K_FP_REGS) != 0) {
|
||||
stack_init->mstatus |= MSTATUS_FS_INIT;
|
||||
}
|
||||
thread->callee_saved.fcsr = 0;
|
||||
#if defined(CONFIG_FPU_SHARING)
|
||||
/* thread birth happens through the exception return path */
|
||||
thread->arch.exception_depth = 1;
|
||||
#elif defined(CONFIG_FPU)
|
||||
/* Unshared FP mode: enable FPU of each thread. */
|
||||
stack_init->mstatus |= MSTATUS_FS_INIT;
|
||||
|
@ -118,72 +115,6 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
|
|||
thread->switch_handle = thread;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
int arch_float_disable(struct k_thread *thread)
|
||||
{
|
||||
unsigned int key;
|
||||
|
||||
if (thread != _current) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (arch_is_in_isr()) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Ensure a preemptive context switch does not occur */
|
||||
key = irq_lock();
|
||||
|
||||
/* Disable all floating point capabilities for the thread */
|
||||
thread->base.user_options &= ~K_FP_REGS;
|
||||
|
||||
/* Clear the FS bits to disable the FPU. */
|
||||
__asm__ volatile (
|
||||
"mv t0, %0\n"
|
||||
"csrrc x0, mstatus, t0\n"
|
||||
:
|
||||
: "r" (MSTATUS_FS_MASK)
|
||||
);
|
||||
|
||||
irq_unlock(key);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int arch_float_enable(struct k_thread *thread, unsigned int options)
|
||||
{
|
||||
unsigned int key;
|
||||
|
||||
if (thread != _current) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (arch_is_in_isr()) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Ensure a preemptive context switch does not occur */
|
||||
key = irq_lock();
|
||||
|
||||
/* Enable all floating point capabilities for the thread. */
|
||||
thread->base.user_options |= K_FP_REGS;
|
||||
|
||||
/* Set the FS bits to Initial and clear the fcsr to enable the FPU. */
|
||||
__asm__ volatile (
|
||||
"mv t0, %0\n"
|
||||
"csrrs x0, mstatus, t0\n"
|
||||
"fscsr x0, x0\n"
|
||||
:
|
||||
: "r" (MSTATUS_FS_INIT)
|
||||
);
|
||||
|
||||
irq_unlock(key);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
|
||||
|
||||
#ifdef CONFIG_USERSPACE
|
||||
|
||||
/*
|
||||
|
|
|
@ -57,48 +57,12 @@
|
|||
#define _thread_offset_to_swap_return_value \
|
||||
(___thread_t_arch_OFFSET + ___thread_arch_t_swap_return_value_OFFSET)
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
#if defined(CONFIG_FPU_SHARING)
|
||||
|
||||
#define _thread_offset_to_fcsr \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fcsr_OFFSET)
|
||||
#define _thread_offset_to_exception_depth \
|
||||
(___thread_t_arch_OFFSET + ___thread_arch_t_exception_depth_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs0 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs0_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs1 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs1_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs2 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs2_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs3 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs3_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs4 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs4_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs5 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs5_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs6 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs6_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs7 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs7_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs8 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs8_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs9 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs9_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs10 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs10_OFFSET)
|
||||
|
||||
#define _thread_offset_to_fs11 \
|
||||
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs11_OFFSET)
|
||||
|
||||
#endif /* defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_USERSPACE
|
||||
|
||||
|
|
|
@ -190,34 +190,25 @@ of stack space is required to load and store floating point registers.
|
|||
RISC-V architecture
|
||||
-------------------
|
||||
|
||||
On the RISC-V architecture, the kernel treats each thread as a non-user
|
||||
or FPU user and the thread must be tagged by one of the
|
||||
following techniques:
|
||||
On the RISC-V architecture the kernel treats each thread as an FPU
|
||||
user on a case-by-case basis with the FPU access allocated on demand.
|
||||
A "lazy save" algorithm is used during context switching which updates
|
||||
the floating point registers only when it is absolutely necessary.
|
||||
For example, the FPU registers are *not* saved when switching from an
|
||||
FPU user to a non-user thread (or an FPU user that doesn't touch the FPU
|
||||
during its scheduling slot), and then back to the original FPU user.
|
||||
|
||||
* A statically-created RISC-V thread can be tagged by passing the
|
||||
:c:macro:`K_FP_REGS` option to :c:macro:`K_THREAD_DEFINE`.
|
||||
FPU register usage by ISRs is supported although not recommended. When an
|
||||
ISR uses floating point or SIMD registers, then the access is trapped, the
|
||||
current FPU user context is saved in the thread object and the ISR is resumed
|
||||
with interrupts disabled so to prevent another IRQ from interrupting the ISR
|
||||
and potentially requesting FPU usage. Because ISRs don't have a persistent
|
||||
register context, there is no provision for saving an ISR's FPU context
|
||||
either, hence the IRQ disabling.
|
||||
|
||||
* A dynamically-created RISC-V thread can be tagged by passing the
|
||||
:c:macro:`K_FP_REGS` to :c:func:`k_thread_create`.
|
||||
|
||||
* A running RISC-V thread can be tagged by calling :c:func:`k_float_enable`.
|
||||
This function can only be called from the thread itself.
|
||||
|
||||
If a RISC-V thread no longer requires the use of the floating point registers,
|
||||
it can call :c:func:`k_float_disable`. This instructs the kernel not to
|
||||
save or restore its FP context during thread context switching. This function
|
||||
can only be called from the thread itself.
|
||||
|
||||
During thread context switching the RISC-V kernel saves the *callee-saved*
|
||||
floating point registers, if the switched-out thread is tagged with
|
||||
:c:macro:`K_FP_REGS`. Additionally, the *caller-saved* floating point
|
||||
registers are saved on the thread's stack. If the switched-in thread has been
|
||||
tagged with :c:macro:`K_FP_REGS`, then the kernel restores the *callee-saved*
|
||||
FP registers of the switched-in thread and the *caller-saved* FP context is
|
||||
restored from the thread's stack. Thus, the kernel does not save or restore the
|
||||
FP context of threads that are not using the FP registers. An extra 84 bytes
|
||||
(single floating point hardware) or 164 bytes (double floating point hardware)
|
||||
of stack space is required to load and store floating point registers.
|
||||
Each thread object becomes 136 bytes (single-precision floating point
|
||||
hardware) or 264 bytes (double-precision floating point hardware) larger
|
||||
when Shared FP registers mode is enabled.
|
||||
|
||||
SPARC architecture
|
||||
------------------
|
||||
|
|
|
@ -149,9 +149,11 @@
|
|||
#define MSTATUS_IEN (1UL << 3)
|
||||
#define MSTATUS_MPP_M (3UL << 11)
|
||||
#define MSTATUS_MPIE_EN (1UL << 7)
|
||||
#define MSTATUS_FS_INIT (1UL << 13)
|
||||
#define MSTATUS_FS_MASK ((1UL << 13) | (1UL << 14))
|
||||
|
||||
#define MSTATUS_FS_OFF (0UL << 13)
|
||||
#define MSTATUS_FS_INIT (1UL << 13)
|
||||
#define MSTATUS_FS_CLEAN (2UL << 13)
|
||||
#define MSTATUS_FS_DIRTY (3UL << 13)
|
||||
|
||||
/* This comes from openisa_rv32m1, but doesn't seem to hurt on other
|
||||
* platforms:
|
||||
|
|
|
@ -45,14 +45,6 @@ struct soc_esf {
|
|||
};
|
||||
#endif
|
||||
|
||||
#if !defined(RV_FP_TYPE) && defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION
|
||||
#define RV_FP_TYPE uint64_t
|
||||
#else
|
||||
#define RV_FP_TYPE uint32_t
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_RISCV_SOC_HAS_ISR_STACKING)
|
||||
SOC_ISR_STACKING_ESF_DECLARE;
|
||||
#else
|
||||
|
@ -89,29 +81,6 @@ struct __esf {
|
|||
unsigned long sp; /* preserved (user or kernel) stack pointer */
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
RV_FP_TYPE ft0; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft1; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft2; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft3; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft4; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft5; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft6; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft7; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft8; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft9; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft10; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE ft11; /* Caller-saved temporary floating register */
|
||||
RV_FP_TYPE fa0; /* function argument/return value */
|
||||
RV_FP_TYPE fa1; /* function argument/return value */
|
||||
RV_FP_TYPE fa2; /* function argument */
|
||||
RV_FP_TYPE fa3; /* function argument */
|
||||
RV_FP_TYPE fa4; /* function argument */
|
||||
RV_FP_TYPE fa5; /* function argument */
|
||||
RV_FP_TYPE fa6; /* function argument */
|
||||
RV_FP_TYPE fa7; /* function argument */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RISCV_SOC_CONTEXT_SAVE
|
||||
struct soc_esf soc_context;
|
||||
#endif
|
||||
|
|
|
@ -18,6 +18,10 @@ struct _cpu_arch {
|
|||
unsigned long hartid;
|
||||
bool online;
|
||||
#endif
|
||||
#ifdef CONFIG_FPU_SHARING
|
||||
atomic_ptr_val_t fpu_owner;
|
||||
uint32_t fpu_state;
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif /* ZEPHYR_INCLUDE_RISCV_STRUCTS_H_ */
|
||||
|
|
|
@ -22,14 +22,6 @@
|
|||
#ifndef _ASMLANGUAGE
|
||||
#include <zephyr/types.h>
|
||||
|
||||
#if !defined(RV_FP_TYPE) && defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION
|
||||
#define RV_FP_TYPE uint64_t
|
||||
#else
|
||||
#define RV_FP_TYPE uint32_t
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The following structure defines the list of registers that need to be
|
||||
* saved/restored when a context switch occurs.
|
||||
|
@ -52,28 +44,32 @@ struct _callee_saved {
|
|||
unsigned long s10; /* saved register */
|
||||
unsigned long s11; /* saved register */
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
|
||||
uint32_t fcsr; /* Control and status register */
|
||||
RV_FP_TYPE fs0; /* saved floating-point register */
|
||||
RV_FP_TYPE fs1; /* saved floating-point register */
|
||||
RV_FP_TYPE fs2; /* saved floating-point register */
|
||||
RV_FP_TYPE fs3; /* saved floating-point register */
|
||||
RV_FP_TYPE fs4; /* saved floating-point register */
|
||||
RV_FP_TYPE fs5; /* saved floating-point register */
|
||||
RV_FP_TYPE fs6; /* saved floating-point register */
|
||||
RV_FP_TYPE fs7; /* saved floating-point register */
|
||||
RV_FP_TYPE fs8; /* saved floating-point register */
|
||||
RV_FP_TYPE fs9; /* saved floating-point register */
|
||||
RV_FP_TYPE fs10; /* saved floating-point register */
|
||||
RV_FP_TYPE fs11; /* saved floating-point register */
|
||||
#endif
|
||||
};
|
||||
typedef struct _callee_saved _callee_saved_t;
|
||||
|
||||
#if !defined(RV_FP_TYPE)
|
||||
#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION
|
||||
#define RV_FP_TYPE uint64_t
|
||||
#else
|
||||
#define RV_FP_TYPE uint32_t
|
||||
#endif
|
||||
#endif
|
||||
|
||||
struct z_riscv_fp_context {
|
||||
RV_FP_TYPE fa0, fa1, fa2, fa3, fa4, fa5, fa6, fa7;
|
||||
RV_FP_TYPE ft0, ft1, ft2, ft3, ft4, ft5, ft6, ft7, ft8, ft9, ft10, ft11;
|
||||
RV_FP_TYPE fs0, fs1, fs2, fs3, fs4, fs5, fs6, fs7, fs8, fs9, fs10, fs11;
|
||||
uint32_t fcsr;
|
||||
};
|
||||
typedef struct z_riscv_fp_context z_riscv_fp_context_t;
|
||||
|
||||
#define PMP_M_MODE_SLOTS 8 /* 8 is plenty enough for m-mode */
|
||||
|
||||
struct _thread_arch {
|
||||
#ifdef CONFIG_FPU_SHARING
|
||||
struct z_riscv_fp_context saved_fp_context;
|
||||
uint8_t exception_depth;
|
||||
#endif
|
||||
#ifdef CONFIG_USERSPACE
|
||||
unsigned long priv_stack_start;
|
||||
unsigned long u_mode_pmpaddr_regs[CONFIG_PMP_SLOTS];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue