diff --git a/arch/riscv/core/fpu.S b/arch/riscv/core/fpu.S
new file mode 100644
index 00000000000..2708d11fec1
--- /dev/null
+++ b/arch/riscv/core/fpu.S
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2023 BayLibre SAS
+ * Written by: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <zephyr/toolchain.h>
+#include <zephyr/linker/sections.h>
+#include <zephyr/offsets.h>
+
+#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION
+#define LOAD	fld
+#define STORE	fsd
+#else
+#define LOAD	flw
+#define STORE	fsw
+#endif
+
+#define DO_FP_REGS(op, ptr) \
+	op fa0, __z_riscv_fp_context_t_fa0_OFFSET (ptr); \
+	op fa1, __z_riscv_fp_context_t_fa1_OFFSET (ptr); \
+	op fa2, __z_riscv_fp_context_t_fa2_OFFSET (ptr); \
+	op fa3, __z_riscv_fp_context_t_fa3_OFFSET (ptr); \
+	op fa4, __z_riscv_fp_context_t_fa4_OFFSET (ptr); \
+	op fa5, __z_riscv_fp_context_t_fa5_OFFSET (ptr); \
+	op fa6, __z_riscv_fp_context_t_fa6_OFFSET (ptr); \
+	op fa7, __z_riscv_fp_context_t_fa7_OFFSET (ptr); \
+	op fs0, __z_riscv_fp_context_t_fs0_OFFSET (ptr); \
+	op fs1, __z_riscv_fp_context_t_fs1_OFFSET (ptr); \
+	op fs2, __z_riscv_fp_context_t_fs2_OFFSET (ptr); \
+	op fs3, __z_riscv_fp_context_t_fs3_OFFSET (ptr); \
+	op fs4, __z_riscv_fp_context_t_fs4_OFFSET (ptr); \
+	op fs5, __z_riscv_fp_context_t_fs5_OFFSET (ptr); \
+	op fs6, __z_riscv_fp_context_t_fs6_OFFSET (ptr); \
+	op fs7, __z_riscv_fp_context_t_fs7_OFFSET (ptr); \
+	op fs8, __z_riscv_fp_context_t_fs8_OFFSET (ptr); \
+	op fs9, __z_riscv_fp_context_t_fs9_OFFSET (ptr); \
+	op fs10, __z_riscv_fp_context_t_fs10_OFFSET(ptr); \
+	op fs11, __z_riscv_fp_context_t_fs11_OFFSET(ptr); \
+	op ft0, __z_riscv_fp_context_t_ft0_OFFSET (ptr); \
+	op ft1, __z_riscv_fp_context_t_ft1_OFFSET (ptr); \
+	op ft2, __z_riscv_fp_context_t_ft2_OFFSET (ptr); \
+	op ft3, __z_riscv_fp_context_t_ft3_OFFSET (ptr); \
+	op ft4, __z_riscv_fp_context_t_ft4_OFFSET (ptr); \
+	op ft5, __z_riscv_fp_context_t_ft5_OFFSET (ptr); \
+	op ft6, __z_riscv_fp_context_t_ft6_OFFSET (ptr); \
+	op ft7, __z_riscv_fp_context_t_ft7_OFFSET (ptr); \
+	op ft8, __z_riscv_fp_context_t_ft8_OFFSET (ptr); \
+	op ft9, __z_riscv_fp_context_t_ft9_OFFSET (ptr); \
+	op ft10, __z_riscv_fp_context_t_ft10_OFFSET(ptr); \
+	op ft11, __z_riscv_fp_context_t_ft11_OFFSET(ptr)
+
+GTEXT(z_riscv_fpu_save)
+SECTION_FUNC(TEXT, z_riscv_fpu_save)
+
+	frcsr t0
+	DO_FP_REGS(STORE, a0)
+	sw t0, __z_riscv_fp_context_t_fcsr_OFFSET(a0)
+	ret
+
+GTEXT(z_riscv_fpu_restore)
+SECTION_FUNC(TEXT, z_riscv_fpu_restore)
+
+	DO_FP_REGS(LOAD, a0)
+	lw t0, __z_riscv_fp_context_t_fcsr_OFFSET(a0)
+	fscsr t0
+	ret
+
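The __z_riscv_fp_context_t_*_OFFSET symbols used above are not written by hand: Zephyr's offset-generation machinery (GEN_OFFSET_SYM) emits them at build time from the C definition of the FPU context structure, so the assembly cannot drift out of sync with the struct. For orientation, here is a minimal sketch of the layout those offsets describe, keyed to the LOAD/STORE selection above; the fp_reg_t name is hypothetical and the real definition lives in the RISC-V arch headers:

/* Sketch only -- not part of this patch. fp_reg_t is a hypothetical
 * name; the authoritative struct and its generated offsets come from
 * the arch headers and the build system.
 */
#include <stdint.h>

#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION
typedef uint64_t fp_reg_t;	/* matches fld/fsd above */
#else
typedef uint32_t fp_reg_t;	/* matches flw/fsw above */
#endif

struct z_riscv_fp_context {
	fp_reg_t fa0, fa1, fa2, fa3, fa4, fa5, fa6, fa7;	/* FP argument regs */
	fp_reg_t fs0, fs1, fs2, fs3, fs4, fs5, fs6, fs7;	/* FP callee-saved regs */
	fp_reg_t fs8, fs9, fs10, fs11;
	fp_reg_t ft0, ft1, ft2, ft3, ft4, ft5, ft6, ft7;	/* FP temporaries */
	fp_reg_t ft8, ft9, ft10, ft11;
	uint32_t fcsr;		/* control/status, moved via frcsr/fscsr */
};

Note that all 32 FP registers are saved, not just the callee-saved ones: with lazy switching the save happens at an arbitrary preemption point, where caller-saved registers may still hold live values.
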
diff --git a/arch/riscv/core/fpu.c b/arch/riscv/core/fpu.c
new file mode 100644
index 00000000000..3d87538f502
--- /dev/null
+++ b/arch/riscv/core/fpu.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2023 BayLibre SAS
+ * Written by: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <zephyr/kernel.h>
+#include <zephyr/kernel_structs.h>
+#include <zephyr/sys/atomic.h>
+#include <zephyr/arch/cpu.h>
+
+/* to be found in fpu.S */
+extern void z_riscv_fpu_save(struct z_riscv_fp_context *saved_fp_context);
+extern void z_riscv_fpu_restore(struct z_riscv_fp_context *saved_fp_context);
+
+#define FPU_DEBUG 0
+
+#if FPU_DEBUG
+
+/*
+ * Debug traces have to be produced without printk() or any other function
+ * using a va_list, as va_start() may copy the FPU registers that could be
+ * used to pass float arguments, and that would trigger an FPU access trap.
+ * Note: apparently gcc doesn't use float regs with variadic functions on
+ * RISC-V even if -mabi is used with f or d, so this precaution might be
+ * unnecessary. But better safe than sorry, especially for debugging code.
+ */
+
+#include <string.h>
+
+static void DBG(char *msg, struct k_thread *th)
+{
+	char buf[80], *p;
+	unsigned int v;
+
+	strcpy(buf, "CPU# exc# ");
+	buf[3] = '0' + _current_cpu->id;
+	buf[8] = '0' + _current->arch.exception_depth;
+	strcat(buf, _current->name);
+	strcat(buf, ": ");
+	strcat(buf, msg);
+	strcat(buf, " ");
+	strcat(buf, th->name);
+
+	v = *(unsigned char *)&th->arch.saved_fp_context;
+	p = buf + strlen(buf);
+	*p++ = ' ';
+	*p++ = ((v >> 4) < 10) ? ((v >> 4) + '0') : ((v >> 4) - 10 + 'a');
+	*p++ = ((v & 15) < 10) ? ((v & 15) + '0') : ((v & 15) - 10 + 'a');
+	*p++ = '\n';
+	*p = 0;
+
+	k_str_out(buf, p - buf);
+}
+
+#else
+
+static inline void DBG(char *msg, struct k_thread *t) { }
+
+#endif /* FPU_DEBUG */
+
+static void z_riscv_fpu_disable(void)
+{
+	unsigned long status = csr_read(mstatus);
+
+	__ASSERT((status & MSTATUS_IEN) == 0, "must be called with IRQs disabled");
+
+	if ((status & MSTATUS_FS) != 0) {
+		csr_clear(mstatus, MSTATUS_FS);
+
+		/* remember its clean/dirty state */
+		_current_cpu->arch.fpu_state = (status & MSTATUS_FS);
+	}
+}
+
+/*
+ * Flush FPU content and clear ownership. If the saved FPU state is "clean"
+ * then we know the in-memory copy is up to date and skip the FPU content
+ * transfer. The saved FPU state is updated upon disabling FPU access so
+ * we require that this be called only when the FPU is disabled.
+ *
+ * This is called locally and also from flush_fpu_ipi_handler().
+ */
+void z_riscv_flush_local_fpu(void)
+{
+	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
+		 "must be called with IRQs disabled");
+	__ASSERT((csr_read(mstatus) & MSTATUS_FS) == 0,
+		 "must be called with FPU access disabled");
+
+	struct k_thread *owner = atomic_ptr_get(&_current_cpu->arch.fpu_owner);
+
+	if (owner != NULL) {
+		bool dirty = (_current_cpu->arch.fpu_state == MSTATUS_FS_DIRTY);
+
+		if (dirty) {
+			/* turn on FPU access */
+			csr_set(mstatus, MSTATUS_FS_CLEAN);
+			/* save current owner's content */
+			z_riscv_fpu_save(&owner->arch.saved_fp_context);
+		}
+
+		/* disable FPU access */
+		csr_clear(mstatus, MSTATUS_FS);
+
+		/* release ownership */
+		atomic_ptr_clear(&_current_cpu->arch.fpu_owner);
+		DBG("disable", owner);
+	}
+}
+
+#ifdef CONFIG_SMP
+static void flush_owned_fpu(struct k_thread *thread)
+{
+	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
+		 "must be called with IRQs disabled");
+
+	int i;
+	atomic_ptr_val_t owner;
+
+	/* search all CPUs for the owner we want */
+	unsigned int num_cpus = arch_num_cpus();
+
+	for (i = 0; i < num_cpus; i++) {
+		owner = atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner);
+		if (owner != thread) {
+			continue;
+		}
+
+		/* we found it live on CPU i */
+		if (i == _current_cpu->id) {
+			z_riscv_fpu_disable();
+			z_riscv_flush_local_fpu();
+			break;
+		}
+
+		/* the FPU context is live on another CPU */
+		z_riscv_flush_fpu_ipi(i);
+
+		/*
+		 * Wait for it only if this is about the thread
+		 * currently running on this CPU. Otherwise the
+		 * other CPU running some other thread could regain
+		 * ownership the moment it is removed from it and
+		 * we would be stuck here.
+		 *
+		 * Also, if this is for the thread running on this
+		 * CPU, then we preemptively flush any live context
+		 * on this CPU as well since we're likely to
+		 * replace it, and this avoids a deadlock where
+		 * two CPUs want to pull each other's FPU context.
+		 */
+		if (thread == _current) {
+			z_riscv_fpu_disable();
+			z_riscv_flush_local_fpu();
+			do {
+				arch_nop();
+				owner = atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner);
+			} while (owner == thread);
+		}
+		break;
+	}
+}
+#endif
+
+void z_riscv_fpu_enter_exc(void)
+{
+	/* always deny FPU access whenever an exception is entered */
+	z_riscv_fpu_disable();
+}
+
+/*
+ * Process the FPU trap.
+ *
+ * This usually means that FP regs belong to another thread. Save them
+ * to that thread's save area and restore the current thread's content.
+ *
+ * We also get here when FP regs are used while in exception, as FP access
+ * is always disabled by default in that case. If so we save the FPU content
+ * to the owning thread and simply enable FPU access. Exceptions should be
+ * short and have no persistent register context once they're done, so
+ * there is nothing to save/restore for that context... as long as we
+ * don't get interrupted, that is. To ensure that, we mask interrupts in
+ * the triggering exception context.
+ *
+ * Note that the exception depth count was not incremented before this call
+ * as no further exceptions are expected before returning to normal mode.
+ */
+void z_riscv_fpu_trap(z_arch_esf_t *esf)
+{
+	__ASSERT((esf->mstatus & MSTATUS_FS) == 0 &&
+		 (csr_read(mstatus) & MSTATUS_FS) == 0,
+		 "called despite FPU being accessible");
+
+	/* save current owner's content if any */
+	z_riscv_flush_local_fpu();
+
+	if (_current->arch.exception_depth > 0) {
+		/*
+		 * We were already in exception when the FPU access trapped.
+		 * We give it access and prevent any further IRQ recursion
+		 * by disabling IRQs as we wouldn't be able to preserve the
+		 * interrupted exception's FPU context.
+		 */
+		esf->mstatus &= ~MSTATUS_IEN;
+
+		/* make it accessible to the returning context */
+		esf->mstatus |= MSTATUS_FS_INIT;
+
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * Make sure the FPU context we need isn't live on another CPU.
+	 * The current CPU's FPU context is NULL at this point.
+	 */
+	flush_owned_fpu(_current);
+#endif
+
+	/* become new owner */
+	atomic_ptr_set(&_current_cpu->arch.fpu_owner, _current);
+
+	/* make it accessible and clean to the returning context */
+	esf->mstatus |= MSTATUS_FS_CLEAN;
+
+	/* restore our content */
+	csr_set(mstatus, MSTATUS_FS_INIT);
+	z_riscv_fpu_restore(&_current->arch.saved_fp_context);
+	DBG("restore", _current);
+}
+
+/*
+ * Perform lazy FPU context switching by simply granting or denying
+ * access to FP regs based on FPU ownership before leaving the last
+ * exception level in case of exceptions, or during a thread context
+ * switch with the exception level of the new thread being 0.
+ * If the current thread doesn't own the FP regs, it will trap on its
+ * first access and the actual FPU context switching will occur at
+ * that point.
+ */
+static bool fpu_access_allowed(unsigned int exc_update_level)
+{
+	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
+		 "must be called with IRQs disabled");
+
+	if (_current->arch.exception_depth == exc_update_level) {
+		/* We're about to execute non-exception code */
+		return (_current_cpu->arch.fpu_owner == _current);
+	}
+
+	/*
+	 * Any new exception level should always trap on FPU
+	 * access as we want to make sure IRQs are disabled before
+	 * granting it access (see z_riscv_fpu_trap() documentation).
+	 */
+	return false;
+}
+
+/*
+ * This is called on every exception exit except for z_riscv_fpu_trap().
+ * In that case the exception level of interest is 1 (soon to be 0).
+ */
+void z_riscv_fpu_exit_exc(z_arch_esf_t *esf)
+{
+	if (fpu_access_allowed(1)) {
+		esf->mstatus |= _current_cpu->arch.fpu_state;
+	} else {
+		esf->mstatus &= ~MSTATUS_FS;
+	}
+}
+
+/*
+ * This is called from z_riscv_context_switch(). FPU access may be granted
+ * only if exception level is 0. If we switch to a thread that is still in
+ * some exception context then FPU access would be re-evaluated at exception
+ * exit time via z_riscv_fpu_exit_exc().
+ */
+void z_riscv_fpu_thread_context_switch(void)
+{
+	if (fpu_access_allowed(0)) {
+		csr_clear(mstatus, MSTATUS_FS);
+		csr_set(mstatus, _current_cpu->arch.fpu_state);
+	} else {
+		z_riscv_fpu_disable();
+	}
+}
+
+int arch_float_disable(struct k_thread *thread)
+{
+	if (thread != NULL) {
+		unsigned int key = arch_irq_lock();
+
+#ifdef CONFIG_SMP
+		flush_owned_fpu(thread);
+#else
+		if (thread == _current_cpu->arch.fpu_owner) {
+			z_riscv_fpu_disable();
+			z_riscv_flush_local_fpu();
+		}
+#endif
+
+		arch_irq_unlock(key);
+	}
+
+	return 0;
+}
+
+int arch_float_enable(struct k_thread *thread, unsigned int options)
+{
+	/* floats always get enabled automatically at the moment */
+	return 0;
+}
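Everything in fpu.c pivots on the two-bit mstatus.FS field defined by the RISC-V privileged specification: Off makes every FPU instruction trap (which is what funnels execution into z_riscv_fpu_trap()), while Initial/Clean/Dirty grant access and let the hardware track whether the registers have diverged from the in-memory copy, since any FP register write escalates the field to Dirty. The MSTATUS_FS_* constants are assumed to be defined along these lines in Zephyr's CSR headers; the encoding is from the spec (bits 14:13 of mstatus), but the exact macro spellings are not part of this diff:

/* Assumed definitions, for illustration only; the authoritative
 * versions live in the arch/CSR headers.
 */
#define MSTATUS_FS_OFF		(0UL << 13)	/* FPU off: any FP insn traps */
#define MSTATUS_FS_INIT		(1UL << 13)	/* on, registers in reset state */
#define MSTATUS_FS_CLEAN	(2UL << 13)	/* on, matches the memory copy */
#define MSTATUS_FS_DIRTY	(3UL << 13)	/* on, modified since last save */
#define MSTATUS_FS		(3UL << 13)	/* mask for the whole field */

This is what makes the "clean" optimization in z_riscv_flush_local_fpu() safe: z_riscv_fpu_disable() snapshots the field into fpu_state when it revokes access, and a later flush only performs the full register save if that snapshot says Dirty.
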
diff --git a/arch/riscv/include/kernel_arch_func.h b/arch/riscv/include/kernel_arch_func.h
index edbd956d20a..c15d0cc8563 100644
--- a/arch/riscv/include/kernel_arch_func.h
+++ b/arch/riscv/include/kernel_arch_func.h
@@ -80,6 +80,11 @@ extern FUNC_NORETURN void z_riscv_userspace_enter(k_thread_entry_t user_entry,
 int z_irq_do_offload(void);
 #endif
 
+#ifdef CONFIG_FPU_SHARING
+void z_riscv_flush_local_fpu(void);
+void z_riscv_flush_fpu_ipi(unsigned int cpu);
+#endif
+
 #endif /* _ASMLANGUAGE */
 
 #ifdef __cplusplus
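The z_riscv_flush_fpu_ipi() declaration added above is the cross-CPU half of flush_owned_fpu(): it asks the CPU that currently owns a thread's FPU context to flush it. The receiving side is not part of this diff and belongs in the RISC-V SMP/IPI code. A plausible sketch of the handler, under the assumption that exception entry has already run z_riscv_fpu_enter_exc() (so FPU access is off) and that ISRs run with IRQs masked, is simply:

/* Hypothetical receiving end of z_riscv_flush_fpu_ipi(); the real
 * handler is wired into the arch's IPI dispatch, not shown here.
 */
static void flush_fpu_ipi_handler(void)
{
	/* The preconditions of z_riscv_flush_local_fpu() already hold in
	 * an ISR: IRQs are masked and z_riscv_fpu_enter_exc() disabled
	 * FPU access on exception entry. Flushing saves the owner's
	 * registers (if dirty) and clears fpu_owner, which the requesting
	 * CPU in flush_owned_fpu() may be spinning on.
	 */
	z_riscv_flush_local_fpu();
}

Note that the requester only spins on fpu_owner when reclaiming the context of the thread it is itself running; together with the preemptive local flush, this avoids the two-CPU deadlock described in flush_owned_fpu().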