/*
 * Copyright (c) 2023 BayLibre SAS
 * Written by: Nicolas Pitre
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/kernel.h>
#include <zephyr/kernel_structs.h>
#include <kernel_arch_interface.h>
#include <zephyr/sys/atomic.h>

/* to be found in fpu.S */
extern void z_riscv_fpu_save(struct z_riscv_fp_context *saved_fp_context);
extern void z_riscv_fpu_restore(struct z_riscv_fp_context *saved_fp_context);
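
/*
 * Background note (summarizing the RISC-V privileged spec rather than
 * anything specific to this file): the 2-bit mstatus.FS field encodes the
 * FPU context state as Off, Initial, Clean or Dirty. The lazy switching
 * logic below manipulates it through the MSTATUS_FS_* constants: Off makes
 * any FP access trap, Clean means the registers match the last save, and
 * Dirty means they have been modified since.
 */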

#define FPU_DEBUG 0

#if FPU_DEBUG
/*
 * Debug traces have to be produced without printk() or any other functions
 * using a va_list as va_start() may copy the FPU registers that could be
 * used to pass float arguments, and that would trigger an FPU access trap.
 * Note: Apparently gcc doesn't use float regs with variadic functions on
 * RISC-V even if -mabi is used with f or d, so this precaution might be
 * unnecessary. But better safe than sorry, especially for debugging code.
 */
#include <string.h>

static void DBG(char *msg, struct k_thread *th)
{
	char buf[80], *p;
	unsigned int v;

	strcpy(buf, "CPU# exc# ");
	buf[3] = '0' + _current_cpu->id;
	buf[8] = '0' + _current->arch.exception_depth;
	strcat(buf, _current->name);
	strcat(buf, ": ");
	strcat(buf, msg);
	strcat(buf, " ");
	strcat(buf, th->name);
	v = *(unsigned char *)&th->arch.saved_fp_context;
	p = buf + strlen(buf);
	*p++ = ' ';
	*p++ = ((v >> 4) < 10) ? ((v >> 4) + '0') : ((v >> 4) - 10 + 'a');
	*p++ = ((v & 15) < 10) ? ((v & 15) + '0') : ((v & 15) - 10 + 'a');
	*p++ = '\n';
	*p = 0;
	k_str_out(buf, p - buf);
}
#else
static inline void DBG(char *msg, struct k_thread *t) { }
#endif /* FPU_DEBUG */
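
/*
 * Disable FPU access on this CPU by clearing mstatus.FS, remembering the
 * clean/dirty state it had so a later flush can skip the register save
 * when nothing was modified.
 */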
static void z_riscv_fpu_disable(void)
{
	unsigned long status = csr_read(mstatus);

	__ASSERT((status & MSTATUS_IEN) == 0, "must be called with IRQs disabled");

	if ((status & MSTATUS_FS) != 0) {
		csr_clear(mstatus, MSTATUS_FS);

		/* remember its clean/dirty state */
		_current_cpu->arch.fpu_state = (status & MSTATUS_FS);
	}
}
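
/*
 * Claim the FPU for the current thread: record this CPU's FPU owner as
 * _current, re-enable access and reload the registers from the thread's
 * save area.
 */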
static void z_riscv_fpu_load(void)
{
	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
		 "must be called with IRQs disabled");
	__ASSERT((csr_read(mstatus) & MSTATUS_FS) == 0,
		 "must be called with FPU access disabled");

	/* become new owner */
	atomic_ptr_set(&_current_cpu->arch.fpu_owner, _current);

	/* restore our content */
	csr_set(mstatus, MSTATUS_FS_INIT);
	z_riscv_fpu_restore(&_current->arch.saved_fp_context);
	DBG("restore", _current);
}

/*
 * Flush FPU content and clear ownership. If the saved FPU state is "clean"
 * then we know the in-memory copy is up to date and skip the FPU content
 * transfer. The saved FPU state is updated upon disabling FPU access so
 * we require that this be called only when the FPU is disabled.
 *
 * This is called locally and also from flush_fpu_ipi_handler().
 */
void arch_flush_local_fpu(void)
{
	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
		 "must be called with IRQs disabled");
	__ASSERT((csr_read(mstatus) & MSTATUS_FS) == 0,
		 "must be called with FPU access disabled");

	struct k_thread *owner = atomic_ptr_get(&_current_cpu->arch.fpu_owner);

	if (owner != NULL) {
		bool dirty = (_current_cpu->arch.fpu_state == MSTATUS_FS_DIRTY);

		if (dirty) {
			/* turn on FPU access */
			csr_set(mstatus, MSTATUS_FS_CLEAN);
			/* save current owner's content */
			z_riscv_fpu_save(&owner->arch.saved_fp_context);
		}

		/* dirty means active use */
		owner->arch.fpu_recently_used = dirty;

		/* disable FPU access */
		csr_clear(mstatus, MSTATUS_FS);

		/* release ownership */
		atomic_ptr_clear(&_current_cpu->arch.fpu_owner);
		DBG("disable", owner);
	}
}

#ifdef CONFIG_SMP
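/*
 * Flush a thread's FPU context from whichever CPU currently holds it live.
 * If the context lives on a remote CPU this relies on the cross-CPU flush
 * IPI and, when flushing the context of the thread running here, spins
 * until the remote CPU has actually released ownership.
 */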
static void flush_owned_fpu(struct k_thread *thread)
{
	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
		 "must be called with IRQs disabled");

	int i;
	atomic_ptr_val_t owner;

	/* search all CPUs for the owner we want */
	unsigned int num_cpus = arch_num_cpus();

	for (i = 0; i < num_cpus; i++) {
		owner = atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner);
		if (owner != thread) {
			continue;
		}

		/* we found it live on CPU i */
		if (i == _current_cpu->id) {
			z_riscv_fpu_disable();
			arch_flush_local_fpu();
			break;
		}

		/* the FPU context is live on another CPU */
		arch_flush_fpu_ipi(i);

		/*
		 * Wait for it only if this is about the thread
		 * currently running on this CPU. Otherwise the
		 * other CPU running some other thread could regain
		 * ownership the moment it is taken away and we
		 * would be stuck here.
		 *
		 * Also, if this is for the thread running on this
		 * CPU, then we preemptively flush any live context
		 * on this CPU as well since we're likely to
		 * replace it, and this avoids a deadlock where
		 * two CPUs want to pull each other's FPU context.
		 */
		if (thread == _current) {
			z_riscv_fpu_disable();
			arch_flush_local_fpu();
			do {
				arch_nop();
				owner = atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner);
			} while (owner == thread);
		}
		break;
	}
}
#endif /* CONFIG_SMP */

void z_riscv_fpu_enter_exc(void)
{
	/* always deny FPU access whenever an exception is entered */
	z_riscv_fpu_disable();
}

/*
 * Process the FPU trap.
 *
 * This usually means that FP regs belong to another thread. Save them
 * to that thread's save area and restore the current thread's content.
 *
 * We also get here when FP regs are used while in exception as FP access
 * is always disabled by default in that case. If so we save the FPU content
 * to the owning thread and simply enable FPU access. Exceptions should be
 * short and don't have persistent register contexts when they're done so
 * there is nothing to save/restore for that context... as long as we
 * don't get interrupted, that is. To ensure that, we mask interrupts in
 * the triggering exception context.
 *
 * Note that the exception depth count was not incremented before this call
 * as no further exceptions are expected before returning to normal mode.
 */
void z_riscv_fpu_trap(z_arch_esf_t *esf)
{
	__ASSERT((esf->mstatus & MSTATUS_FS) == 0 &&
		 (csr_read(mstatus) & MSTATUS_FS) == 0,
		 "called despite FPU being accessible");

	/* save current owner's content if any */
	arch_flush_local_fpu();

	if (_current->arch.exception_depth > 0) {
		/*
		 * We were already in exception when the FPU access trapped.
		 * We give it access and prevent any further IRQ recursion
		 * by disabling IRQs as we wouldn't be able to preserve the
		 * interrupted exception's FPU context.
		 */
		esf->mstatus &= ~MSTATUS_MPIE_EN;

		/* make it accessible to the returning context */
		esf->mstatus |= MSTATUS_FS_INIT;
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * Make sure the FPU context we need isn't live on another CPU.
	 * The current CPU's FPU owner is NULL at this point, so any live
	 * copy of the context can only be elsewhere.
	 */
	flush_owned_fpu(_current);
#endif

	/* make it accessible and clean to the returning context */
	esf->mstatus |= MSTATUS_FS_CLEAN;

	/* and load it with corresponding content */
	z_riscv_fpu_load();
}

/*
 * Perform lazy FPU context switching by simply granting or denying
 * access to FP regs based on FPU ownership before leaving the last
 * exception level in case of exceptions, or during a thread context
 * switch with the exception level of the new thread being 0.
 * If the current thread doesn't own the FP regs then it will trap on its
 * first access and then the actual FPU context switching will occur.
 */
static bool fpu_access_allowed(unsigned int exc_update_level)
{
	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
		 "must be called with IRQs disabled");

	if (_current->arch.exception_depth == exc_update_level) {
		/* We're about to execute non-exception code */
		if (_current_cpu->arch.fpu_owner == _current) {
			/* everything is already in place */
			return true;
		}
		if (_current->arch.fpu_recently_used) {
			/*
			 * Before this thread was context-switched out,
			 * it made active use of the FPU, but someone else
			 * took it away in the meantime. Let's preemptively
			 * claim it back to avoid the likely exception trap
			 * to come otherwise.
			 */
			z_riscv_fpu_disable();
			arch_flush_local_fpu();
#ifdef CONFIG_SMP
			flush_owned_fpu(_current);
#endif
			z_riscv_fpu_load();
			_current_cpu->arch.fpu_state = MSTATUS_FS_CLEAN;
			return true;
		}
		return false;
	}

	/*
	 * Any new exception level should always trap on FPU
	 * access as we want to make sure IRQs are disabled before
	 * granting it access (see z_riscv_fpu_trap() documentation).
	 */
	return false;
}

/*
 * This is called on every exception exit except for z_riscv_fpu_trap().
 * In that case the exception level of interest is 1 (soon to be 0).
 */
void z_riscv_fpu_exit_exc(z_arch_esf_t *esf)
{
	if (fpu_access_allowed(1)) {
		esf->mstatus &= ~MSTATUS_FS;
		esf->mstatus |= _current_cpu->arch.fpu_state;
	} else {
		esf->mstatus &= ~MSTATUS_FS;
	}
}

/*
 * This is called from z_riscv_context_switch(). FPU access may be granted
 * only if exception level is 0. If we switch to a thread that is still in
 * some exception context then FPU access would be re-evaluated at exception
 * exit time via z_riscv_fpu_exit_exc().
 */
void z_riscv_fpu_thread_context_switch(void)
{
	if (fpu_access_allowed(0)) {
		csr_clear(mstatus, MSTATUS_FS);
		csr_set(mstatus, _current_cpu->arch.fpu_state);
	} else {
		z_riscv_fpu_disable();
	}
}
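
/*
 * Architecture hook behind k_float_disable(): flush and release the given
 * thread's FPU context wherever it currently lives, so that any later FP
 * access by that thread traps and goes through the lazy switching path
 * again.
 */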
int arch_float_disable(struct k_thread *thread)
{
	if (thread != NULL) {
		unsigned int key = arch_irq_lock();

#ifdef CONFIG_SMP
		flush_owned_fpu(thread);
#else
		if (thread == _current_cpu->arch.fpu_owner) {
			z_riscv_fpu_disable();
			arch_flush_local_fpu();
		}
#endif

		arch_irq_unlock(key);
	}

	return 0;
}
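
/*
 * Architecture hook behind k_float_enable(): a no-op here since FPU access
 * is granted lazily on first use.
 */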
int arch_float_enable(struct k_thread *thread, unsigned int options)
{
	/* floats always get enabled automatically at the moment */
	return 0;
}
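
/*
 * Illustrative usage sketch (an assumption about typical application code,
 * not something defined in this file): with lazy switching a thread needs
 * no explicit setup; its first FP instruction traps into z_riscv_fpu_trap()
 * and the context is pulled in transparently. A thread that is done with
 * the FPU may optionally release its context via the kernel API:
 *
 *	void worker(void *p1, void *p2, void *p3)
 *	{
 *		double acc = 0.0;
 *
 *		// first FP use traps once, then runs natively
 *		for (int i = 0; i < 1000; i++) {
 *			acc += (double)i * 0.5;
 *		}
 *
 *		// optionally drop ownership so other threads skip the save
 *		k_float_disable(k_current_get());
 *	}
 */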