arch: arm64: Use atomic for fpu_owner pointer
Currently, the lazy FPU-saving algorithm on arm64 uses the fpu_owner pointer in the per-CPU structure to track which thread owns the FPU context on a CPU, so that the context can be saved when a thread other than the owner accesses the FPU. Plain pointer accesses have error-prone memory-consistency semantics on SMP systems, and future reworks of this code could miss barriers and leave inconsistent state across cores. To avoid that, use atomics to hide the complexity and make sure the code behaves as intended. While there, add ISB barriers after writes to CPACR_EL1, following the Arm ARM guidance on writes to system registers. Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
parent bcd0e8c175
commit c8a9634a30
2 changed files with 13 additions and 10 deletions
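The change replaces plain loads and stores of fpu_owner with Zephyr's atomic pointer API. As a minimal sketch of that API's shape as used below (the static fpu_owner here is a hypothetical stand-in for the real _current_cpu->arch.fpu_owner field, not Zephyr code):

#include <zephyr/kernel.h>
#include <zephyr/sys/atomic.h>

/* Hypothetical stand-in for _current_cpu->arch.fpu_owner. */
static atomic_ptr_val_t fpu_owner;

static void take_ownership(struct k_thread *me)
{
	/* Atomic store replaces the plain write: fpu_owner = me; */
	atomic_ptr_set(&fpu_owner, me);
}

static bool is_owner(struct k_thread *me)
{
	/* Atomic load replaces a plain read of the pointer. */
	return atomic_ptr_get(&fpu_owner) == me;
}

static void release_ownership(void)
{
	/* Stores NULL; replaces the plain write: fpu_owner = NULL; */
	atomic_ptr_clear(&fpu_owner);
}

The point of routing every access through these helpers is that any ordering the accesses need lives inside the atomic implementation, rather than in hand-placed barriers scattered through the FPU code.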
arch/arm64/core/fpu.c

@@ -10,6 +10,7 @@
 #include <kernel_arch_interface.h>
 #include <zephyr/arch/cpu.h>
 #include <zephyr/sys/barrier.h>
+#include <zephyr/sys/atomic.h>
 
 /* to be found in fpu.S */
 extern void z_arm64_fpu_save(struct z_arm64_fp_context *saved_fp_context);
@@ -67,7 +68,7 @@ void z_arm64_flush_local_fpu(void)
 {
 	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
 
-	struct k_thread *owner = _current_cpu->arch.fpu_owner;
+	struct k_thread *owner = atomic_ptr_get(&_current_cpu->arch.fpu_owner);
 
 	if (owner != NULL) {
 		uint64_t cpacr = read_cpacr_el1();
@@ -81,11 +82,12 @@ void z_arm64_flush_local_fpu(void)
 		/* make sure content made it to memory before releasing */
 		barrier_dsync_fence_full();
 		/* release ownership */
-		_current_cpu->arch.fpu_owner = NULL;
+		atomic_ptr_clear(&_current_cpu->arch.fpu_owner);
 		DBG("disable", owner);
 
 		/* disable FPU access */
 		write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
+		barrier_isync_fence_full();
 	}
 }
 
@@ -100,7 +102,7 @@ static void flush_owned_fpu(struct k_thread *thread)
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (i = 0; i < num_cpus; i++) {
-		if (_kernel.cpus[i].arch.fpu_owner != thread) {
+		if (atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner) != thread) {
 			continue;
 		}
 		/* we found it live on CPU i */
@@ -125,7 +127,7 @@ static void flush_owned_fpu(struct k_thread *thread)
 		 */
 		if (thread == _current) {
 			z_arm64_flush_local_fpu();
-			while (_kernel.cpus[i].arch.fpu_owner == thread) {
+			while (atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner) == thread) {
 				barrier_dsync_fence_full();
 			}
 		}
@@ -233,12 +235,12 @@ void z_arm64_fpu_trap(z_arch_esf_t *esf)
 	barrier_isync_fence_full();
 
 	/* save current owner's content if any */
-	struct k_thread *owner = _current_cpu->arch.fpu_owner;
+	struct k_thread *owner = atomic_ptr_get(&_current_cpu->arch.fpu_owner);
 
 	if (owner) {
 		z_arm64_fpu_save(&owner->arch.saved_fp_context);
 		barrier_dsync_fence_full();
-		_current_cpu->arch.fpu_owner = NULL;
+		atomic_ptr_clear(&_current_cpu->arch.fpu_owner);
 		DBG("save", owner);
 	}
 
@@ -262,7 +264,7 @@ void z_arm64_fpu_trap(z_arch_esf_t *esf)
 #endif
 
 	/* become new owner */
-	_current_cpu->arch.fpu_owner = _current;
+	atomic_ptr_set(&_current_cpu->arch.fpu_owner, _current);
 
 	/* restore our content */
 	z_arm64_fpu_restore(&_current->arch.saved_fp_context);
@@ -285,7 +287,7 @@ static void fpu_access_update(unsigned int exc_update_level)
 
 	if (arch_exception_depth() == exc_update_level) {
 		/* We're about to execute non-exception code */
-		if (_current_cpu->arch.fpu_owner == _current) {
+		if (atomic_ptr_get(&_current_cpu->arch.fpu_owner) == _current) {
 			/* turn on FPU access */
 			write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
 		} else {
@@ -300,6 +302,7 @@ static void fpu_access_update(unsigned int exc_update_level)
 		 */
 		write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
 	}
+	barrier_isync_fence_full();
 }
 
 /*
@@ -330,7 +333,7 @@ int arch_float_disable(struct k_thread *thread)
 #ifdef CONFIG_SMP
 	flush_owned_fpu(thread);
 #else
-	if (thread == _current_cpu->arch.fpu_owner) {
+	if (thread == atomic_ptr_get(&_current_cpu->arch.fpu_owner)) {
 		z_arm64_flush_local_fpu();
 	}
 #endif
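The barrier_isync_fence_full() calls added in the hunks above follow a single pattern: after a write to CPACR_EL1, issue an ISB so the new trap configuration takes effect before any subsequent instruction executes, per the Arm ARM rules for system register writes. A minimal sketch of that pattern, assuming the helper names this file already uses (read_cpacr_el1(), write_cpacr_el1(), CPACR_EL1_FPEN_NOTRAP) and Zephyr's arm64 register helpers header:

#include <zephyr/arch/cpu.h>
#include <zephyr/arch/arm64/lib_helpers.h>
#include <zephyr/sys/barrier.h>

static inline void fpu_access_enable_sketch(void)
{
	/* Stop trapping FPU accesses... */
	write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
	/* ...and issue an ISB so the new setting is guaranteed to
	 * apply to the instructions that follow.
	 */
	barrier_isync_fence_full();
}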
include/zephyr/arch/arm64/structs.h
@@ -10,7 +10,7 @@
 /* Per CPU architecture specifics */
 struct _cpu_arch {
 #ifdef CONFIG_FPU_SHARING
-	struct k_thread *fpu_owner;
+	atomic_ptr_val_t fpu_owner;
 #endif
 #ifdef CONFIG_ARM64_SAFE_EXCEPTION_STACK
 	uint64_t safe_exception_stack;
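The field type change above keeps fpu_owner as an ordinary struct member while routing every access through the atomic_ptr_*() helpers. A minimal sketch of declaring and statically initializing such a field, assuming Zephyr's ATOMIC_PTR_INIT() initializer; the container struct here is hypothetical, not the real _cpu_arch:

#include <zephyr/sys/atomic.h>

/* Hypothetical container mirroring the fpu_owner field above. */
struct owner_demo {
	atomic_ptr_val_t owner;
};

static struct owner_demo demo = {
	.owner = ATOMIC_PTR_INIT(NULL),
};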