arm64: mitigate FPU-in-exception usage side effects
Every va_start() currently triggers an FPU access trap if the FPU is not already in use. This is because va_start() must copy the FPU registers used for float argument passing into the va_list object.

Flushing the FPU context to its owner and granting access to the current thread is wasteful if it is done only for va_start(), especially since in most cases there are simply no FP arguments being passed by the caller.

This is made even worse with exception code (syscalls, IRQ handlers, etc.), where the exception code has to be resumed with interrupts disabled upon FPU access, as there is no provision for preserving an interrupted exception mode's FPU context.

Fix those issues by simply simulating the sequence of STR instructions that va_start() generates, without actually granting FPU access. We limit ourselves to exception context only, to keep changes to a minimum for now.

This also allows for reverting the ARM64-specific exception in the nested IRQ test, as it now works properly even when FPU_SHARING is enabled.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
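For context on why the trap fires so often: any variadic function is enough to hit it, because va_start() on AArch64 spills the floating-point argument registers (q0-q7) into the va_list save area even when the caller passed no FP arguments at all. A minimal, hypothetical illustration (not part of this patch; the function name and body are invented):

#include <stdarg.h>

/*
 * Illustrative variadic function: the va_start() setup it compiles to
 * includes a series of "STR Q<n>, [SP, #<pimm>]" instructions saving
 * q0-q7, which is exactly the sequence the trap handler in the diff
 * below recognizes and simulates instead of granting FPU access.
 */
static int sum_ints(int count, ...)
{
	va_list ap;
	int total = 0;

	va_start(ap, count);	/* this is what hits the FPU access trap */
	for (int i = 0; i < count; i++) {
		total += va_arg(ap, int);
	}
	va_end(ap);
	return total;
}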
parent 58d3760533
commit b8d24ffb45
2 changed files with 65 additions and 9 deletions
@@ -141,6 +141,66 @@ void z_arm64_fpu_enter_exc(void)
	isb();
}

/*
 * Simulate some FPU store instructions.
 *
 * In many cases, the FPU trap is triggered by va_start() that copies
 * the content of FP registers used for floating point argument passing
 * into the va_list object in case there were actual float arguments from
 * the caller. In practice this is almost never the case, especially if
 * FPU access is disabled and we're trapped while in exception context.
 * Rather than flushing the FPU context to its owner and enabling access
 * just to let the corresponding STR instructions execute, we simply
 * simulate them and leave the FPU access disabled. This also avoids the
 * need for disabling interrupts in syscalls and IRQ handlers as well.
 */
static bool simulate_str_q_insn(z_arch_esf_t *esf)
{
	/*
	 * Support only the "FPU in exception" cases for now.
	 * We know there is no saved FPU context to check nor any
	 * userspace stack memory to validate in that case.
	 */
	if (arch_exception_depth() <= 1) {
		return false;
	}

	uint32_t *pc = (uint32_t *)esf->elr;
	/* The original (interrupted) sp is the top of the esf structure */
	uintptr_t sp = (uintptr_t)esf + sizeof(*esf);

	for (;;) {
		uint32_t insn = *pc;

		/*
		 * We're looking for STR (immediate, SIMD&FP) of the form:
		 *
		 *   STR Q<n>, [SP, #<pimm>]
		 *
		 * where 0 <= <n> <= 7 and <pimm> is a 12-bit multiple of 16.
		 */
		if ((insn & 0xffc003f8) != 0x3d8003e0)
			break;

		uint32_t pimm = (insn >> 10) & 0xfff;

		/* Zero the location as the above STR would have done */
		*(__int128 *)(sp + pimm * 16) = 0;

		/* move to the next instruction */
		pc++;
	}

	/* did we do something? */
	if (pc != (uint32_t *)esf->elr) {
		/* resume execution past the simulated instructions */
		esf->elr = (uintptr_t)pc;
		return true;
	}

	return false;
}

/*
 * Process the FPU trap.
 *
@@ -159,6 +219,11 @@ void z_arm64_fpu_trap(z_arch_esf_t *esf)
{
	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");

	/* check if a quick simulation can do it */
	if (simulate_str_q_insn(esf)) {
		return;
	}

	/* turn on FPU access */
	write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
	isb();
@@ -16,15 +16,6 @@
#define TEST_NESTED_ISR
#endif

#if defined(CONFIG_ARM64) && defined(CONFIG_FPU_SHARING)
/*
 * The various log outputs trigger FP access due to the va_list used by
 * printk() and friends. IRQs are masked to prevent further IRQ nesting
 * when that happens.
 */
#undef TEST_NESTED_ISR
#endif

#define DURATION 5

#define ISR0_TOKEN 0xDEADBEEF
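As a side note on the instruction matching done by simulate_str_q_insn() above, here is a small standalone sketch (not part of the patch; the helper name and the sample encoding are picked purely for illustration) of how the mask/value test and the imm12 extraction decode one such store:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Decode "STR Q<n>, [SP, #<offset>]" (STR immediate, SIMD&FP, unsigned
 * offset). Returns true when the word matches with n in 0..7, reporting
 * the Q register number and the byte offset from SP (the 12-bit immediate
 * field scaled by 16).
 */
static bool match_str_q_sp(uint32_t insn, unsigned int *qreg, unsigned int *offset)
{
	if ((insn & 0xffc003f8) != 0x3d8003e0) {
		return false;
	}
	*qreg = insn & 0x7;			/* Rt field, limited to q0..q7 */
	*offset = ((insn >> 10) & 0xfff) * 16;	/* imm12 * 16 */
	return true;
}

int main(void)
{
	unsigned int q, off;

	/* 0x3d800be0 encodes "str q0, [sp, #32]": Rt = 0, Rn = SP, imm12 = 2 */
	if (match_str_q_sp(0x3d800be0, &q, &off)) {
		printf("stores q%u at sp + %u\n", q, off);
	}
	return 0;
}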