soc: intel_adsp/ace: use custom arch_spin_relax()

Intel Audio DSP ACE needs to use arch_spin_relax() to give
the bus more time to propagate the RCW transactions among
CPUs, and to avoid sending too many requests to the bus
after failing to lock spinlocks. However, the number of
NOPs results in a very big arch_spin_relax() that spans
multiple instruction cache lines, and requires evicting
them just for NOPs.  With 5 CPUs, it can span 6 cache
lines (if using nop.n instead of nop). That's a waste of
space and cache. So we use a tight loop instead.
Since the SoC supports zero-overhead loops, this should
have minimal performance impact.

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
This commit is contained in:
Daniel Leung 2025-02-13 09:44:55 -08:00 committed by Benjamin Cabé
commit d08981527d
4 changed files with 58 additions and 14 deletions

View file

@ -18,6 +18,10 @@ zephyr_include_directories(include)
zephyr_include_directories(include/${CONFIG_SOC})
zephyr_library_sources_ifdef(CONFIG_SOC_INTEL_COMM_WIDGET comm_widget.c)
zephyr_library_sources_ifdef(CONFIG_SOC_INTEL_COMM_WIDGET comm_widget_messages.c)
zephyr_library_sources_ifdef(
CONFIG_SOC_SERIES_INTEL_ADSP_ACE_CUSTOM_MORE_SPIN_RELAX_NOPS
spin_relax.c
)
if (CONFIG_XTENSA_MMU)
zephyr_library_sources_ifdef(CONFIG_SOC_INTEL_ACE30 mmu_ace30.c)

View file

@ -31,3 +31,24 @@ config SRAM_RETENTION_MODE
When this option is enabled, the SRAM retention mode will be
activated during the firmware boot-up process. If disabled,
the retention mode will not be activated.
config SOC_SERIES_INTEL_ADSP_ACE_CUSTOM_MORE_SPIN_RELAX_NOPS
bool "Use Intel Audio DSP specific arch_spin_relax() with more NOPs"
depends on !XTENSA_MORE_SPIN_RELAX_NOPS
default y if SMP && MP_MAX_NUM_CPUS > 1
help
Add some NOPs after failure to lock a spinlock. This gives
the bus extra time to synchronize the RCW transaction
among CPUs.
config SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS
int "Number of NOPs to be used in Intel Audio DSP specific arch_spin_relax()"
depends on SOC_SERIES_INTEL_ADSP_ACE_CUSTOM_MORE_SPIN_RELAX_NOPS
default 32 if MP_MAX_NUM_CPUS = 1
default 64 if MP_MAX_NUM_CPUS = 2
default 96 if MP_MAX_NUM_CPUS = 3
default 128 if MP_MAX_NUM_CPUS = 4
default 160 if MP_MAX_NUM_CPUS = 5
help
Specify the number of NOPs in Intel Audio DSP specific
arch_spin_relax().

View file

@ -57,20 +57,6 @@ config LOG_BACKEND_ADSP
endif # LOG
config XTENSA_MORE_SPIN_RELAX_NOPS
default y if SMP && MP_MAX_NUM_CPUS > 1
if XTENSA_MORE_SPIN_RELAX_NOPS
config XTENSA_NUM_SPIN_RELAX_NOPS
default 32 if MP_MAX_NUM_CPUS = 1
default 64 if MP_MAX_NUM_CPUS = 2
default 96 if MP_MAX_NUM_CPUS = 3
default 128 if MP_MAX_NUM_CPUS = 4
default 160 if MP_MAX_NUM_CPUS = 5
endif # XTENSA_MORE_SPIN_RELAX_NOPS
if KERNEL_VM_SUPPORT
config KERNEL_VM_SIZE

View file

@ -0,0 +1,33 @@
/*
* Copyright (c) 2025 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdint.h>
#include <zephyr/toolchain.h>
#include <zephyr/sys/util_macro.h>
#ifdef CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS
/**
 * @brief Burn a configured number of NOPs after a failed spinlock attempt.
 *
 * Executes CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS narrow NOP
 * instructions (nop.n) from a small loop rather than from one long
 * straight-line run of NOPs.
 *
 * The loop shape is selected at build time:
 *  - when the configured count is a multiple of 4, every pass issues four
 *    NOPs and the counter drops by four;
 *  - otherwise every pass issues one NOP and the counter drops by one.
 *
 * Either way the total number of NOPs executed equals the configured count.
 */
void arch_spin_relax(void)
{
	register uint32_t nops_left;

#if (CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS % 4) == 0
	/*
	 * xcc/xt-clang tends to "truncate" a run of back-to-back NOPs down
	 * to at most 4, so four per pass is the largest batch that can be
	 * used here.
	 */
	for (nops_left = CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS;
	     nops_left > 0; nops_left -= 4) {
		__asm__("nop.n; nop.n; nop.n; nop.n;");
	}
#else
	/* Count is not divisible by 4: fall back to one NOP per pass. */
	for (nops_left = CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS;
	     nops_left > 0; nops_left--) {
		__asm__("nop.n");
	}
#endif
}
#endif /* CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS */