From d08981527df92728d0226753e1fd35228bb09ff0 Mon Sep 17 00:00:00 2001 From: Daniel Leung Date: Thu, 13 Feb 2025 09:44:55 -0800 Subject: [PATCH] soc: intel_adsp/ace: use custom arch_spin_relax() Intel Audio DSP ACE needs to use arch_spin_relax() to give the bus more time to propagate the RCW transactions among CPUs, and to avoid sending too many requests to the bus after failing to lock spinlocks. However, the number of NOPs results in a very big arch_spin_relax() that spans multiple instruction cache lines, and requires evicting them just for NOPs. With 5 CPUs, it can span 6 cache lines (if using nop.n instead of nop). That's a waste of space and cache. So we use a tight loop instead. Since the SoC supports zero-overhead loops, this should have minimal performance impact. Signed-off-by: Daniel Leung --- soc/intel/intel_adsp/ace/CMakeLists.txt | 4 +++ soc/intel/intel_adsp/ace/Kconfig | 21 ++++++++++++ .../intel_adsp/ace/Kconfig.defconfig.series | 14 -------- soc/intel/intel_adsp/ace/spin_relax.c | 33 +++++++++++++++++++ 4 files changed, 58 insertions(+), 14 deletions(-) create mode 100644 soc/intel/intel_adsp/ace/spin_relax.c diff --git a/soc/intel/intel_adsp/ace/CMakeLists.txt b/soc/intel/intel_adsp/ace/CMakeLists.txt index 519275d9047..9aebd85c43c 100644 --- a/soc/intel/intel_adsp/ace/CMakeLists.txt +++ b/soc/intel/intel_adsp/ace/CMakeLists.txt @@ -18,6 +18,10 @@ zephyr_include_directories(include) zephyr_include_directories(include/${CONFIG_SOC}) zephyr_library_sources_ifdef(CONFIG_SOC_INTEL_COMM_WIDGET comm_widget.c) zephyr_library_sources_ifdef(CONFIG_SOC_INTEL_COMM_WIDGET comm_widget_messages.c) +zephyr_library_sources_ifdef( + CONFIG_SOC_SERIES_INTEL_ADSP_ACE_CUSTOM_MORE_SPIN_RELAX_NOPS + spin_relax.c +) if (CONFIG_XTENSA_MMU) zephyr_library_sources_ifdef(CONFIG_SOC_INTEL_ACE30 mmu_ace30.c) diff --git a/soc/intel/intel_adsp/ace/Kconfig b/soc/intel/intel_adsp/ace/Kconfig index e38ebdeb9c5..f4c7903e7a0 100644 --- a/soc/intel/intel_adsp/ace/Kconfig +++ 
b/soc/intel/intel_adsp/ace/Kconfig @@ -31,3 +31,24 @@ config SRAM_RETENTION_MODE When this option is enabled, the SRAM retention mode will be activated during the firmware boot-up process. If disabled, the retention mode will not be activated. + +config SOC_SERIES_INTEL_ADSP_ACE_CUSTOM_MORE_SPIN_RELAX_NOPS + bool "Use Intel Audio DSP specific arch_spin_relax() with more NOPs" + depends on !XTENSA_MORE_SPIN_RELAX_NOPS + default y if SMP && MP_MAX_NUM_CPUS > 1 + help + Add some NOPs after failure to lock a spinlock. This gives + the bus extra time to synchronize the RCW transaction + among CPUs. + +config SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS + int "Number of NOPs to be used in Intel Audio DSP specific arch_spin_relax()" + depends on SOC_SERIES_INTEL_ADSP_ACE_CUSTOM_MORE_SPIN_RELAX_NOPS + default 32 if MP_MAX_NUM_CPUS = 1 + default 64 if MP_MAX_NUM_CPUS = 2 + default 96 if MP_MAX_NUM_CPUS = 3 + default 128 if MP_MAX_NUM_CPUS = 4 + default 160 if MP_MAX_NUM_CPUS = 5 + help + Specify the number of NOPs in Intel Audio DSP specific + arch_spin_relax(). 
diff --git a/soc/intel/intel_adsp/ace/Kconfig.defconfig.series b/soc/intel/intel_adsp/ace/Kconfig.defconfig.series index 0d606715e09..36b512d27cb 100644 --- a/soc/intel/intel_adsp/ace/Kconfig.defconfig.series +++ b/soc/intel/intel_adsp/ace/Kconfig.defconfig.series @@ -57,20 +57,6 @@ config LOG_BACKEND_ADSP endif # LOG -config XTENSA_MORE_SPIN_RELAX_NOPS - default y if SMP && MP_MAX_NUM_CPUS > 1 - -if XTENSA_MORE_SPIN_RELAX_NOPS - -config XTENSA_NUM_SPIN_RELAX_NOPS - default 32 if MP_MAX_NUM_CPUS = 1 - default 64 if MP_MAX_NUM_CPUS = 2 - default 96 if MP_MAX_NUM_CPUS = 3 - default 128 if MP_MAX_NUM_CPUS = 4 - default 160 if MP_MAX_NUM_CPUS = 5 - -endif # XTENSA_MORE_SPIN_RELAX_NOPS - if KERNEL_VM_SUPPORT config KERNEL_VM_SIZE diff --git a/soc/intel/intel_adsp/ace/spin_relax.c b/soc/intel/intel_adsp/ace/spin_relax.c new file mode 100644 index 00000000000..073deb5a6d0 --- /dev/null +++ b/soc/intel/intel_adsp/ace/spin_relax.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2025 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include +#include + +#ifdef CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS +void arch_spin_relax(void) +{ + register uint32_t remaining = CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS; + + while (remaining > 0) { +#if (CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS % 4) == 0 + remaining -= 4; + + /* + * Burn the configured NOP count four at a time when it is a multiple of 4: + * xcc/xt-clang "truncates" a run of consecutive NOPs to at most 4 NOPs, + * so four nop.n per pass is the most that one loop iteration can carry. + */ + __asm__("nop.n; nop.n; nop.n; nop.n;"); +#else + remaining--; + __asm__("nop.n"); +#endif + } +} +#endif /* CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS */