arch/xtensa: Add non-HAL caching primitives

The Xtensa L1 cache layer has straightforward semantics accessible via single instructions that operate on cache lines via physical addresses. These are very amenable to inlining. Unfortunately, the Xtensa HAL layer requires function calls to do this, leading to significant code waste at the calling site, an extra frame on the stack, and needless runtime instructions for situations where the call is over a constant region that could elide the loop. This is made even worse because the HAL library is not built with -ffunction-sections, so pulling in even one of these tiny cache functions has the effect of importing a 1500-byte object file into the link! Add our own tiny cache layer to include/arch/xtensa/cache.h and use that instead. Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
2021-02-14 16:09:43 -08:00 · 2021-02-14 16:09:43 -08:00 · 64cf33952d
commit 64cf33952d
parent d0c538e9a2
10 changed files with 105 additions and 19 deletions
--- a/soc/xtensa/intel_adsp/common/bootloader/boot_loader.c
+++ b/soc/xtensa/intel_adsp/common/bootloader/boot_loader.c
@ -15,6 +15,7 @@
 #include <soc/shim.h>
 #include <adsp/io.h>
 #include <soc.h>
+#include <arch/xtensa/cache.h>
 #include "manifest.h"

 #if CONFIG_SOC_INTEL_S1000
@ -70,7 +71,7 @@ static inline void bmemcpy(void *dest, void *src, size_t bytes)
 	for (i = 0; i < (bytes >> 2); i++)
 		d[i] = s[i];

-	SOC_DCACHE_FLUSH(dest, bytes);
+	z_xtensa_cache_flush(dest, bytes);
 }

 /* bzero used by bootloader */
@ -82,7 +83,7 @@ static inline void bbzero(void *dest, size_t bytes)
 	for (i = 0; i < (bytes >> 2); i++)
 		d[i] = 0;

-	SOC_DCACHE_FLUSH(dest, bytes);
+	z_xtensa_cache_flush(dest, bytes);
 }

 static void parse_module(struct sof_man_fw_header *hdr,
--- a/soc/xtensa/intel_adsp/common/include/adsp/cache.h
+++ b/soc/xtensa/intel_adsp/common/include/adsp/cache.h
@ -7,13 +7,13 @@
 #ifndef __COMMON_ADSP_CACHE_H__
 #define __COMMON_ADSP_CACHE_H__

-#include <xtensa/hal.h>
+#include <arch/xtensa/cache.h>

 /* macros for data cache operations */
 #define SOC_DCACHE_FLUSH(addr, size)		\
-	xthal_dcache_region_writeback((addr), (size))
+	z_xtensa_cache_flush((addr), (size))
 #define SOC_DCACHE_INVALIDATE(addr, size)	\
-	xthal_dcache_region_invalidate((addr), (size))
+	z_xtensa_cache_inv((addr), (size))

 /**
 * @brief Return uncached pointer to a RAM address
--- a/soc/xtensa/intel_adsp/common/soc_mp.c
+++ b/soc/xtensa/intel_adsp/common/soc_mp.c
@ -18,6 +18,7 @@
 LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);

 #include <soc.h>
+#include <arch/xtensa/cache.h>
 #include <adsp/io.h>

 #include <soc/shim.h>
@ -125,7 +126,7 @@ void z_mp_entry(void)
 	 * isn't using yet.  Manual inspection of generated code says
 	 * we're safe, but really we need a better solution here.
 	 */
-	xthal_dcache_all_writeback_inv();
+	z_xtensa_cache_flush_inv_all();

 	/* Copy over VECBASE from the main CPU for an initial value
 	 * (will need to revisit this if we ever allow a user API to
--- a/soc/xtensa/intel_s1000/soc.h
+++ b/soc/xtensa/intel_s1000/soc.h
@ -6,6 +6,8 @@
 #ifndef __INC_SOC_H
 #define __INC_SOC_H

+#include <arch/xtensa/cache.h>
+
 /* macros related to interrupt handling */
 #define XTENSA_IRQ_NUM_SHIFT			0
 #define CAVS_IRQ_NUM_SHIFT			8
@ -216,9 +218,9 @@ struct soc_global_regs {

 /* macros for data cache operations */
 #define SOC_DCACHE_FLUSH(addr, size)		\
-	xthal_dcache_region_writeback((addr), (size))
+	z_xtensa_cache_flush((addr), (size))
 #define SOC_DCACHE_INVALIDATE(addr, size)	\
-	xthal_dcache_region_invalidate((addr), (size))
+	z_xtensa_cache_inv((addr), (size))

 extern void z_soc_irq_enable(uint32_t irq);
 extern void z_soc_irq_disable(uint32_t irq);
--- a/soc/xtensa/intel_s1000/soc_mp.c
+++ b/soc/xtensa/intel_s1000/soc_mp.c
@ -10,6 +10,7 @@
 #include <kernel_structs.h>
 #include <sys/sys_io.h>
 #include <sys/__assert.h>
+#include <xtensa/corebits.h>

 #include <logging/log.h>
 LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);