From 64cf33952da37bd0a497b87aff047dd9615f0fc5 Mon Sep 17 00:00:00 2001 From: Andy Ross Date: Sun, 14 Feb 2021 16:09:43 -0800 Subject: [PATCH] arch/xtensa: Add non-HAL caching primitives The Xtensa L1 cache layer has straightforward semantics accessible via single instructions that operate on cache lines via physical addresses. These are very amenable to inlining. Unfortunately the Xtensa HAL layer requires function calls to do this, leading to significant code waste at the calling site, an extra frame on the stack and needless runtime instructions for situations where the call is over a constant region that could elide the loop. This is made even worse because the HAL library is not built with -ffunction-sections, so pulling in even one of these tiny cache functions has the effect of importing a 1500-byte object file into the link! Add our own tiny cache layer to include/arch/xtensa/cache.h and use that instead. Signed-off-by: Andy Ross --- arch/xtensa/core/xtensa-asm2.c | 2 +- arch/xtensa/include/kernel_arch_func.h | 8 +- include/arch/xtensa/cache.h | 81 +++++++++++++++++++ .../common/bootloader/boot_loader.c | 5 +- .../intel_adsp/common/include/adsp/cache.h | 6 +- soc/xtensa/intel_adsp/common/soc_mp.c | 3 +- soc/xtensa/intel_s1000/soc.h | 6 +- soc/xtensa/intel_s1000/soc_mp.c | 1 + .../intel_s1000_crb/cache/src/cache_test.c | 4 +- .../intel_s1000_crb/main/src/dma_test.c | 8 +- 10 files changed, 105 insertions(+), 19 deletions(-) create mode 100644 include/arch/xtensa/cache.h diff --git a/arch/xtensa/core/xtensa-asm2.c b/arch/xtensa/core/xtensa-asm2.c index 6c40dde067b..46636b53fc3 100644 --- a/arch/xtensa/core/xtensa-asm2.c +++ b/arch/xtensa/core/xtensa-asm2.c @@ -61,7 +61,7 @@ void *xtensa_init_stack(struct k_thread *thread, int *stack_top, ret = &bsa[-9]; #ifdef CONFIG_KERNEL_COHERENCE - xthal_dcache_region_writeback(ret, (char *)stack_top - (char *)ret); + z_xtensa_cache_flush(ret, (char *)stack_top - (char *)ret); #endif return ret; } diff --git 
a/arch/xtensa/include/kernel_arch_func.h b/arch/xtensa/include/kernel_arch_func.h index 53c6661c3e5..607d78ecd9c 100644 --- a/arch/xtensa/include/kernel_arch_func.h +++ b/arch/xtensa/include/kernel_arch_func.h @@ -13,6 +13,7 @@ #ifndef _ASMLANGUAGE #include #include +#include #ifdef __cplusplus extern "C" { @@ -31,7 +32,7 @@ static ALWAYS_INLINE void arch_kernel_init(void) /* Make sure we don't have live data for unexpected cached * regions due to boot firmware */ - xthal_dcache_all_writeback_inv(); + z_xtensa_cache_flush_inv_all(); #endif cpu0->nested = 0; @@ -82,7 +83,7 @@ static inline void arch_cohere_stacks(struct k_thread *old_thread, size_t nsz = new_thread->stack_info.size; size_t nsp = (size_t) new_thread->switch_handle; - xthal_dcache_region_invalidate((void *)nsp, (nstack + nsz) - nsp); + z_xtensa_cache_inv((void *)nsp, (nstack + nsz) - nsp); /* FIXME: dummy initializion threads don't have stack info set * up and explode the logic above. Find a way to get this @@ -98,8 +99,7 @@ static inline void arch_cohere_stacks(struct k_thread *old_thread, * calculate the boundary for it. */ if (old_switch_handle != NULL) { - xthal_dcache_region_writeback((void *)osp, - (ostack + osz) - osp); + z_xtensa_cache_flush((void *)osp, (ostack + osz) - osp); } else { /* FIXME: hardcoding EXCSAVE3 is bad, should be * configurable a-la XTENSA_KERNEL_CPU_PTR_SR. 
diff --git a/include/arch/xtensa/cache.h b/include/arch/xtensa/cache.h new file mode 100644 index 00000000000..ba89079c273 --- /dev/null +++ b/include/arch/xtensa/cache.h @@ -0,0 +1,81 @@ +/* + * Copyright 2021 Intel Corporation + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ +#define ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS) + +#if XCHAL_DCACHE_SIZE +#define Z_IS_POW2(x) (((x) != 0) && (((x) & ((x)-1)) == 0)) +BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE)); +BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX)); +#endif + +static inline void z_xtensa_cache_flush(void *addr, size_t bytes) +{ +#if XCHAL_DCACHE_SIZE + size_t step = XCHAL_DCACHE_LINESIZE; + size_t first = ROUND_DOWN(addr, step); + size_t last = ROUND_UP(((long)addr) + bytes, step); + + for (size_t line = first; bytes && line < last; line += step) { + __asm__ volatile("dhwb %0, 0" :: "r"(line)); + } +#endif +} + +static inline void z_xtensa_cache_flush_inv(void *addr, size_t bytes) +{ +#if XCHAL_DCACHE_SIZE + size_t step = XCHAL_DCACHE_LINESIZE; + size_t first = ROUND_DOWN(addr, step); + size_t last = ROUND_UP(((long)addr) + bytes, step); + + for (size_t line = first; bytes && line < last; line += step) { + __asm__ volatile("dhwbi %0, 0" :: "r"(line)); + } +#endif +} + +static inline void z_xtensa_cache_inv(void *addr, size_t bytes) +{ +#if XCHAL_DCACHE_SIZE + size_t step = XCHAL_DCACHE_LINESIZE; + size_t first = ROUND_DOWN(addr, step); + size_t last = ROUND_UP(((long)addr) + bytes, step); + + for (size_t line = first; bytes && line < last; line += step) { + __asm__ volatile("dhi %0, 0" :: "r"(line)); + } +#endif +} + +static inline void z_xtensa_cache_inv_all(void) +{ + z_xtensa_cache_inv(NULL, Z_DCACHE_MAX); +} + +static inline void z_xtensa_cache_flush_all(void) +{ + z_xtensa_cache_flush(NULL, Z_DCACHE_MAX); +} + +static inline void 
z_xtensa_cache_flush_inv_all(void) +{ + z_xtensa_cache_flush_inv(NULL, Z_DCACHE_MAX); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ */ diff --git a/soc/xtensa/intel_adsp/common/bootloader/boot_loader.c b/soc/xtensa/intel_adsp/common/bootloader/boot_loader.c index 13919c09306..f6e63c70bd3 100644 --- a/soc/xtensa/intel_adsp/common/bootloader/boot_loader.c +++ b/soc/xtensa/intel_adsp/common/bootloader/boot_loader.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "manifest.h" #if CONFIG_SOC_INTEL_S1000 @@ -70,7 +71,7 @@ static inline void bmemcpy(void *dest, void *src, size_t bytes) for (i = 0; i < (bytes >> 2); i++) d[i] = s[i]; - SOC_DCACHE_FLUSH(dest, bytes); + z_xtensa_cache_flush(dest, bytes); } /* bzero used by bootloader */ @@ -82,7 +83,7 @@ static inline void bbzero(void *dest, size_t bytes) for (i = 0; i < (bytes >> 2); i++) d[i] = 0; - SOC_DCACHE_FLUSH(dest, bytes); + z_xtensa_cache_flush(dest, bytes); } static void parse_module(struct sof_man_fw_header *hdr, diff --git a/soc/xtensa/intel_adsp/common/include/adsp/cache.h b/soc/xtensa/intel_adsp/common/include/adsp/cache.h index ea7c66f456b..6758de55780 100644 --- a/soc/xtensa/intel_adsp/common/include/adsp/cache.h +++ b/soc/xtensa/intel_adsp/common/include/adsp/cache.h @@ -7,13 +7,13 @@ #ifndef __COMMON_ADSP_CACHE_H__ #define __COMMON_ADSP_CACHE_H__ -#include +#include /* macros for data cache operations */ #define SOC_DCACHE_FLUSH(addr, size) \ - xthal_dcache_region_writeback((addr), (size)) + z_xtensa_cache_flush((addr), (size)) #define SOC_DCACHE_INVALIDATE(addr, size) \ - xthal_dcache_region_invalidate((addr), (size)) + z_xtensa_cache_inv((addr), (size)) /** * @brief Return uncached pointer to a RAM address diff --git a/soc/xtensa/intel_adsp/common/soc_mp.c b/soc/xtensa/intel_adsp/common/soc_mp.c index 6dac17b7d9d..710b1821e28 100644 --- a/soc/xtensa/intel_adsp/common/soc_mp.c +++ b/soc/xtensa/intel_adsp/common/soc_mp.c @@ -18,6 +18,7 
@@ LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL); #include +#include #include #include @@ -125,7 +126,7 @@ void z_mp_entry(void) * isn't using yet. Manual inspection of generated code says * we're safe, but really we need a better solution here. */ - xthal_dcache_all_writeback_inv(); + z_xtensa_cache_flush_inv_all(); /* Copy over VECBASE from the main CPU for an initial value * (will need to revisit this if we ever allow a user API to diff --git a/soc/xtensa/intel_s1000/soc.h b/soc/xtensa/intel_s1000/soc.h index 7c9eafdcbbf..620ecd6fb3d 100644 --- a/soc/xtensa/intel_s1000/soc.h +++ b/soc/xtensa/intel_s1000/soc.h @@ -6,6 +6,8 @@ #ifndef __INC_SOC_H #define __INC_SOC_H +#include + /* macros related to interrupt handling */ #define XTENSA_IRQ_NUM_SHIFT 0 #define CAVS_IRQ_NUM_SHIFT 8 @@ -216,9 +218,9 @@ struct soc_global_regs { /* macros for data cache operations */ #define SOC_DCACHE_FLUSH(addr, size) \ - xthal_dcache_region_writeback((addr), (size)) + z_xtensa_cache_flush((addr), (size)) #define SOC_DCACHE_INVALIDATE(addr, size) \ - xthal_dcache_region_invalidate((addr), (size)) + z_xtensa_cache_inv((addr), (size)) extern void z_soc_irq_enable(uint32_t irq); extern void z_soc_irq_disable(uint32_t irq); diff --git a/soc/xtensa/intel_s1000/soc_mp.c b/soc/xtensa/intel_s1000/soc_mp.c index b48ba7de09a..876e5a28021 100644 --- a/soc/xtensa/intel_s1000/soc_mp.c +++ b/soc/xtensa/intel_s1000/soc_mp.c @@ -10,6 +10,7 @@ #include #include #include +#include #include LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL); diff --git a/tests/boards/intel_s1000_crb/cache/src/cache_test.c b/tests/boards/intel_s1000_crb/cache/src/cache_test.c index d249f9badb1..0f3d46b1516 100644 --- a/tests/boards/intel_s1000_crb/cache/src/cache_test.c +++ b/tests/boards/intel_s1000_crb/cache/src/cache_test.c @@ -51,7 +51,7 @@ static void cache_flush_test(void) } LOG_INF("Flushing cache to commit contents to main memory ..."); - xthal_dcache_region_writeback(cached_buffer->flush, + 
z_xtensa_cache_flush(cached_buffer->flush, CACHE_TEST_BUFFER_SIZE); LOG_INF("Comparing contents of cached memory vs main memory ..."); @@ -80,7 +80,7 @@ static void cache_invalidation_test(void) } LOG_INF("Invalidating cache to read contents from main memory ..."); - xthal_dcache_region_invalidate(cached_buffer->invalidate, + z_xtensa_cache_inv(cached_buffer->invalidate, CACHE_TEST_BUFFER_SIZE); LOG_INF("Comparing contents of cached memory vs main memory ..."); diff --git a/tests/boards/intel_s1000_crb/main/src/dma_test.c b/tests/boards/intel_s1000_crb/main/src/dma_test.c index ea742bef6c2..029b4ed9793 100644 --- a/tests/boards/intel_s1000_crb/main/src/dma_test.c +++ b/tests/boards/intel_s1000_crb/main/src/dma_test.c @@ -190,10 +190,10 @@ static int test_task(uint32_t chan_id, uint32_t blen, uint32_t block_count) printk("*** timed out waiting for dma to complete ***\n"); } - xthal_dcache_region_invalidate(rx_data, RX_BUFF_SIZE); - xthal_dcache_region_invalidate(rx_data2, RX_BUFF_SIZE); - xthal_dcache_region_invalidate(rx_data3, RX_BUFF_SIZE); - xthal_dcache_region_invalidate(rx_data4, RX_BUFF_SIZE); + z_xtensa_cache_inv(rx_data, RX_BUFF_SIZE); + z_xtensa_cache_inv(rx_data2, RX_BUFF_SIZE); + z_xtensa_cache_inv(rx_data3, RX_BUFF_SIZE); + z_xtensa_cache_inv(rx_data4, RX_BUFF_SIZE); /* Intentionally break has been omitted (fall-through) */ switch (block_count) {