arch/xtensa: Add non-HAL caching primitives

The Xtensa L1 cache layer has straightforward semantics accessible via
single instructions that operate on cache lines by physical address.
These are very amenable to inlining.
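
For illustration only (the wrapper names below are made up, not part of
this change): each of these operations is literally one instruction
applied to the cache line containing a given address.

    /* Hypothetical wrappers around the raw Xtensa line operations. */
    static inline void line_writeback(void *addr)
    {
        __asm__ volatile("dhwb %0, 0" :: "r"(addr));  /* write line back  */
    }

    static inline void line_invalidate(void *addr)
    {
        __asm__ volatile("dhi %0, 0" :: "r"(addr));   /* discard the line */
    }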

Unfortunately the Xtensa HAL layer requires function calls to do this,
leading to significant code waste at the call site, an extra frame on
the stack, and needless runtime instructions in situations where the
operation covers a constant-sized region and the loop could be elided
entirely.  This is made even worse because the HAL library is not
built with -ffunction-sections, so pulling in even one of these tiny
cache functions has the effect of importing a 1500-byte object file
into the link!

Add our own tiny cache layer to include/arch/xtensa/cache.h and use
that instead.
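
As a rough sketch of the payoff (hypothetical caller, not part of this
change): with the helpers inlined, a flush over a small constant-sized
object carries no call and no extra stack frame, and the compiler can
see the loop covers at most a line or two.

    #include <stdint.h>
    #include <arch/xtensa/cache.h>

    struct mailbox_msg {
        uint32_t id;
        uint32_t data[3];
    };

    static void publish(struct mailbox_msg *msg)
    {
        /* sizeof(*msg) == 16, so this touches at most two cache
         * lines; the inlined loop is trivially small at the call site.
         */
        z_xtensa_cache_flush(msg, sizeof(*msg));
    }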

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
commit 64cf33952d
Andy Ross, 2021-02-14 16:09:43 -08:00 (committed by Anas Nashif)
10 changed files with 105 additions and 19 deletions

View file

@@ -61,7 +61,7 @@ void *xtensa_init_stack(struct k_thread *thread, int *stack_top,
ret = &bsa[-9];
#ifdef CONFIG_KERNEL_COHERENCE
-xthal_dcache_region_writeback(ret, (char *)stack_top - (char *)ret);
+z_xtensa_cache_flush(ret, (char *)stack_top - (char *)ret);
#endif
return ret;
}

View file

@@ -13,6 +13,7 @@
#ifndef _ASMLANGUAGE
#include <kernel_internal.h>
#include <string.h>
+#include <arch/xtensa/cache.h>
#ifdef __cplusplus
extern "C" {
@@ -31,7 +32,7 @@ static ALWAYS_INLINE void arch_kernel_init(void)
/* Make sure we don't have live data for unexpected cached
* regions due to boot firmware
*/
-xthal_dcache_all_writeback_inv();
+z_xtensa_cache_flush_inv_all();
#endif
cpu0->nested = 0;
@@ -82,7 +83,7 @@ static inline void arch_cohere_stacks(struct k_thread *old_thread,
size_t nsz = new_thread->stack_info.size;
size_t nsp = (size_t) new_thread->switch_handle;
-xthal_dcache_region_invalidate((void *)nsp, (nstack + nsz) - nsp);
+z_xtensa_cache_inv((void *)nsp, (nstack + nsz) - nsp);
/* FIXME: dummy initialization threads don't have stack info set
* up and explode the logic above. Find a way to get this
@@ -98,8 +99,7 @@ static inline void arch_cohere_stacks(struct k_thread *old_thread,
* calculate the boundary for it.
*/
if (old_switch_handle != NULL) {
-xthal_dcache_region_writeback((void *)osp,
-(ostack + osz) - osp);
+z_xtensa_cache_flush((void *)osp, (ostack + osz) - osp);
} else {
/* FIXME: hardcoding EXCSAVE3 is bad, should be
* configurable a-la XTENSA_KERNEL_CPU_PTR_SR.

View file

@@ -0,0 +1,81 @@
/*
 * Copyright 2021 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#ifndef ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_
#define ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_

#include <xtensa/config/core-isa.h>
#include <sys/util.h>

#ifdef __cplusplus
extern "C" {
#endif

#define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS)

#if XCHAL_DCACHE_SIZE
#define Z_IS_POW2(x) (((x) != 0) && (((x) & ((x)-1)) == 0))
BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE));
BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX));
#endif

static inline void z_xtensa_cache_flush(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);

	for (size_t line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhwb %0, 0" :: "r"(line));
	}
#endif
}

static inline void z_xtensa_cache_flush_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);

	for (size_t line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhwbi %0, 0" :: "r"(line));
	}
#endif
}

static inline void z_xtensa_cache_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);

	for (size_t line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhi %0, 0" :: "r"(line));
	}
#endif
}

static inline void z_xtensa_cache_inv_all(void)
{
	z_xtensa_cache_inv(NULL, Z_DCACHE_MAX);
}

static inline void z_xtensa_cache_flush_all(void)
{
	z_xtensa_cache_flush(NULL, Z_DCACHE_MAX);
}

static inline void z_xtensa_cache_flush_inv_all(void)
{
	z_xtensa_cache_flush_inv(NULL, Z_DCACHE_MAX);
}

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ */
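
A usage sketch for the new helpers (hypothetical driver code, not part
of this commit): flush before another agent reads memory the CPU wrote,
invalidate before the CPU reads memory written behind its back.

    #include <stddef.h>
    #include <stdint.h>
    #include <arch/xtensa/cache.h>

    static uint8_t dma_buf[256];   /* hypothetical buffer shared with a DMA engine */

    void to_device_start(size_t len)
    {
        /* Commit the CPU's writes to RAM before the device reads them. */
        z_xtensa_cache_flush(dma_buf, len);
        /* ... program the DMA engine to read from dma_buf ... */
    }

    void from_device_done(size_t len)
    {
        /* Drop stale cached copies before the CPU reads the new data. */
        z_xtensa_cache_inv(dma_buf, len);
        /* ... consume dma_buf ... */
    }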

View file

@@ -15,6 +15,7 @@
#include <soc/shim.h>
#include <adsp/io.h>
#include <soc.h>
+#include <arch/xtensa/cache.h>
#include "manifest.h"
#if CONFIG_SOC_INTEL_S1000
@@ -70,7 +71,7 @@ static inline void bmemcpy(void *dest, void *src, size_t bytes)
for (i = 0; i < (bytes >> 2); i++)
d[i] = s[i];
-SOC_DCACHE_FLUSH(dest, bytes);
+z_xtensa_cache_flush(dest, bytes);
}
/* bzero used by bootloader */
@@ -82,7 +83,7 @@ static inline void bbzero(void *dest, size_t bytes)
for (i = 0; i < (bytes >> 2); i++)
d[i] = 0;
-SOC_DCACHE_FLUSH(dest, bytes);
+z_xtensa_cache_flush(dest, bytes);
}
static void parse_module(struct sof_man_fw_header *hdr,

View file

@@ -7,13 +7,13 @@
#ifndef __COMMON_ADSP_CACHE_H__
#define __COMMON_ADSP_CACHE_H__
-#include <xtensa/hal.h>
+#include <arch/xtensa/cache.h>
/* macros for data cache operations */
#define SOC_DCACHE_FLUSH(addr, size) \
-xthal_dcache_region_writeback((addr), (size))
+z_xtensa_cache_flush((addr), (size))
#define SOC_DCACHE_INVALIDATE(addr, size) \
-xthal_dcache_region_invalidate((addr), (size))
+z_xtensa_cache_inv((addr), (size))
/**
* @brief Return uncached pointer to a RAM address
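
Existing SoC code keeps using these macros unchanged; only the
expansion switches from a HAL call to the inline helper. A hypothetical
call site (not from this commit, and assuming the SoC cache header is
included) is source-compatible before and after:

    #include <stdint.h>

    static struct { uint32_t cmd, arg; } shared_ctl;   /* hypothetical shared state */

    static void notify_dsp(uint32_t cmd, uint32_t arg)
    {
        shared_ctl.cmd = cmd;
        shared_ctl.arg = arg;
        /* Same macro as before; it now expands to the inline
         * z_xtensa_cache_flush() instead of xthal_dcache_region_writeback().
         */
        SOC_DCACHE_FLUSH(&shared_ctl, sizeof(shared_ctl));
    }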

View file

@@ -18,6 +18,7 @@
LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);
#include <soc.h>
+#include <arch/xtensa/cache.h>
#include <adsp/io.h>
#include <soc/shim.h>
@@ -125,7 +126,7 @@ void z_mp_entry(void)
* isn't using yet. Manual inspection of generated code says
* we're safe, but really we need a better solution here.
*/
-xthal_dcache_all_writeback_inv();
+z_xtensa_cache_flush_inv_all();
/* Copy over VECBASE from the main CPU for an initial value
* (will need to revisit this if we ever allow a user API to

View file

@@ -6,6 +6,8 @@
#ifndef __INC_SOC_H
#define __INC_SOC_H
+#include <arch/xtensa/cache.h>
/* macros related to interrupt handling */
#define XTENSA_IRQ_NUM_SHIFT 0
#define CAVS_IRQ_NUM_SHIFT 8
@@ -216,9 +218,9 @@ struct soc_global_regs {
/* macros for data cache operations */
#define SOC_DCACHE_FLUSH(addr, size) \
-xthal_dcache_region_writeback((addr), (size))
+z_xtensa_cache_flush((addr), (size))
#define SOC_DCACHE_INVALIDATE(addr, size) \
-xthal_dcache_region_invalidate((addr), (size))
+z_xtensa_cache_inv((addr), (size))
extern void z_soc_irq_enable(uint32_t irq);
extern void z_soc_irq_disable(uint32_t irq);

View file

@@ -10,6 +10,7 @@
#include <kernel_structs.h>
#include <sys/sys_io.h>
#include <sys/__assert.h>
+#include <xtensa/corebits.h>
#include <logging/log.h>
LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);

View file

@@ -51,7 +51,7 @@ static void cache_flush_test(void)
}
LOG_INF("Flushing cache to commit contents to main memory ...");
-xthal_dcache_region_writeback(cached_buffer->flush,
+z_xtensa_cache_flush(cached_buffer->flush,
CACHE_TEST_BUFFER_SIZE);
LOG_INF("Comparing contents of cached memory vs main memory ...");
@@ -80,7 +80,7 @@ static void cache_invalidation_test(void)
}
LOG_INF("Invalidating cache to read contents from main memory ...");
-xthal_dcache_region_invalidate(cached_buffer->invalidate,
+z_xtensa_cache_inv(cached_buffer->invalidate,
CACHE_TEST_BUFFER_SIZE);
LOG_INF("Comparing contents of cached memory vs main memory ...");

View file

@@ -190,10 +190,10 @@ static int test_task(uint32_t chan_id, uint32_t blen, uint32_t block_count)
printk("*** timed out waiting for dma to complete ***\n");
}
-xthal_dcache_region_invalidate(rx_data, RX_BUFF_SIZE);
-xthal_dcache_region_invalidate(rx_data2, RX_BUFF_SIZE);
-xthal_dcache_region_invalidate(rx_data3, RX_BUFF_SIZE);
-xthal_dcache_region_invalidate(rx_data4, RX_BUFF_SIZE);
+z_xtensa_cache_inv(rx_data, RX_BUFF_SIZE);
+z_xtensa_cache_inv(rx_data2, RX_BUFF_SIZE);
+z_xtensa_cache_inv(rx_data3, RX_BUFF_SIZE);
+z_xtensa_cache_inv(rx_data4, RX_BUFF_SIZE);
/* break intentionally omitted (fall-through) */
switch (block_count) {