arch/xtensa: Add non-HAL caching primitives
The Xtensa L1 cache layer has straightforward semantics accessible via single instructions that operate on cache lines via physical addresses. These are very amenable to inlining. Unfortunately the Xtensa HAL layer requires function calls to do this, leading to significant code waste at the calling site, an extra frame on the stack and needless runtime instructions for situations where the call is over a constant region that could elide the loop. This is made even worse because the HAL library is not built with -ffunction-sections, so pulling in even one of these tiny cache functions has the effect of importing a 1500-byte object file into the link! Add our own tiny cache layer to include/arch/xtensa/cache.h and use that instead. Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
This commit is contained in:
parent
d0c538e9a2
commit
64cf33952d
10 changed files with 105 additions and 19 deletions
|
@ -61,7 +61,7 @@ void *xtensa_init_stack(struct k_thread *thread, int *stack_top,
|
|||
ret = &bsa[-9];
|
||||
|
||||
#ifdef CONFIG_KERNEL_COHERENCE
|
||||
xthal_dcache_region_writeback(ret, (char *)stack_top - (char *)ret);
|
||||
z_xtensa_cache_flush(ret, (char *)stack_top - (char *)ret);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#ifndef _ASMLANGUAGE
|
||||
#include <kernel_internal.h>
|
||||
#include <string.h>
|
||||
#include <arch/xtensa/cache.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -31,7 +32,7 @@ static ALWAYS_INLINE void arch_kernel_init(void)
|
|||
/* Make sure we don't have live data for unexpected cached
|
||||
* regions due to boot firmware
|
||||
*/
|
||||
xthal_dcache_all_writeback_inv();
|
||||
z_xtensa_cache_flush_inv_all();
|
||||
#endif
|
||||
|
||||
cpu0->nested = 0;
|
||||
|
@ -82,7 +83,7 @@ static inline void arch_cohere_stacks(struct k_thread *old_thread,
|
|||
size_t nsz = new_thread->stack_info.size;
|
||||
size_t nsp = (size_t) new_thread->switch_handle;
|
||||
|
||||
xthal_dcache_region_invalidate((void *)nsp, (nstack + nsz) - nsp);
|
||||
z_xtensa_cache_inv((void *)nsp, (nstack + nsz) - nsp);
|
||||
|
||||
/* FIXME: dummy initialization threads don't have stack info set
|
||||
* up and explode the logic above. Find a way to get this
|
||||
|
@ -98,8 +99,7 @@ static inline void arch_cohere_stacks(struct k_thread *old_thread,
|
|||
* calculate the boundary for it.
|
||||
*/
|
||||
if (old_switch_handle != NULL) {
|
||||
xthal_dcache_region_writeback((void *)osp,
|
||||
(ostack + osz) - osp);
|
||||
z_xtensa_cache_flush((void *)osp, (ostack + osz) - osp);
|
||||
} else {
|
||||
/* FIXME: hardcoding EXCSAVE3 is bad, should be
|
||||
* configurable a-la XTENSA_KERNEL_CPU_PTR_SR.
|
||||
|
|
81
include/arch/xtensa/cache.h
Normal file
81
include/arch/xtensa/cache.h
Normal file
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright 2021 Intel Corporation
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
#ifndef ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_
|
||||
#define ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_
|
||||
|
||||
#include <xtensa/config/core-isa.h>
|
||||
#include <sys/util.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Total data cache size divided by the number of ways. */
#define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS)
|
||||
|
||||
/* Geometry sanity checks, only compiled when a data cache is configured. */
#if XCHAL_DCACHE_SIZE
|
||||
/* Non-zero only when x is non-zero and has exactly one bit set. */
#define Z_IS_POW2(x) (((x) != 0) && (((x) & ((x)-1)) == 0))
|
||||
/* NOTE(review): presumably required because the range helpers round
 * addresses to the line size with ROUND_DOWN/ROUND_UP -- confirm.
 */
BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE));
|
||||
BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX));
|
||||
#endif
|
||||
|
||||
/*
 * Issue a "dhwb" (data cache hit writeback) for every cache line that
 * overlaps the region [addr, addr + bytes).
 *
 * The start address is rounded down and the end address rounded up to
 * XCHAL_DCACHE_LINESIZE, so partially covered lines are included.  A
 * zero-length region issues no instructions.  When XCHAL_DCACHE_SIZE is
 * zero the function compiles to nothing.
 *
 * addr:  start of the region
 * bytes: length of the region in bytes
 */
static inline void z_xtensa_cache_flush(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	const size_t line_sz = XCHAL_DCACHE_LINESIZE;
	size_t cur = ROUND_DOWN(addr, line_sz);
	const size_t end = ROUND_UP(((long)addr) + bytes, line_sz);

	if (bytes == 0) {
		return;
	}

	while (cur < end) {
		__asm__ volatile("dhwb %0, 0" :: "r"(cur));
		cur += line_sz;
	}
#endif
}
|
||||
|
||||
/*
 * Issue a "dhwbi" (data cache hit writeback + invalidate) for every
 * cache line that overlaps the region [addr, addr + bytes).
 *
 * The start address is rounded down and the end address rounded up to
 * XCHAL_DCACHE_LINESIZE, so partially covered lines are included.  A
 * zero-length region issues no instructions.  When XCHAL_DCACHE_SIZE is
 * zero the function compiles to nothing.
 *
 * addr:  start of the region
 * bytes: length of the region in bytes
 */
static inline void z_xtensa_cache_flush_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	const size_t line_sz = XCHAL_DCACHE_LINESIZE;
	size_t cur = ROUND_DOWN(addr, line_sz);
	const size_t end = ROUND_UP(((long)addr) + bytes, line_sz);

	if (bytes == 0) {
		return;
	}

	while (cur < end) {
		__asm__ volatile("dhwbi %0, 0" :: "r"(cur));
		cur += line_sz;
	}
#endif
}
|
||||
|
||||
/*
 * Issue a "dhi" (data cache hit invalidate) for every cache line that
 * overlaps the region [addr, addr + bytes).
 *
 * The start address is rounded down and the end address rounded up to
 * XCHAL_DCACHE_LINESIZE, so partially covered lines are included; any
 * unrelated data sharing those lines is invalidated with them.  A
 * zero-length region issues no instructions.  When XCHAL_DCACHE_SIZE is
 * zero the function compiles to nothing.
 *
 * addr:  start of the region
 * bytes: length of the region in bytes
 */
static inline void z_xtensa_cache_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	const size_t line_sz = XCHAL_DCACHE_LINESIZE;
	size_t cur = ROUND_DOWN(addr, line_sz);
	const size_t end = ROUND_UP(((long)addr) + bytes, line_sz);

	if (bytes == 0) {
		return;
	}

	while (cur < end) {
		__asm__ volatile("dhi %0, 0" :: "r"(cur));
		cur += line_sz;
	}
#endif
}
|
||||
|
||||
/*
 * Invalidate the entire L1 data cache, discarding its contents without
 * writing anything back.
 *
 * The cache is walked by index with "dii".  A hit-based instruction such
 * as "dhi" only acts on a line that currently caches the address it is
 * given, so sweeping it over a fixed low address window would leave
 * lines holding any other address untouched.  The walk covers
 * XCHAL_DCACHE_SIZE bytes in XCHAL_DCACHE_LINESIZE steps so that every
 * line of every way is visited.
 *
 * Compiles to nothing when no data cache is configured
 * (XCHAL_DCACHE_SIZE == 0).
 */
static inline void z_xtensa_cache_inv_all(void)
{
#if XCHAL_DCACHE_SIZE
	const size_t step = XCHAL_DCACHE_LINESIZE;

	for (size_t line = 0; line < XCHAL_DCACHE_SIZE; line += step) {
		__asm__ volatile("dii %0, 0" :: "r"(line));
	}
#endif
}
|
||||
|
||||
/*
 * Write back the entire L1 data cache to memory, leaving the lines
 * valid.
 *
 * The cache is walked by index with "diwb".  A hit-based instruction
 * such as "dhwb" only acts on a line that currently caches the address
 * it is given, so sweeping it over a fixed low address window would
 * leave dirty lines for any other address unwritten.  The walk covers
 * XCHAL_DCACHE_SIZE bytes in XCHAL_DCACHE_LINESIZE steps so that every
 * line of every way is visited.
 *
 * Compiles to nothing when no data cache is configured
 * (XCHAL_DCACHE_SIZE == 0).
 */
static inline void z_xtensa_cache_flush_all(void)
{
#if XCHAL_DCACHE_SIZE
	const size_t step = XCHAL_DCACHE_LINESIZE;

	for (size_t line = 0; line < XCHAL_DCACHE_SIZE; line += step) {
		__asm__ volatile("diwb %0, 0" :: "r"(line));
	}
#endif
}
|
||||
|
||||
/*
 * Write back and then invalidate the entire L1 data cache.
 *
 * The cache is walked by index with "diwbi".  A hit-based instruction
 * such as "dhwbi" only acts on a line that currently caches the address
 * it is given, so sweeping it over a fixed low address window would
 * leave lines holding any other address (for example live data left by
 * boot firmware) in the cache.  The walk covers XCHAL_DCACHE_SIZE bytes
 * in XCHAL_DCACHE_LINESIZE steps so that every line of every way is
 * visited.
 *
 * Compiles to nothing when no data cache is configured
 * (XCHAL_DCACHE_SIZE == 0).
 */
static inline void z_xtensa_cache_flush_inv_all(void)
{
#if XCHAL_DCACHE_SIZE
	const size_t step = XCHAL_DCACHE_LINESIZE;

	for (size_t line = 0; line < XCHAL_DCACHE_SIZE; line += step) {
		__asm__ volatile("diwbi %0, 0" :: "r"(line));
	}
#endif
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ */
|
|
@ -15,6 +15,7 @@
|
|||
#include <soc/shim.h>
|
||||
#include <adsp/io.h>
|
||||
#include <soc.h>
|
||||
#include <arch/xtensa/cache.h>
|
||||
#include "manifest.h"
|
||||
|
||||
#if CONFIG_SOC_INTEL_S1000
|
||||
|
@ -70,7 +71,7 @@ static inline void bmemcpy(void *dest, void *src, size_t bytes)
|
|||
for (i = 0; i < (bytes >> 2); i++)
|
||||
d[i] = s[i];
|
||||
|
||||
SOC_DCACHE_FLUSH(dest, bytes);
|
||||
z_xtensa_cache_flush(dest, bytes);
|
||||
}
|
||||
|
||||
/* bzero used by bootloader */
|
||||
|
@ -82,7 +83,7 @@ static inline void bbzero(void *dest, size_t bytes)
|
|||
for (i = 0; i < (bytes >> 2); i++)
|
||||
d[i] = 0;
|
||||
|
||||
SOC_DCACHE_FLUSH(dest, bytes);
|
||||
z_xtensa_cache_flush(dest, bytes);
|
||||
}
|
||||
|
||||
static void parse_module(struct sof_man_fw_header *hdr,
|
||||
|
|
|
@ -7,13 +7,13 @@
|
|||
#ifndef __COMMON_ADSP_CACHE_H__
|
||||
#define __COMMON_ADSP_CACHE_H__
|
||||
|
||||
#include <xtensa/hal.h>
|
||||
#include <arch/xtensa/cache.h>
|
||||
|
||||
/* macros for data cache operations */
|
||||
#define SOC_DCACHE_FLUSH(addr, size) \
|
||||
xthal_dcache_region_writeback((addr), (size))
|
||||
z_xtensa_cache_flush((addr), (size))
|
||||
#define SOC_DCACHE_INVALIDATE(addr, size) \
|
||||
xthal_dcache_region_invalidate((addr), (size))
|
||||
z_xtensa_cache_inv((addr), (size))
|
||||
|
||||
/**
|
||||
* @brief Return uncached pointer to a RAM address
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);
|
||||
|
||||
#include <soc.h>
|
||||
#include <arch/xtensa/cache.h>
|
||||
#include <adsp/io.h>
|
||||
|
||||
#include <soc/shim.h>
|
||||
|
@ -125,7 +126,7 @@ void z_mp_entry(void)
|
|||
* isn't using yet. Manual inspection of generated code says
|
||||
* we're safe, but really we need a better solution here.
|
||||
*/
|
||||
xthal_dcache_all_writeback_inv();
|
||||
z_xtensa_cache_flush_inv_all();
|
||||
|
||||
/* Copy over VECBASE from the main CPU for an initial value
|
||||
* (will need to revisit this if we ever allow a user API to
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#ifndef __INC_SOC_H
|
||||
#define __INC_SOC_H
|
||||
|
||||
#include <arch/xtensa/cache.h>
|
||||
|
||||
/* macros related to interrupt handling */
|
||||
#define XTENSA_IRQ_NUM_SHIFT 0
|
||||
#define CAVS_IRQ_NUM_SHIFT 8
|
||||
|
@ -216,9 +218,9 @@ struct soc_global_regs {
|
|||
|
||||
/* macros for data cache operations */
|
||||
#define SOC_DCACHE_FLUSH(addr, size) \
|
||||
xthal_dcache_region_writeback((addr), (size))
|
||||
z_xtensa_cache_flush((addr), (size))
|
||||
#define SOC_DCACHE_INVALIDATE(addr, size) \
|
||||
xthal_dcache_region_invalidate((addr), (size))
|
||||
z_xtensa_cache_inv((addr), (size))
|
||||
|
||||
extern void z_soc_irq_enable(uint32_t irq);
|
||||
extern void z_soc_irq_disable(uint32_t irq);
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <kernel_structs.h>
|
||||
#include <sys/sys_io.h>
|
||||
#include <sys/__assert.h>
|
||||
#include <xtensa/corebits.h>
|
||||
|
||||
#include <logging/log.h>
|
||||
LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);
|
||||
|
|
|
@ -51,7 +51,7 @@ static void cache_flush_test(void)
|
|||
}
|
||||
|
||||
LOG_INF("Flushing cache to commit contents to main memory ...");
|
||||
xthal_dcache_region_writeback(cached_buffer->flush,
|
||||
z_xtensa_cache_flush(cached_buffer->flush,
|
||||
CACHE_TEST_BUFFER_SIZE);
|
||||
|
||||
LOG_INF("Comparing contents of cached memory vs main memory ...");
|
||||
|
@ -80,7 +80,7 @@ static void cache_invalidation_test(void)
|
|||
}
|
||||
|
||||
LOG_INF("Invalidating cache to read contents from main memory ...");
|
||||
xthal_dcache_region_invalidate(cached_buffer->invalidate,
|
||||
z_xtensa_cache_inv(cached_buffer->invalidate,
|
||||
CACHE_TEST_BUFFER_SIZE);
|
||||
|
||||
LOG_INF("Comparing contents of cached memory vs main memory ...");
|
||||
|
|
|
@ -190,10 +190,10 @@ static int test_task(uint32_t chan_id, uint32_t blen, uint32_t block_count)
|
|||
printk("*** timed out waiting for dma to complete ***\n");
|
||||
}
|
||||
|
||||
xthal_dcache_region_invalidate(rx_data, RX_BUFF_SIZE);
|
||||
xthal_dcache_region_invalidate(rx_data2, RX_BUFF_SIZE);
|
||||
xthal_dcache_region_invalidate(rx_data3, RX_BUFF_SIZE);
|
||||
xthal_dcache_region_invalidate(rx_data4, RX_BUFF_SIZE);
|
||||
z_xtensa_cache_inv(rx_data, RX_BUFF_SIZE);
|
||||
z_xtensa_cache_inv(rx_data2, RX_BUFF_SIZE);
|
||||
z_xtensa_cache_inv(rx_data3, RX_BUFF_SIZE);
|
||||
z_xtensa_cache_inv(rx_data4, RX_BUFF_SIZE);
|
||||
|
||||
/* Intentionally break has been omitted (fall-through) */
|
||||
switch (block_count) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue