arch/xtensa: Promote adsp RPO/cache utilities to an arch API
This trick (mapping RAM twice so you can use alternate Region Protection Option addresses to control cacheability) is something any Xtensa hardware designer might productively choose to do. And as it works really well, we should encourage that by making it a generic architecture feature for Zephyr.

Now everything works by setting two Kconfig values at the SoC level defining the cached and uncached regions. As long as these are correct, you can then use the new arch_xtensa_un/cached_ptr() APIs to convert between them, and an ARCH_XTENSA_SET_RPO_TLB() macro that provides much smaller initialization code (in C!) than the HAL assembly macros. The conversion routines have been generalized to support conversion between any two regions.

Note that full KERNEL_COHERENCE still requires support from the platform linker script, which can't be made generic given the way Zephyr does linkage.

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
parent 6aa3d0c72f
commit 97ada8bc04
9 changed files with 191 additions and 125 deletions
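For readers new to the API, a minimal hypothetical usage sketch follows (not part of the patch; the variable and function names are illustrative, and it assumes the SoC Kconfig has set the two region symbols correctly):

/* Hypothetical caller: publish a flag to another CPU through the
 * uncached alias so no explicit cache flush is needed.
 */
#include <arch/xtensa/cache.h>

static uint32_t shared_flag;

void signal_other_cpu(void)
{
	uint32_t *uc = arch_xtensa_uncached_ptr(&shared_flag);

	*uc = 1;	/* bypasses L1, visible to other CPUs */
}

SoC early initialization then needs only the single ARCH_XTENSA_SET_RPO_TLB() call visible in the cpu_early_init() hunk below.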
@@ -89,4 +89,21 @@ config XTENSA_SMALL_VECTOR_TABLE_ENTRY
 	  handlers to the end of vector table, renaming them to
 	  _Level\LVL\()VectorHelper.
 
+config XTENSA_CACHED_REGION
+	int "Cached RPO mapping"
+	range 0 7
+	help
+	  A design trick on multi-core hardware is to map memory twice
+	  so that it can be seen in both (incoherent) cached mappings
+	  and a coherent "shared" area.  This specifies which 512M
+	  region (0-7, as defined by the Xtensa Region Protection
+	  Option) contains the "cached" mapping.
+
+config XTENSA_UNCACHED_REGION
+	int "Uncached RPO mapping"
+	range 0 7
+	help
+	  As for XTENSA_CACHED_REGION, this specifies which 512M
+	  region (0-7) contains the "uncached" mapping.
+
 endmenu
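For reference when choosing these values, each RPO region number selects one 512MB window of the address space; a purely illustrative sketch (this macro is not part of the patch):

/* Illustrative only: region N spans [N << 29, (N << 29) | 0x1fffffff],
 * e.g. region 4 starts at 0x80000000 and region 5 at 0xa0000000.
 */
#define RPO_REGION_BASE(n) ((uint32_t)(n) << 29)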
@@ -58,18 +58,6 @@ static inline void arch_switch(void *switch_to, void **switched_from)
 	return xtensa_switch(switch_to, switched_from);
 }
 
-/* FIXME: we don't have a framework for including this from the SoC
- * layer, so we define it in the arch code here.
- */
-#if defined(CONFIG_SOC_FAMILY_INTEL_ADSP) && defined(CONFIG_KERNEL_COHERENCE)
-static inline bool arch_mem_coherent(void *ptr)
-{
-	size_t addr = (size_t) ptr;
-
-	return addr >= 0x80000000 && addr < 0xa0000000;
-}
-#endif
-
 #ifdef CONFIG_KERNEL_COHERENCE
 static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
 					     void *old_switch_handle,
@@ -15,8 +15,9 @@ extern "C" {
 
 #define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS)
 
-#if XCHAL_DCACHE_SIZE
 #define Z_IS_POW2(x) (((x) != 0) && (((x) & ((x)-1)) == 0))
+
+#if XCHAL_DCACHE_SIZE
 BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE));
 BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX));
 #endif
@@ -78,6 +79,139 @@ static ALWAYS_INLINE void z_xtensa_cache_flush_inv_all(void)
 	z_xtensa_cache_flush_inv(NULL, Z_DCACHE_MAX);
 }
 
+#ifdef CONFIG_ARCH_HAS_COHERENCE
+static inline bool arch_mem_coherent(void *ptr)
+{
+	size_t addr = (size_t) ptr;
+
+	return (addr >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
+}
+#endif
+
+static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
+{
+	/* The math here is all compile-time: when the two regions
+	 * differ by a power of two, we can convert between them by
+	 * setting or clearing just one bit.  Otherwise it needs two
+	 * operations.
+	 */
+	uint32_t rxor = (rto ^ rfrom) << 29;
+
+	rto <<= 29;
+	if (Z_IS_POW2(rxor)) {
+		if ((rxor & rto) == 0) {
+			return addr & ~rxor;
+		} else {
+			return addr | rxor;
+		}
+	} else {
+		return (addr & ~(7U << 29)) | rto;
+	}
+}
+
+/**
+ * @brief Return cached pointer to a RAM address
+ *
+ * The Xtensa coherence architecture maps addressable RAM twice, in
+ * two different 512MB regions whose L1 cache settings can be
+ * controlled independently.  So for any given pointer, it is possible
+ * to convert it to and from a cached version.
+ *
+ * This function takes a pointer to any addressable object (either in
+ * cacheable memory or not) and returns a pointer that can be used to
+ * refer to the same memory through the L1 data cache.  Data read
+ * through the resulting pointer will reflect locally cached values on
+ * the current CPU if they exist, and writes will go first into the
+ * cache and be written back later.
+ *
+ * @see arch_xtensa_uncached_ptr()
+ *
+ * @param ptr A pointer to a valid C object
+ * @return A pointer to the same object via the L1 dcache
+ */
+static inline void *arch_xtensa_cached_ptr(void *ptr)
+{
+	return (void *)z_xtrpoflip((uint32_t) ptr,
+				   CONFIG_XTENSA_CACHED_REGION,
+				   CONFIG_XTENSA_UNCACHED_REGION);
+}
+
+/**
+ * @brief Return uncached pointer to a RAM address
+ *
+ * The Xtensa coherence architecture maps addressable RAM twice, in
+ * two different 512MB regions whose L1 cache settings can be
+ * controlled independently.  So for any given pointer, it is possible
+ * to convert it to and from a cached version.
+ *
+ * This function takes a pointer to any addressable object (either in
+ * cacheable memory or not) and returns a pointer that can be used to
+ * refer to the same memory while bypassing the L1 data cache.  Data
+ * in the L1 cache will not be inspected nor modified by the access.
+ *
+ * @see arch_xtensa_cached_ptr()
+ *
+ * @param ptr A pointer to a valid C object
+ * @return A pointer to the same object bypassing the L1 dcache
+ */
+static inline void *arch_xtensa_uncached_ptr(void *ptr)
+{
+	return (void *)z_xtrpoflip((uint32_t) ptr,
+				   CONFIG_XTENSA_UNCACHED_REGION,
+				   CONFIG_XTENSA_CACHED_REGION);
+}
+
+/* Utility to generate an unrolled and optimal[1] code sequence to set
+ * the RPO TLB registers (contra the HAL cacheattr macros, which
+ * generate larger code and can't be called from C), based on the
+ * KERNEL_COHERENCE configuration in use.  Selects RPO attribute "2"
+ * for regions (including MMIO registers in region zero) which want to
+ * bypass L1, "4" for the cached region which wants writeback, and
+ * "15" (invalid) elsewhere.
+ *
+ * Note that on cores that have the "translation" option set, we need
+ * to put an identity mapping in the high bits.  Also per spec
+ * changing the current code region (by definition cached) requires
+ * that WITLB be followed by an ISYNC and that both instructions live
+ * in the same cache line (two 3-byte instructions fit in an 8-byte
+ * aligned region, so that's guaranteed not to cross a cache line
+ * boundary).
+ *
+ * [1] With the sole exception of gcc's infuriating insistence on
+ * emitting a precomputed literal for addr + addrincr instead of
+ * computing it with a single ADD instruction from values it already
+ * has in registers.  Explicitly assigning the variables to registers
+ * via an attribute works, but then emits needless MOV instructions
+ * instead.  I tell myself it's just 32 bytes of .text, but... Sigh.
+ */
+#define _REGION_ATTR(r) \
+	((r) == 0 ? 2 : \
+	 ((r) == CONFIG_XTENSA_CACHED_REGION ? 4 : \
+	  ((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))
+
+#define _SET_ONE_TLB(region) do { \
+	uint32_t attr = _REGION_ATTR(region); \
+	if (XCHAL_HAVE_XLT_CACHEATTR) { \
+		attr |= addr; /* RPO with translation */ \
+	} \
+	if (region != CONFIG_XTENSA_CACHED_REGION) { \
+		__asm__ volatile("wdtlb %0, %1; witlb %0, %1" \
+				 :: "r"(attr), "r"(addr)); \
+	} else { \
+		__asm__ volatile("wdtlb %0, %1" \
+				 :: "r"(attr), "r"(addr)); \
+		__asm__ volatile("j 1f; .align 8; 1:"); \
+		__asm__ volatile("witlb %0, %1; isync" \
+				 :: "r"(attr), "r"(addr)); \
+	} \
+	addr += addrincr; \
+} while (0)
+
+#define ARCH_XTENSA_SET_RPO_TLB() do { \
+	register uint32_t addr = 0, addrincr = 0x20000000; \
+	FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7); \
+} while (0)
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
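A worked example with the intel_adsp defaults set below (cached region 5, uncached region 4): the two region numbers differ in a single bit, so z_xtrpoflip() degenerates to setting or clearing bit 29 of the address. The concrete addresses here are illustrative only:

/* Illustrative only, assuming CONFIG_XTENSA_CACHED_REGION=5 and
 * CONFIG_XTENSA_UNCACHED_REGION=4, i.e. rxor = (5 ^ 4) << 29 = 0x20000000.
 */
void *p  = (void *)0x9e001000;            /* region 4: uncached alias  */
void *cp = arch_xtensa_cached_ptr(p);     /* 0xbe001000: bit 29 set    */
void *up = arch_xtensa_uncached_ptr(cp);  /* 0x9e001000: bit 29 clear  */

When the two regions do not differ by a power of two, the fallback path masks off bits 31:29 and ORs in the destination region, costing one extra operation.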
@@ -4,3 +4,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
 source "soc/xtensa/intel_adsp/*/Kconfig.defconfig.series"
+
+# Lower priority defaults come AFTER the series-specific ones set above
+
+config XTENSA_CACHED_REGION
+	default 5
+
+config XTENSA_UNCACHED_REGION
+	default 4
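With these defaults the uncached alias is region 4 (0x80000000-0x9fffffff) and the cached alias is region 5 (0xa0000000-0xbfffffff), the same windows that were previously hard-coded in the arch header, the ELF fixup script, and the SoC code below.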
@@ -59,6 +59,7 @@ add_custom_target(
 	copy ${CMAKE_BINARY_DIR}/zephyr/${KERNEL_NAME}.elf ${KERNEL_REMAPPED}
 
 	COMMAND ${ELF_FIX} ${CMAKE_OBJCOPY} ${KERNEL_REMAPPED}
+		${CONFIG_XTENSA_CACHED_REGION} ${CONFIG_XTENSA_UNCACHED_REGION}
 
 	# Extract modules for rimage
 	COMMAND ${CMAKE_OBJCOPY}
@@ -3,13 +3,11 @@
 # Copyright (c) 2020 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-# ADSP devices have their RAM regions mapped twice, once in the 512MB
-# region from 0x80000000-0x9fffffff and again from
-# 0xa0000000-0xbfffffff.  The first mapping is set in the CPU to
-# bypass the L1 cache, and so access through pointers in that region
-# is coherent between CPUs (but slow).  The second region accesses the
-# same memory through the L1 cache and requires careful flushing when
-# used with shared data.
+# ADSP devices have their RAM regions mapped twice.  The first mapping
+# is set in the CPU to bypass the L1 cache, and so access through
+# pointers in that region is coherent between CPUs (but slow).  The
+# second region accesses the same memory through the L1 cache and
+# requires careful flushing when used with shared data.
 #
 # This distinction is exposed in the linker script, where some symbols
 # (e.g. stack regions) are linked into cached memory, but others
@@ -26,13 +24,19 @@ from elftools.elf.elffile import ELFFile
 
 objcopy_bin = sys.argv[1]
 elffile = sys.argv[2]
+cached_reg = int(sys.argv[3])
+uncached_reg = int(sys.argv[4])
+
+uc_min = uncached_reg << 29
+uc_max = uc_min | 0x1fffffff
+cache_off = "0x%x" % ((cached_reg - uncached_reg) << 29)
 
 fixup =[]
 with open(elffile, "rb") as fd:
     elf = ELFFile(fd)
     for s in elf.iter_sections():
         addr = s.header.sh_addr
-        if 0x80000000 <= addr < 0xa0000000:
+        if uc_min <= addr <= uc_max:
             print(f"fix_elf_addrs.py: Moving section {s.name} to cached SRAM region")
             fixup.append(s.name)
 
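For example, invoking the script with the default regions ("5 4") yields uc_min = 0x80000000, uc_max = 0x9fffffff and cache_off = "0x20000000", reproducing the previously hard-coded section move below.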
@@ -43,5 +47,5 @@ for s in fixup:
     # error (no --quiet option, no -Werror=no-whatever, nothing).
     # Just swallow the error stream for now pending rework to the
     # linker framework.
-    cmd = f"{objcopy_bin} --change-section-address {s}+0x20000000 {elffile} 2>/dev/null"
+    cmd = f"{objcopy_bin} --change-section-address {s}+{cache_off} {elffile} 2>/dev/null"
     os.system(cmd)
@@ -25,31 +25,25 @@ OUTPUT_ARCH(xtensa)
 
 ENTRY(rom_entry);
 
-/* DSP RAM regions (all of them) are mapped twice on the DSP: once in
- * a 512MB region from 0x80000000-0x9fffffff and again from
- * 0xa0000000-0xbfffffff.  The first mapping is set up to bypass the
- * L1 cache, so it must be used when multiprocessor coherence is
- * desired, where the latter mapping is best used for processor-local
- * data (e.g. stacks) or shared data that is managed with explicit
- * cache flush/invalidate operations.
+/* DSP RAM regions (all of them) are mapped twice on the DSP.  One
+ * mapping is set up to bypass the L1 cache, so it must be used when
+ * multiprocessor coherence is desired, where the latter mapping is
+ * best used for processor-local data (e.g. stacks) or shared data
+ * that is managed with explicit cache flush/invalidate operations.
  *
  * These macros will set up a segment start address correctly,
  * including alignment to a cache line.  Be sure to also emit the
- * section to ">ram :ram_phdr" or ">ucram :ucram_phdr" as
- * appropriate.  (Forgetting the correct PHDR will actually work, as
- * the output tooling ignores it, but it will cause the linker to emit
- * 512MB of unused data into the output file!)
- *
- * (Note clumsy syntax because XCC doesn't understand the "~" operator)
+ * section to ">ram" or ">ucram" as appropriate, to prevent the linker
+ * from filling in 512MB of sparse zeros.
  */
 #ifdef CONFIG_KERNEL_COHERENCE
-#define SEGSTART_CACHED   (ALIGN(64) | 0x20000000)
-#define SEGSTART_UNCACHED (ALIGN(64) & 0xdfffffff) /* == ~0x20000000 */
+#define RPO_SET(addr, reg) ((addr & 0x1fffffff) | (reg << 29))
+#define SEGSTART_CACHED   RPO_SET(ALIGN(64), CONFIG_XTENSA_CACHED_REGION)
+#define SEGSTART_UNCACHED RPO_SET(ALIGN(64), CONFIG_XTENSA_UNCACHED_REGION)
 #else
 #define SEGSTART_CACHED   .
 #define SEGSTART_UNCACHED .
 #define ucram ram
-#define ucram_phdr ram_phdr
 #endif
 
 /* intlist.ld needs an IDT_LIST memory region */
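RPO_SET() keeps the low 29 address bits and substitutes the region number into bits 31:29, so with the default regions SEGSTART_CACHED resolves into the 0xa0000000 alias and SEGSTART_UNCACHED into the 0x80000000 alias, exactly what the old "| 0x20000000" and "& 0xdfffffff" expressions produced.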
@@ -129,7 +123,7 @@ MEMORY {
 	len = RAM_SIZE
 #ifdef CONFIG_KERNEL_COHERENCE
 	ucram :
-	org = RAM_BASE - 0x20000000,
+	org = RPO_SET(RAM_BASE, CONFIG_XTENSA_UNCACHED_REGION),
 	len = RAM_SIZE
 #endif
 #ifdef CONFIG_GEN_ISR_TABLES
@@ -4,6 +4,7 @@
 #ifndef __INTEL_ADSP_CPU_INIT_H
 #define __INTEL_ADSP_CPU_INIT_H
 
+#include <arch/xtensa/cache.h>
 #include <xtensa/config/core-isa.h>
 
 #define CxL1CCAP (*(volatile uint32_t *)0x9F080080)
@@ -14,39 +15,6 @@
 #define CxL1CCAP_DCMWC ((CxL1CCAP >> 16) & 7)
 #define CxL1CCAP_ICMWC ((CxL1CCAP >> 20) & 7)
 
-/* Utilities to generate an unwrapped code sequence to set the RPO TLB
- * registers.  Pass the 8 region attributes as arguments, e.g.:
- *
- *     SET_RPO_TLB(2, 15, 15, 15, 2, 4, 15, 15);
- *
- * Note that cAVS 1.5 has the "translation" option that we don't use,
- * but still need to put an identity mapping in the high bits.  Also
- * per spec changing the current code region requires that WITLB be
- * followed by an ISYNC and that both instructions live in the same
- * cache line (two 3-byte instructions fit in an 8-byte aligned
- * region, so that's guaranteed not to cross a caceh line boundary).
- */
-#define SET_ONE_TLB(region, att) do { \
-	uint32_t addr = region * 0x20000000U, attr = att; \
-	if (XCHAL_HAVE_XLT_CACHEATTR) { \
-		attr |= addr; /* RPO with translation */ \
-	} \
-	if (region != (L2_SRAM_BASE >> 29)) { \
-		__asm__ volatile("wdtlb %0, %1; witlb %0, %1" \
-				 :: "r"(attr), "r"(addr)); \
-	} else { \
-		__asm__ volatile("wdtlb %0, %1" \
-				 :: "r"(attr), "r"(addr)); \
-		__asm__ volatile("j 1f; .align 8; 1:"); \
-		__asm__ volatile("witlb %0, %1; isync" \
-				 :: "r"(attr), "r"(addr)); \
-	} \
-} while (0)
-
-#define SET_RPO_TLB(...) do { \
-	FOR_EACH_IDX(SET_ONE_TLB, (;), __VA_ARGS__); \
-} while (0)
-
 /* Low-level CPU initialization.  Call this immediately after entering
  * C code to initialize the cache, protection and synchronization
  * features.
@@ -98,15 +66,9 @@ static ALWAYS_INLINE void cpu_early_init(void)
 
 	/* Finally we need to enable the cache in the Region
 	 * Protection Option "TLB" entries.  The hardware defaults
-	 * have this set to RW/uncached (2) everywhere.  We want
-	 * writeback caching (4) in the sixth mapping (the second of
-	 * two RAM mappings) and to mark all unused regions
-	 * inaccessible (15) for safety.  Note that there is a HAL
-	 * routine that does this (by emulating the older "cacheattr"
-	 * hardware register), but it generates significantly larger
-	 * code.
+	 * have this set to RW/uncached everywhere.
 	 */
-	SET_RPO_TLB(2, 15, 15, 15, 2, 4, 15, 15);
+	ARCH_XTENSA_SET_RPO_TLB();
 
 	/* Initialize ATOMCTL: Hardware defaults for S32C1I use
 	 * "internal" operations, meaning they are atomic only WRT the
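With the intel_adsp defaults, ARCH_XTENSA_SET_RPO_TLB() expands to the same attribute pattern the old hand-written call spelled out: attribute 2 (bypass) for region 0 (MMIO) and the uncached region 4, attribute 4 (writeback) for the cached region 5, and 15 (invalid) everywhere else, i.e. the equivalent of SET_RPO_TLB(2, 15, 15, 15, 2, 4, 15, 15).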
@@ -80,54 +80,12 @@ extern void soc_start_core(int cpu_num);
 
 extern bool soc_cpus_active[CONFIG_MP_NUM_CPUS];
 
-/* Legacy SOC-level API still used in a few drivers */
+/* Legacy cache APIs still used in a few places */
 #define SOC_DCACHE_FLUSH(addr, size) \
 	z_xtensa_cache_flush((addr), (size))
 #define SOC_DCACHE_INVALIDATE(addr, size) \
 	z_xtensa_cache_inv((addr), (size))
+#define z_soc_cached_ptr(p) arch_xtensa_cached_ptr(p)
+#define z_soc_uncached_ptr(p) arch_xtensa_uncached_ptr(p)
 
-/**
- * @brief Return uncached pointer to a RAM address
- *
- * The Intel ADSP architecture maps all addressable RAM (of all types)
- * twice, in two different 512MB segments regions whose L1 cache
- * settings can be controlled independently.  So for any given
- * pointer, it is possible to convert it to and from a cached version.
- *
- * This function takes a pointer to any addressible object (either in
- * cacheable memory or not) and returns a pointer that can be used to
- * refer to the same memory while bypassing the L1 data cache.  Data
- * in the L1 cache will not be inspected nor modified by the access.
- *
- * @see z_soc_cached_ptr()
- *
- * @param p A pointer to a valid C object
- * @return A pointer to the same object bypassing the L1 dcache
- */
-static inline void *z_soc_uncached_ptr(void *p)
-{
-	return ((void *)(((size_t)p) & ~0x20000000));
-}
-
-/**
- * @brief Return cached pointer to a RAM address
- *
- * This function takes a pointer to any addressible object (either in
- * cacheable memory or not) and returns a pointer that can be used to
- * refer to the same memory through the L1 data cache.  Data read
- * through the resulting pointer will reflect locally cached values on
- * the current CPU if they exist, and writes will go first into the
- * cache and be written back later.
- *
- * @see z_soc_uncached_ptr()
- *
- * @param p A pointer to a valid C object
- * @return A pointer to the same object via the L1 dcache
- *
- */
-static inline void *z_soc_cached_ptr(void *p)
-{
-	return ((void *)(((size_t)p) | 0x20000000));
-}
-
 #endif /* __INC_SOC_H */