diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 8b51ec570b5..8b3ee35a670 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -89,4 +89,21 @@ config XTENSA_SMALL_VECTOR_TABLE_ENTRY
 	  handlers to the end of vector table, renaming them to
 	  _Level\LVL\()VectorHelper.
 
+config XTENSA_CACHED_REGION
+	int "Cached RPO mapping"
+	range 0 7
+	help
+	  A design trick on multi-core hardware is to map memory twice
+	  so that it can be seen in both (incoherent) cached mappings
+	  and a coherent "shared" area.  This specifies which 512M
+	  region (0-7, as defined by the Xtensa Region Protection
+	  Option) contains the "cached" mapping.
+
+config XTENSA_UNCACHED_REGION
+	int "Uncached RPO mapping"
+	range 0 7
+	help
+	  As for XTENSA_CACHED_REGION, this specifies which 512M
+	  region (0-7) contains the "uncached" mapping.
+
 endmenu
diff --git a/arch/xtensa/include/kernel_arch_func.h b/arch/xtensa/include/kernel_arch_func.h
index b69f951adce..82a4f986c5b 100644
--- a/arch/xtensa/include/kernel_arch_func.h
+++ b/arch/xtensa/include/kernel_arch_func.h
@@ -58,18 +58,6 @@ static inline void arch_switch(void *switch_to, void **switched_from)
 	return xtensa_switch(switch_to, switched_from);
 }
 
-/* FIXME: we don't have a framework for including this from the SoC
- * layer, so we define it in the arch code here.
- */
-#if defined(CONFIG_SOC_FAMILY_INTEL_ADSP) && defined(CONFIG_KERNEL_COHERENCE)
-static inline bool arch_mem_coherent(void *ptr)
-{
-	size_t addr = (size_t) ptr;
-
-	return addr >= 0x80000000 && addr < 0xa0000000;
-}
-#endif
-
 #ifdef CONFIG_KERNEL_COHERENCE
 static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
 					     void *old_switch_handle,
diff --git a/include/arch/xtensa/cache.h b/include/arch/xtensa/cache.h
index 12dc2c60065..bfd8608a4b8 100644
--- a/include/arch/xtensa/cache.h
+++ b/include/arch/xtensa/cache.h
@@ -15,8 +15,9 @@ extern "C" {
 
 #define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS)
 
-#if XCHAL_DCACHE_SIZE
 #define Z_IS_POW2(x) (((x) != 0) && (((x) & ((x)-1)) == 0))
+
+#if XCHAL_DCACHE_SIZE
 BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE));
 BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX));
 #endif
@@ -78,6 +79,139 @@ static ALWAYS_INLINE void z_xtensa_cache_flush_inv_all(void)
 	z_xtensa_cache_flush_inv(NULL, Z_DCACHE_MAX);
 }
 
+#ifdef CONFIG_ARCH_HAS_COHERENCE
+static inline bool arch_mem_coherent(void *ptr)
+{
+	size_t addr = (size_t) ptr;
+
+	return (addr >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
+}
+#endif
+
+static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
+{
+	/* The math here is all compile-time: when the two regions
+	 * differ by a power of two, we can convert between them by
+	 * setting or clearing just one bit.  Otherwise it needs two
+	 * operations.
+	 */
+	uint32_t rxor = (rto ^ rfrom) << 29;
+
+	rto <<= 29;
+	if (Z_IS_POW2(rxor)) {
+		if ((rxor & rto) == 0) {
+			return addr & ~rxor;
+		} else {
+			return addr | rxor;
+		}
+	} else {
+		return (addr & ~(7U << 29)) | rto;
+	}
+}
+
+/**
+ * @brief Return cached pointer to a RAM address
+ *
+ * The Xtensa coherence architecture maps addressable RAM twice, in
+ * two different 512MB regions whose L1 cache settings can be
+ * controlled independently.  So for any given pointer, it is possible
+ * to convert it to and from a cached version.
+ *
+ * This function takes a pointer to any addressable object (either in
+ * cacheable memory or not) and returns a pointer that can be used to
+ * refer to the same memory through the L1 data cache.  Data read
+ * through the resulting pointer will reflect locally cached values on
+ * the current CPU if they exist, and writes will go first into the
+ * cache and be written back later.
+ *
+ * @see arch_xtensa_uncached_ptr()
+ *
+ * @param ptr A pointer to a valid C object
+ * @return A pointer to the same object via the L1 dcache
+ */
+static inline void *arch_xtensa_cached_ptr(void *ptr)
+{
+	return (void *)z_xtrpoflip((uint32_t) ptr,
+				   CONFIG_XTENSA_CACHED_REGION,
+				   CONFIG_XTENSA_UNCACHED_REGION);
+}
+
+/**
+ * @brief Return uncached pointer to a RAM address
+ *
+ * The Xtensa coherence architecture maps addressable RAM twice, in
+ * two different 512MB regions whose L1 cache settings can be
+ * controlled independently.  So for any given pointer, it is possible
+ * to convert it to and from a cached version.
+ *
+ * This function takes a pointer to any addressable object (either in
+ * cacheable memory or not) and returns a pointer that can be used to
+ * refer to the same memory while bypassing the L1 data cache.  Data
+ * in the L1 cache will not be inspected nor modified by the access.
+ *
+ * @see arch_xtensa_cached_ptr()
+ *
+ * @param ptr A pointer to a valid C object
+ * @return A pointer to the same object bypassing the L1 dcache
+ */
+static inline void *arch_xtensa_uncached_ptr(void *ptr)
+{
+	return (void *)z_xtrpoflip((uint32_t) ptr,
+				   CONFIG_XTENSA_UNCACHED_REGION,
+				   CONFIG_XTENSA_CACHED_REGION);
+}
+
+/* Utility to generate an unrolled and optimal[1] code sequence to set
+ * the RPO TLB registers (contra the HAL cacheattr macros, which
+ * generate larger code and can't be called from C), based on the
+ * KERNEL_COHERENCE configuration in use.  Selects RPO attribute "2"
+ * for regions (including MMIO registers in region zero) which want to
+ * bypass L1, "4" for the cached region which wants writeback, and
+ * "15" (invalid) elsewhere.
+ *
+ * Note that on cores that have the "translation" option set, we need
+ * to put an identity mapping in the high bits.  Also per spec
+ * changing the current code region (by definition cached) requires
+ * that WITLB be followed by an ISYNC and that both instructions live
+ * in the same cache line (two 3-byte instructions fit in an 8-byte
+ * aligned region, so that's guaranteed not to cross a cache line
+ * boundary).
+ *
+ * [1] With the sole exception of gcc's infuriating insistence on
+ * emitting a precomputed literal for addr + addrincr instead of
+ * computing it with a single ADD instruction from values it already
+ * has in registers.  Explicitly assigning the variables to registers
+ * via an attribute works, but then emits needless MOV instructions
+ * instead.  I tell myself it's just 32 bytes of .text, but... Sigh.
+ */
+#define _REGION_ATTR(r)						\
+	((r) == 0 ? 2 :						\
+	 ((r) == CONFIG_XTENSA_CACHED_REGION ? 4 :		\
+	  ((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))
+
+#define _SET_ONE_TLB(region) do {				\
+	uint32_t attr = _REGION_ATTR(region);			\
+	if (XCHAL_HAVE_XLT_CACHEATTR) {				\
+		attr |= addr; /* RPO with translation */	\
+	}							\
+	if (region != CONFIG_XTENSA_CACHED_REGION) {		\
+		__asm__ volatile("wdtlb %0, %1; witlb %0, %1"	\
+				 :: "r"(attr), "r"(addr));	\
+	} else {						\
+		__asm__ volatile("wdtlb %0, %1"			\
+				 :: "r"(attr), "r"(addr));	\
+		__asm__ volatile("j 1f; .align 8; 1:");		\
+		__asm__ volatile("witlb %0, %1; isync"		\
+				 :: "r"(attr), "r"(addr));	\
+	}							\
+	addr += addrincr;					\
+} while (0)
+
+#define ARCH_XTENSA_SET_RPO_TLB() do {				\
+	register uint32_t addr = 0, addrincr = 0x20000000;	\
+	FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7);	\
+} while (0)
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/soc/xtensa/intel_adsp/Kconfig.defconfig b/soc/xtensa/intel_adsp/Kconfig.defconfig
index 76b060560f9..1e6889389a7 100644
--- a/soc/xtensa/intel_adsp/Kconfig.defconfig
+++ b/soc/xtensa/intel_adsp/Kconfig.defconfig
@@ -4,3 +4,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
 source "soc/xtensa/intel_adsp/*/Kconfig.defconfig.series"
+
+# Lower priority defaults come AFTER the series-specific ones set above
+
+config XTENSA_CACHED_REGION
+	default 5
+
+config XTENSA_UNCACHED_REGION
+	default 4
diff --git a/soc/xtensa/intel_adsp/common/CMakeLists.txt b/soc/xtensa/intel_adsp/common/CMakeLists.txt
index 435e07482d8..ede0063f235 100644
--- a/soc/xtensa/intel_adsp/common/CMakeLists.txt
+++ b/soc/xtensa/intel_adsp/common/CMakeLists.txt
@@ -59,6 +59,7 @@ add_custom_target(
     copy ${CMAKE_BINARY_DIR}/zephyr/${KERNEL_NAME}.elf ${KERNEL_REMAPPED}
   COMMAND ${ELF_FIX} ${CMAKE_OBJCOPY} ${KERNEL_REMAPPED}
+    ${CONFIG_XTENSA_CACHED_REGION} ${CONFIG_XTENSA_UNCACHED_REGION}
 
   # Extract modules for rimage
   COMMAND ${CMAKE_OBJCOPY}
diff --git a/soc/xtensa/intel_adsp/common/fix_elf_addrs.py b/soc/xtensa/intel_adsp/common/fix_elf_addrs.py
index 3f60fd386b4..8b0c7c2a21c 100755
--- a/soc/xtensa/intel_adsp/common/fix_elf_addrs.py
+++ b/soc/xtensa/intel_adsp/common/fix_elf_addrs.py
@@ -3,13 +3,11 @@
 # Copyright (c) 2020 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-# ADSP devices have their RAM regions mapped twice, once in the 512MB
-# region from 0x80000000-0x9fffffff and again from
-# 0xa0000000-0xbfffffff.  The first mapping is set in the CPU to
-# bypass the L1 cache, and so access through pointers in that region
-# is coherent between CPUs (but slow).  The second region accesses the
-# same memory through the L1 cache and requires careful flushing when
-# used with shared data.
+# ADSP devices have their RAM regions mapped twice.  The first mapping
+# is set in the CPU to bypass the L1 cache, and so access through
+# pointers in that region is coherent between CPUs (but slow).  The
+# second region accesses the same memory through the L1 cache and
+# requires careful flushing when used with shared data.
 #
 # This distinction is exposed in the linker script, where some symbols
 # (e.g. stack regions) are linked into cached memory, but others
@@ -26,13 +24,19 @@ from elftools.elf.elffile import ELFFile
 
 objcopy_bin = sys.argv[1]
 elffile = sys.argv[2]
+cached_reg = int(sys.argv[3])
+uncached_reg = int(sys.argv[4])
+
+uc_min = uncached_reg << 29
+uc_max = uc_min | 0x1fffffff
+cache_off = "0x%x" % ((cached_reg - uncached_reg) << 29)
 
 fixup =[]
 with open(elffile, "rb") as fd:
     elf = ELFFile(fd)
     for s in elf.iter_sections():
         addr = s.header.sh_addr
-        if 0x80000000 <= addr < 0xa0000000:
+        if uc_min <= addr <= uc_max:
             print(f"fix_elf_addrs.py: Moving section {s.name} to cached SRAM region")
             fixup.append(s.name)
 
@@ -43,5 +47,5 @@ for s in fixup:
     # error (no --quiet option, no -Werror=no-whatever, nothing).
     # Just swallow the error stream for now pending rework to the
    # linker framework.
-    cmd = f"{objcopy_bin} --change-section-address {s}+0x20000000 {elffile} 2>/dev/null"
+    cmd = f"{objcopy_bin} --change-section-address {s}+{cache_off} {elffile} 2>/dev/null"
     os.system(cmd)
diff --git a/soc/xtensa/intel_adsp/common/include/cavs-link.ld b/soc/xtensa/intel_adsp/common/include/cavs-link.ld
index 2978594b25a..85804185960 100644
--- a/soc/xtensa/intel_adsp/common/include/cavs-link.ld
+++ b/soc/xtensa/intel_adsp/common/include/cavs-link.ld
@@ -25,31 +25,25 @@ OUTPUT_ARCH(xtensa)
 
 ENTRY(rom_entry);
 
-/* DSP RAM regions (all of them) are mapped twice on the DSP: once in
- * a 512MB region from 0x80000000-0x9fffffff and again from
- * 0xa0000000-0xbfffffff.  The first mapping is set up to bypass the
- * L1 cache, so it must be used when multiprocessor coherence is
- * desired, where the latter mapping is best used for processor-local
- * data (e.g. stacks) or shared data that is managed with explicit
- * cache flush/invalidate operations.
+/* DSP RAM regions (all of them) are mapped twice on the DSP.  One
+ * mapping is set up to bypass the L1 cache, so it must be used when
+ * multiprocessor coherence is desired, while the other mapping is
+ * best used for processor-local data (e.g. stacks) or shared data
+ * that is managed with explicit cache flush/invalidate operations.
  *
  * These macros will set up a segment start address correctly,
  * including alignment to a cache line.  Be sure to also emit the
- * section to ">ram :ram_phdr" or ">ucram :ucram_phdr" as
- * appropriate.  (Forgetting the correct PHDR will actually work, as
- * the output tooling ignores it, but it will cause the linker to emit
- * 512MB of unused data into the output file!)
- *
- * (Note clumsy syntax because XCC doesn't understand the "~" operator)
+ * section to ">ram" or ">ucram" as appropriate, to prevent the linker
+ * from filling in 512MB of sparse zeros.
  */
 #ifdef CONFIG_KERNEL_COHERENCE
-#define SEGSTART_CACHED   (ALIGN(64) | 0x20000000)
-#define SEGSTART_UNCACHED (ALIGN(64) & 0xdfffffff) /* == ~0x20000000 */
+#define RPO_SET(addr, reg) ((addr & 0x1fffffff) | (reg << 29))
+#define SEGSTART_CACHED   RPO_SET(ALIGN(64), CONFIG_XTENSA_CACHED_REGION)
+#define SEGSTART_UNCACHED RPO_SET(ALIGN(64), CONFIG_XTENSA_UNCACHED_REGION)
 #else
 #define SEGSTART_CACHED   .
 #define SEGSTART_UNCACHED .
 #define ucram ram
-#define ucram_phdr ram_phdr
 #endif
 
 /* intlist.ld needs an IDT_LIST memory region */
@@ -129,7 +123,7 @@ MEMORY {
     len = RAM_SIZE
 #ifdef CONFIG_KERNEL_COHERENCE
   ucram :
-    org = RAM_BASE - 0x20000000,
+    org = RPO_SET(RAM_BASE, CONFIG_XTENSA_UNCACHED_REGION),
     len = RAM_SIZE
 #endif
 #ifdef CONFIG_GEN_ISR_TABLES
diff --git a/soc/xtensa/intel_adsp/common/include/cpu_init.h b/soc/xtensa/intel_adsp/common/include/cpu_init.h
index 6ba83cda0c6..b0ce7de9ab4 100644
--- a/soc/xtensa/intel_adsp/common/include/cpu_init.h
+++ b/soc/xtensa/intel_adsp/common/include/cpu_init.h
@@ -4,6 +4,7 @@
 #ifndef __INTEL_ADSP_CPU_INIT_H
 #define __INTEL_ADSP_CPU_INIT_H
 
+#include <arch/xtensa/cache.h>
 #include <xtensa/config/core-isa.h>
 
 #define CxL1CCAP (*(volatile uint32_t *)0x9F080080)
@@ -14,39 +15,6 @@
 #define CxL1CCAP_DCMWC ((CxL1CCAP >> 16) & 7)
 #define CxL1CCAP_ICMWC ((CxL1CCAP >> 20) & 7)
 
-/* Utilities to generate an unwrapped code sequence to set the RPO TLB
- * registers.  Pass the 8 region attributes as arguments, e.g.:
- *
- *     SET_RPO_TLB(2, 15, 15, 15, 2, 4, 15, 15);
- *
- * Note that cAVS 1.5 has the "translation" option that we don't use,
- * but still need to put an identity mapping in the high bits.  Also
- * per spec changing the current code region requires that WITLB be
- * followed by an ISYNC and that both instructions live in the same
- * cache line (two 3-byte instructions fit in an 8-byte aligned
- * region, so that's guaranteed not to cross a caceh line boundary).
- */
-#define SET_ONE_TLB(region, att) do {				\
-	uint32_t addr = region * 0x20000000U, attr = att;	\
-	if (XCHAL_HAVE_XLT_CACHEATTR) {				\
-		attr |= addr; /* RPO with translation */	\
-	}							\
-	if (region != (L2_SRAM_BASE >> 29)) {			\
-		__asm__ volatile("wdtlb %0, %1; witlb %0, %1"	\
-				 :: "r"(attr), "r"(addr));	\
-	} else {						\
-		__asm__ volatile("wdtlb %0, %1"			\
-				 :: "r"(attr), "r"(addr));	\
-		__asm__ volatile("j 1f; .align 8; 1:");		\
-		__asm__ volatile("witlb %0, %1; isync"		\
-				 :: "r"(attr), "r"(addr));	\
-	}							\
-} while (0)
-
-#define SET_RPO_TLB(...) do { \
-	FOR_EACH_IDX(SET_ONE_TLB, (;), __VA_ARGS__); \
-} while (0)
-
 /* Low-level CPU initialization.  Call this immediately after entering
  * C code to initialize the cache, protection and synchronization
  * features.
@@ -98,15 +66,9 @@ static ALWAYS_INLINE void cpu_early_init(void)
 
 	/* Finally we need to enable the cache in the Region
 	 * Protection Option "TLB" entries.  The hardware defaults
-	 * have this set to RW/uncached (2) everywhere.  We want
-	 * writeback caching (4) in the sixth mapping (the second of
-	 * two RAM mappings) and to mark all unused regions
-	 * inaccessible (15) for safety.  Note that there is a HAL
-	 * routine that does this (by emulating the older "cacheattr"
-	 * hardware register), but it generates significantly larger
-	 * code.
+	 * have this set to RW/uncached everywhere.
 	 */
-	SET_RPO_TLB(2, 15, 15, 15, 2, 4, 15, 15);
+	ARCH_XTENSA_SET_RPO_TLB();
 
 	/* Initialize ATOMCTL: Hardware defaults for S32C1I use
 	 * "internal" operations, meaning they are atomic only WRT the
diff --git a/soc/xtensa/intel_adsp/common/include/soc.h b/soc/xtensa/intel_adsp/common/include/soc.h
index 26bcbede9ee..129df3f5453 100644
--- a/soc/xtensa/intel_adsp/common/include/soc.h
+++ b/soc/xtensa/intel_adsp/common/include/soc.h
@@ -80,54 +80,12 @@ extern void soc_start_core(int cpu_num);
 
 extern bool soc_cpus_active[CONFIG_MP_NUM_CPUS];
 
-/* Legacy SOC-level API still used in a few drivers */
+/* Legacy cache APIs still used in a few places */
 #define SOC_DCACHE_FLUSH(addr, size)		\
 	z_xtensa_cache_flush((addr), (size))
 #define SOC_DCACHE_INVALIDATE(addr, size)	\
 	z_xtensa_cache_inv((addr), (size))
-
-/**
- * @brief Return uncached pointer to a RAM address
- *
- * The Intel ADSP architecture maps all addressable RAM (of all types)
- * twice, in two different 512MB segments regions whose L1 cache
- * settings can be controlled independently.  So for any given
- * pointer, it is possible to convert it to and from a cached version.
- *
- * This function takes a pointer to any addressible object (either in
- * cacheable memory or not) and returns a pointer that can be used to
- * refer to the same memory while bypassing the L1 data cache.  Data
- * in the L1 cache will not be inspected nor modified by the access.
- *
- * @see z_soc_cached_ptr()
- *
- * @param p A pointer to a valid C object
- * @return A pointer to the same object bypassing the L1 dcache
- */
-static inline void *z_soc_uncached_ptr(void *p)
-{
-	return ((void *)(((size_t)p) & ~0x20000000));
-}
-
-/**
- * @brief Return cached pointer to a RAM address
- *
- * This function takes a pointer to any addressible object (either in
- * cacheable memory or not) and returns a pointer that can be used to
- * refer to the same memory through the L1 data cache.  Data read
- * through the resulting pointer will reflect locally cached values on
- * the current CPU if they exist, and writes will go first into the
- * cache and be written back later.
- *
- * @see z_soc_uncached_ptr()
- *
- * @param p A pointer to a valid C object
- * @return A pointer to the same object via the L1 dcache
-
- */
-static inline void *z_soc_cached_ptr(void *p)
-{
-	return ((void *)(((size_t)p) | 0x20000000));
-}
+#define z_soc_cached_ptr(p) arch_xtensa_cached_ptr(p)
+#define z_soc_uncached_ptr(p) arch_xtensa_uncached_ptr(p)
 
 #endif /* __INC_SOC_H */
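
Illustrative usage sketch (not part of the patch): the snippet below shows how the new arch_xtensa_cached_ptr()/arch_xtensa_uncached_ptr() helpers from cache.h compose with the existing z_xtensa_cache_flush() call when a buffer is shared between CPUs under KERNEL_COHERENCE. The shared_msg structure and the producer/consumer functions are hypothetical names invented for the example; only the three helpers come from this tree.

/* Sketch only: shared_msg, producer_publish() and consumer_peek_seq() are
 * hypothetical; the pointer-conversion and flush helpers are the real ones.
 */
#include <stdint.h>
#include <string.h>
#include <arch/xtensa/cache.h>

struct shared_msg {
	uint32_t seq;
	char payload[64];
};

static struct shared_msg msg;

/* Producer CPU: write through its own L1 cache for speed, then flush the
 * lines so the data reaches shared SRAM where other CPUs can see it.
 */
void producer_publish(const char *text, uint32_t seq)
{
	struct shared_msg *m = arch_xtensa_cached_ptr(&msg);

	strncpy(m->payload, text, sizeof(m->payload) - 1);
	m->payload[sizeof(m->payload) - 1] = '\0';
	m->seq = seq;
	z_xtensa_cache_flush(m, sizeof(*m));
}

/* Consumer CPU: read through the uncached alias so any stale lines in its
 * own L1 cache are bypassed entirely (no invalidate needed for this read).
 */
uint32_t consumer_peek_seq(void)
{
	volatile struct shared_msg *m = arch_xtensa_uncached_ptr(&msg);

	return m->seq;
}

Reading via the uncached alias trades latency for simplicity; a hot path would more likely invalidate and read through the cached mapping instead.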
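
A second, host-compilable sketch of the region-flip arithmetic behind z_xtrpoflip() in the cache.h hunk, using the Intel ADSP defaults set in Kconfig.defconfig (cached region 5, uncached region 4). rpoflip() below is a local copy of the logic for illustration, not a Zephyr API; the assertions show that regions differing by one bit reduce to the single AND/OR with 0x20000000 that the old hard-coded soc.h and fix_elf_addrs.py logic performed.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define IS_POW2(x) (((x) != 0) && (((x) & ((x) - 1)) == 0))

/* Host-side copy of the patch's z_xtrpoflip() logic for inspection. */
static uint32_t rpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
{
	uint32_t rxor = (rto ^ rfrom) << 29;

	rto <<= 29;
	if (IS_POW2(rxor)) {
		return ((rxor & rto) == 0) ? (addr & ~rxor) : (addr | rxor);
	} else {
		return (addr & ~(7U << 29)) | rto;
	}
}

int main(void)
{
	/* cAVS defaults: cached=5, uncached=4.  The regions differ by one
	 * bit, so conversion is a single OR/AND with 0x20000000.
	 */
	assert(rpoflip(0x9e001000u, 5, 4) == 0xbe001000u); /* to cached */
	assert(rpoflip(0xbe001000u, 4, 5) == 0x9e001000u); /* to uncached */

	/* Regions whose XOR is not a power of two (e.g. 3 and 4) fall back
	 * to replacing all three top address bits.
	 */
	assert(rpoflip(0x61234567u, 4, 3) == 0x81234567u);

	printf("region-flip checks passed\n");
	return 0;
}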