diff --git a/arch/Kconfig b/arch/Kconfig index 5ce7079e532..2d6ec48f2a0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -44,6 +44,7 @@ config X86 select ARCH_HAS_CUSTOM_SWAP_TO_MAIN if !X86_64 select ARCH_SUPPORTS_COREDUMP select CPU_HAS_MMU + select ARCH_MEM_DOMAIN_DATA if USERSPACE select ARCH_MEM_DOMAIN_SYNCHRONOUS_API if USERSPACE select ARCH_HAS_GDBSTUB if !X86_64 select ARCH_HAS_TIMING_FUNCTIONS diff --git a/arch/x86/core/ia32/userspace.S b/arch/x86/core/ia32/userspace.S index 69c3b8e13c5..7c1277446c0 100644 --- a/arch/x86/core/ia32/userspace.S +++ b/arch/x86/core/ia32/userspace.S @@ -305,42 +305,18 @@ SECTION_FUNC(TEXT, z_x86_userspace_enter) * want to leak any information. */ mov %edi, %esp -#ifdef CONFIG_X86_PAE - /* Skip over the toplevel PDPT stored here */ - subl $0x20, %esp -#endif /* CONFIG_X86_PAE */ - /* Stash some registers we are going to need to erase the user - * stack. - */ + /* Erase and enable US bit in page tables for the stack buffer */ push %ecx - push %edi push %eax - - /* Compute size of user stack in 4-byte chunks and put in ECX */ - mov %ebx, %ecx - sub %edi, %ecx - shr $2, %ecx /* Divide by 4 */ - -#ifdef CONFIG_INIT_STACKS - mov $0xAAAAAAAA, %eax -#else - xor %eax, %eax -#endif - /* Copy 4 bytes of memory at a time, starting at ES:EDI, with whatever - * is in EAX. Repeat this ECX times. Stack sizes are always at least - * 4-byte aligned. - */ - cld - rep stosl - - /* Restore registers */ + push %edx + call z_x86_current_stack_perms + pop %edx pop %eax - pop %edi pop %ecx - /* Now set stack pointer to the base of the user stack. Now that this - * is set we won't need EBX any more. + /* Set stack pointer to the base of the freshly-erased user stack. + * Now that this is set we won't need EBX any more. */ mov %ebx, %esp diff --git a/arch/x86/core/intel64/userspace.S b/arch/x86/core/intel64/userspace.S index 33b12ca0cf0..8fd9297f9cf 100644 --- a/arch/x86/core/intel64/userspace.S +++ b/arch/x86/core/intel64/userspace.S @@ -286,24 +286,20 @@ z_x86_userspace_enter: */ movq %r9, %rsp - /* Need RDI temporarily */ - pushq %rdi - - /* Compute size of user stack in 8-byte chunks and put in RCX */ - movq %r9, %rdi /* Start address for rep stosq in RDI */ - movq %r8, %rcx /* Ending address */ - subq %rdi, %rcx /* Subtract starting address */ - shrq $3, %rcx /* Divide by 8 */ - - movq $0xAAAAAAAAAAAAAAAA, %rax /* Fill value */ - /* Copy 8 bytes of memory at a time, starting at ES:RDI, with whatever - * is in RAX. Repeat this RCX times. Stack sizes are always at least - * 8-byte aligned. + /* Push callee-saved regs and go back into C code to erase the stack + * buffer and set US bit in page tables for it */ - cld - rep stosq - + pushq %rdx + pushq %rsi + pushq %rdi + pushq %r8 + pushq %r10 + callq z_x86_current_stack_perms + popq %r10 + popq %r8 popq %rdi + popq %rsi + popq %rdx /* Reset to the beginning of the user stack */ movq %r8, %rsp diff --git a/arch/x86/core/userspace.c b/arch/x86/core/userspace.c index 9c332b5f553..5ea2f4cbead 100644 --- a/arch/x86/core/userspace.c +++ b/arch/x86/core/userspace.c @@ -15,12 +15,11 @@ /* Update the to the incoming thread's page table, and update the location of * the privilege elevation stack. * - * May be called ONLY during context switch and when supervisor threads drop - * synchronously to user mode. Hot code path! + * May be called ONLY during context switch. Hot code path! * * Nothing to do here if KPTI is enabled. We are in supervisor mode, so the * active page tables are the kernel's page tables. 
If the incoming thread is - * in user mode we are going to switch CR3 to the thread-specific tables when + * in user mode we are going to switch CR3 to the domain-specific tables when * we go through z_x86_trampoline_to_user. * * We don't need to update the privilege mode initial stack pointer either, @@ -33,18 +32,17 @@ void z_x86_swap_update_page_tables(struct k_thread *incoming) uintptr_t ptables_phys; #ifndef CONFIG_X86_64 - /* 64-bit uses syscall/sysret which switches stacks manually, - * tss64.psp is updated unconditionally in __resume + /* Set initial stack pointer when elevating privileges from Ring 3 + * to Ring 0. */ - if ((incoming->base.user_options & K_USER) != 0) { - _main_tss.esp0 = (uintptr_t)incoming->arch.psp; - } + _main_tss.esp0 = (uintptr_t)incoming->arch.psp; #endif - /* Check first that we actually need to do this, since setting * CR3 involves an expensive full TLB flush. */ ptables_phys = incoming->arch.ptables; + __ASSERT(ptables_phys != 0, "NULL page tables for thread %p\n", + incoming); if (ptables_phys != z_x86_cr3_get()) { z_x86_cr3_set(ptables_phys); @@ -52,23 +50,6 @@ void z_x86_swap_update_page_tables(struct k_thread *incoming) } #endif /* CONFIG_X86_KPTI */ -FUNC_NORETURN static void drop_to_user(k_thread_entry_t user_entry, - void *p1, void *p2, void *p3) -{ - uint32_t stack_end; - - /* Transition will reset stack pointer to initial, discarding - * any old context since this is a one-way operation - */ - stack_end = Z_STACK_PTR_ALIGN(_current->stack_info.start + - _current->stack_info.size - - _current->stack_info.delta); - - z_x86_userspace_enter(user_entry, p1, p2, p3, stack_end, - _current->stack_info.start); - CODE_UNREACHABLE; -} - /* Preparation steps needed for all threads if user mode is turned on. * * Returns the initial entry point to swap into. @@ -82,11 +63,15 @@ void *z_x86_userspace_prepare_thread(struct k_thread *thread) thread->arch.psp = header->privilege_stack + sizeof(header->privilege_stack); + /* Important this gets cleared, so that arch_mem_domain_* APIs + * can distinguish between new threads, and threads migrating + * between domains + */ + thread->arch.ptables = (uintptr_t)NULL; + if ((thread->base.user_options & K_USER) != 0U) { - z_x86_thread_pt_init(thread); - initial_entry = drop_to_user; + initial_entry = arch_user_mode_enter; } else { - thread->arch.ptables = (uintptr_t)&z_x86_kernel_ptables; initial_entry = z_thread_entry; } @@ -96,32 +81,16 @@ void *z_x86_userspace_prepare_thread(struct k_thread *thread) FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry, void *p1, void *p2, void *p3) { - k_spinlock_key_t key; + uint32_t stack_end; - z_x86_thread_pt_init(_current); - - key = k_spin_lock(&z_mem_domain_lock); - /* Apply memory domain configuration, if assigned. Threads that - * started in user mode already had this done via z_setup_new_thread() + /* Transition will reset stack pointer to initial, discarding + * any old context since this is a one-way operation */ - z_x86_apply_mem_domain(_current, _current->mem_domain_info.mem_domain); - k_spin_unlock(&z_mem_domain_lock, key); + stack_end = Z_STACK_PTR_ALIGN(_current->stack_info.start + + _current->stack_info.size - + _current->stack_info.delta); -#ifndef CONFIG_X86_KPTI - /* We're synchronously dropping into user mode from a thread that - * used to be in supervisor mode. K_USER flag has now been set, but - * Need to swap from the kernel's page tables to the per-thread page - * tables. 
- * - * Safe to update page tables from here, all tables are identity- - * mapped and memory areas used before the ring 3 transition all - * have the same attributes wrt supervisor mode access. - * - * Threads that started in user mode already had this applied on - * initial context switch. - */ - z_x86_swap_update_page_tables(_current); -#endif - - drop_to_user(user_entry, p1, p2, p3); + z_x86_userspace_enter(user_entry, p1, p2, p3, stack_end, + _current->stack_info.start); + CODE_UNREACHABLE; } diff --git a/arch/x86/core/x86_mmu.c b/arch/x86/core/x86_mmu.c index 2914856da65..35d8ef6438d 100644 --- a/arch/x86/core/x86_mmu.c +++ b/arch/x86/core/x86_mmu.c @@ -21,9 +21,45 @@ LOG_MODULE_DECLARE(os); -#define ENTRY_RW (MMU_RW | MMU_IGNORED0) -#define ENTRY_US (MMU_US | MMU_IGNORED1) -#define ENTRY_XD (MMU_XD | MMU_IGNORED2) +/* We will use some ignored bits in the PTE to backup permission settings + * when the mapping was made. This is used to un-apply memory domain memory + * partitions to page tables when the partitions are removed. + */ +#define MMU_RW_ORIG MMU_IGNORED0 +#define MMU_US_ORIG MMU_IGNORED1 +#define MMU_XD_ORIG MMU_IGNORED2 + +/* Bits in the PTE that form the set of permission bits, when resetting */ +#define MASK_PERM (MMU_RW | MMU_US | MMU_XD) + +/* When we want to set up a new mapping, discarding any previous state */ +#define MASK_ALL (~((pentry_t)0U)) + +/* Bits to set at mapping time for particular permissions. We set the actual + * page table bit effecting the policy and also the backup bit. + */ +#define ENTRY_RW (MMU_RW | MMU_RW_ORIG) +#define ENTRY_US (MMU_US | MMU_US_ORIG) +#define ENTRY_XD (MMU_XD | MMU_XD_ORIG) + +/* Bit position which is always zero in a PTE. We'll use the PAT bit. + * This helps disambiguate PTEs that do not have the Present bit set (MMU_P): + * - If the entire entry is zero, it's an un-mapped virtual page + * - If MMU_PTE_ZERO is set, we flipped this page due to KPTI + * - Otherwise, this was a page-out + */ +#define PTE_ZERO MMU_PAT + +/* Protects x86_domain_list and serializes any changes to page tables */ +static struct k_spinlock x86_mmu_lock; + +#ifdef CONFIG_USERSPACE +/* List of all active and initialized memory domains. This is used to make + * sure all memory mappings are the same across all page tables when invoking + * range_map() + */ +static sys_slist_t x86_domain_list; +#endif /* "dummy" pagetables for the first-phase build. The real page tables * are produced by gen-mmu.py based on data read in zephyr-prebuilt.elf, @@ -177,10 +213,16 @@ static inline pentry_t *next_table(pentry_t entry, int level) return (pentry_t *)(get_entry_phys(entry, level)); } +/* Number of table entries at this level */ +static inline size_t get_num_entries(int level) +{ + return paging_levels[level].entries; +} + /* 4K for everything except PAE PDPTs */ static inline size_t table_size(int level) { - return paging_levels[level].entries * sizeof(pentry_t); + return get_num_entries(level) * sizeof(pentry_t); } /* For a table at a particular level, size of the amount of virtual memory @@ -196,7 +238,7 @@ static inline size_t get_entry_scope(int level) */ static inline size_t get_table_scope(int level) { - return get_entry_scope(level) * paging_levels[level].entries; + return get_entry_scope(level) * get_num_entries(level); } /* Must have checked Present bit first! 
Non-present entries may have OS data @@ -224,6 +266,13 @@ static inline void tlb_flush_page(void *addr) /* TODO: Need to implement TLB shootdown for SMP */ } +#ifdef CONFIG_X86_KPTI +static inline bool is_flipped_pte(pentry_t pte) +{ + return (pte & MMU_P) == 0 && (pte & PTE_ZERO) != 0; +} +#endif + #if defined(CONFIG_SMP) void z_x86_tlb_ipi(const void *arg) { @@ -527,14 +576,13 @@ void z_x86_dump_mmu_flags(pentry_t *ptables, void *virt) } #endif /* CONFIG_EXCEPTION_DEBUG */ -/* Page allocation function prototype, passed to map_page() */ -typedef void * (*page_get_func_t)(void *); - /* * Pool of free memory pages for creating new page tables, as needed. * - * This is very crude, once obtained, pages may not be returned. Fine for - * permanent kernel mappings. + * XXX: This is very crude, once obtained, pages may not be returned. Tuning + * the optimal value of CONFIG_X86_MMU_PAGE_POOL_PAGES is not intuitive. + * Better to have a kernel managed page pool of unused RAM that can be used for + * this, sbrk(), and other anonymous mappings. See #29526 */ static uint8_t __noinit page_pool[CONFIG_MMU_PAGE_SIZE * CONFIG_X86_MMU_PAGE_POOL_PAGES] @@ -542,26 +590,19 @@ static uint8_t __noinit static uint8_t *page_pos = page_pool + sizeof(page_pool); -static struct k_spinlock pool_lock; - /* Return a zeroed and suitably aligned memory page for page table data * from the global page pool */ -static void *page_pool_get(void *context) +static void *page_pool_get(void) { void *ret; - k_spinlock_key_t key; - ARG_UNUSED(context); - - key = k_spin_lock(&pool_lock); if (page_pos == page_pool) { ret = NULL; } else { page_pos -= CONFIG_MMU_PAGE_SIZE; ret = page_pos; } - k_spin_unlock(&pool_lock, key); if (ret != NULL) { memset(ret, 0, CONFIG_MMU_PAGE_SIZE); @@ -570,39 +611,111 @@ static void *page_pool_get(void *context) return ret; } +/* Reset permissions on a PTE to original state when the mapping was made */ +static inline pentry_t reset_pte(pentry_t old_val) +{ + pentry_t new_val; + + /* Clear any existing state in permission bits */ + new_val = old_val & (~K_MEM_PARTITION_PERM_MASK); + + /* Now set permissions based on the stashed original values */ + if ((old_val & MMU_RW_ORIG) != 0) { + new_val |= MMU_RW; + } + if ((old_val & MMU_US_ORIG) != 0) { + new_val |= MMU_US; + } +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) + if ((old_val & MMU_XD_ORIG) != 0) { + new_val |= MMU_XD; + } +#endif + return new_val; +} + +/* Wrapper functions for some gross stuff we have to do for Kernel + * page table isolation. If these are User mode page tables, the user bit + * isn't set, and this is not the shared page, all the bits in the PTE + * are flipped. This serves three purposes: + * - The page isn't present, implementing page table isolation + * - Flipping the physical address bits cheaply mitigates L1TF + * - State is preserved; to get original PTE, just complement again + */ +static inline void set_leaf_entry(pentry_t *entryp, pentry_t val, + bool user_table) +{ +#ifdef CONFIG_X86_KPTI + /* RAM is identity-mapped, so phys=virt for this page */ + uintptr_t shared_phys_addr = (uintptr_t)&z_shared_kernel_page_start; + + if (user_table && (val & MMU_US) == 0 && (val & MMU_P) != 0 && + get_entry_phys(val, NUM_LEVELS - 1) != shared_phys_addr) { + *entryp = ~val; + return; + } +#endif + *entryp = val; +} + +/* Indicates that the target page tables will be used by user mode threads.
+ * This only has implications for CONFIG_X86_KPTI where user thread facing + * page tables need nearly all pages that don't have the US bit to also + * not be Present. + */ +#define OPTION_USER BIT(0) + +/* Indicates that the operation requires TLBs to be flushed as we are altering + * existing mappings. Not needed for establishing new mappings + */ +#define OPTION_FLUSH BIT(1) + +/* Indicates that each PTE's permission bits should be restored to their + * original state when the memory was mapped. All other bits in the PTE are + * preserved. + */ +#define OPTION_RESET BIT(2) + +/* Indicates that allocations from the page pool are allowed to instantiate + * new paging structures. Only necessary when establishing new mappings + * and the entire address space isn't pre-allocated. + */ +#define OPTION_ALLOC BIT(3) + /** - * Low-level mapping function + * Low level page table update function for a virtual page * - * Walk the provided page tables until we get to the PTE for the provided - * virtual address, and set that to whatever is in 'entry_val'. + * For the provided set of page tables, update the PTE associated with the + * virtual address to a new value, using the mask to control what bits + * need to be preserved. * - * If memory must be drawn to instantiate page table memory, it will be - * obtained from the provided get_page() function. The function must - * return a page-aligned pointer to a page-sized block of zeroed memory. - * All intermediate tables have hard-coded flags of INT_FLAGS. + * It is permitted to set up mappings without the Present bit set, in which + * case all other bits may be used for OS accounting. * - * Presumes we want to map a minimally sized page of CONFIG_MMU_PAGE_SIZE. - * No support for mapping big pages yet; unclear if we will ever need it given - * Zephyr's typical use-cases. + * Must call this with x86_mmu_lock held. * - * TODO: There may be opportunities to optimize page table walks such as this - * by using recusrive page table mappings, see for example - * https://os.phil-opp.com/paging-implementation/#recursive-page-tables - * May also help if we need fast virtual-to-physical translation outside of - * the permanent memory mapping area. + * Common mask values: + * MASK_ALL - Update all PTE bits. Existing state totally discarded. + * MASK_PERM - Only update permission bits. All other bits and physical + * mapping preserved.
* - * @param ptables Top-level page tables pointer - * @param virt Virtual address to set mapping - * @param entry_val Value to set PTE to - * @param get_page Function to draw memory pages from - * @param ctx Context pointer to pass to get_page() - * @retval 0 success - * @retval -ENOMEM get_page() failed + * @param ptables Page tables to modify + * @param virt Virtual page table entry to update + * @param entry_val Value to update in the PTE (ignored if OPTION_RESET) + * @param mask What bits to update in the PTE (ignored if OPTION_RESET) + * @param options Control options, described above + * @retval 0 Success + * @retval -ENOMEM allocation required and no free pages (only if OPTION_ALLOC) */ static int page_map_set(pentry_t *ptables, void *virt, pentry_t entry_val, - page_get_func_t get_page, void *ctx) + pentry_t mask, uint32_t options) { pentry_t *table = ptables; + bool user_table = (options & OPTION_USER) != 0U; + bool reset = (options & OPTION_RESET) != 0U; + bool flush = (options & OPTION_FLUSH) != 0U; + + assert_virt_addr_aligned(virt); for (int level = 0; level < NUM_LEVELS; level++) { int index; @@ -613,7 +726,24 @@ static int page_map_set(pentry_t *ptables, void *virt, pentry_t entry_val, /* Check if we're a PTE */ if (level == (NUM_LEVELS - 1)) { - *entryp = entry_val; + pentry_t cur_pte = *entryp; + pentry_t new_pte; + +#ifdef CONFIG_X86_KPTI + if (is_flipped_pte(cur_pte)) { + /* Page was flipped for KPTI. Un-flip it */ + cur_pte = ~cur_pte; + } +#endif + + if (reset) { + new_pte = reset_pte(cur_pte); + } else { + new_pte = ((cur_pte & ~mask) | + (entry_val & mask)); + } + + set_leaf_entry(entryp, new_pte, user_table); break; } @@ -622,12 +752,17 @@ static int page_map_set(pentry_t *ptables, void *virt, pentry_t entry_val, /* Not present. Never done a mapping here yet, need * some RAM for linked tables */ - void *new_table = get_page(ctx); + void *new_table; + + __ASSERT((options & OPTION_ALLOC) != 0, + "missing page table and allocations disabled"); + + new_table = page_pool_get(); if (new_table == NULL) { return -ENOMEM; } - *entryp = ((uintptr_t)new_table) | INT_FLAGS; + *entryp = ((pentry_t)(uintptr_t)new_table) | INT_FLAGS; table = new_table; } else { /* We fail an assertion here due to no support for @@ -641,9 +776,168 @@ static int page_map_set(pentry_t *ptables, void *virt, pentry_t entry_val, } } + if (flush) { + tlb_flush_page(virt); + } + return 0; } +/** + * Map a physical region in a specific set of page tables. + * + * See documentation for page_map_set() for additional notes about masks and + * supported options. + * + * Must call this with x86_mmu_lock held. + * + * It is vital to remember that all virtual-to-physical mappings must be + * the same with respect to supervisor mode regardless of what thread is + * scheduled (and therefore, if multiple sets of page tables exist, which one + * is active). + * + * It is permitted to set up mappings without the Present bit set. + * + * @param ptables Page tables to modify + * @param virt Base page-aligned virtual memory address to map the region. + * @param phys Base page-aligned physical memory address for the region. + * Ignored if OPTION_RESET. Also affected by the mask parameter. This + * address is not directly examined, it will simply be programmed into + * the PTE. + * @param size Size of the physical region to map + * @param entry_flags Non-address bits to set in every PTE. Ignored if + * OPTION_RESET. Also affected by the mask parameter. + * @param mask What bits to update in each PTE. 
Un-set bits will never be + * modified. Ignored if OPTION_RESET. + * @param options Control options, described above + * @retval 0 Success + * @retval -ENOMEM allocation required and no free pages (only if OPTION_ALLOC) + */ +static int range_map_ptables(pentry_t *ptables, void *virt, uintptr_t phys, + size_t size, pentry_t entry_flags, pentry_t mask, + uint32_t options) +{ + int ret; + bool reset = (options & OPTION_RESET) != 0U; + + assert_addr_aligned(phys); + __ASSERT((size & (CONFIG_MMU_PAGE_SIZE - 1)) == 0U, + "unaligned size %zu", size); + __ASSERT((entry_flags & paging_levels[0].mask) == 0U, + "entry_flags " PRI_ENTRY " overlaps address area", + entry_flags); + + /* This implementation is stack-efficient but not particularly fast. + * We do a full page table walk for every page we are updating. + * Recursive approaches are possible, but use much more stack space. + */ + for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) { + uint8_t *dest_virt = (uint8_t *)virt + offset; + pentry_t entry_val; + + if (reset) { + entry_val = 0; + } else { + entry_val = (phys + offset) | entry_flags; + } + + ret = page_map_set(ptables, dest_virt, entry_val, mask, + options); + if (ret != 0) { + return ret; + } + } + + return 0; +} + +/** + * Establish or update a memory mapping for all page tables + * + * The physical region noted from phys to phys + size will be mapped to + * an equal sized virtual region starting at virt, with the provided flags. + * The mask value denotes what bits in PTEs will actually be modified. + * + * See range_map_ptables() for additional details. + * + * @param virt Page-aligned starting virtual address + * @param phys Page-aligned starting physical address. Ignored if the mask + * parameter does not enable address bits or OPTION_RESET is used. + * This region is not directly examined, it will simply be + * programmed into the page tables. + * @param size Size of the physical region to map + * @param entry_flags Desired state of non-address PTE bits covered by mask, + * ignored if OPTION_RESET + * @param mask What bits in the PTE to actually modify; unset bits will + * be preserved. Ignored if OPTION_RESET. + * @param options Control options. Do not set OPTION_USER here. OPTION_FLUSH + * will trigger a TLB shootdown after all tables are updated. + * @retval 0 Success + * @retval -ENOMEM page table allocation required, but no free pages + */ +static int range_map(void *virt, uintptr_t phys, size_t size, + pentry_t entry_flags, pentry_t mask, uint32_t options) +{ + k_spinlock_key_t key; + int ret = 0; + + LOG_DBG("%s: %p -> %p (%zu) flags " PRI_ENTRY " mask " + PRI_ENTRY " opt 0x%x", __func__, (void *)phys, virt, size, + entry_flags, mask, options); + +#ifdef CONFIG_X86_64 + /* There's a gap in the "64-bit" address space, as 4-level paging + * requires bits 48 to 63 to be copies of bit 47. Test this + * by treating as a signed value and shifting. + */ + __ASSERT(((((intptr_t)virt) << 16) >> 16) == (intptr_t)virt, + "non-canonical virtual address mapping %p (size %zu)", + virt, size); +#endif /* CONFIG_X86_64 */ + + __ASSERT((options & OPTION_USER) == 0U, "invalid option for function"); + + /* All virtual-to-physical mappings are the same in all page tables. + * What can differ is only access permissions, defined by the memory + * domain associated with the page tables, and the threads that are + * members of that domain. + * + * Any new mappings need to be applied to all page tables.
+ */ + key = k_spin_lock(&x86_mmu_lock); +#ifdef CONFIG_USERSPACE + sys_snode_t *node; + + SYS_SLIST_FOR_EACH_NODE(&x86_domain_list, node) { + struct arch_mem_domain *domain = + CONTAINER_OF(node, struct arch_mem_domain, node); + + ret = range_map_ptables(domain->ptables, virt, phys, size, + entry_flags, mask, + options | OPTION_USER); + if (ret != 0) { + /* NOTE: Currently we do not un-map a partially + * completed mapping. + */ + goto out_unlock; + } + } +#endif /* CONFIG_USERSPACE */ + ret = range_map_ptables(z_x86_kernel_ptables, virt, phys, size, + entry_flags, mask, options); +#ifdef CONFIG_USERSPACE +out_unlock: +#endif /* CONFIG_USERSPACE */ + k_spin_unlock(&x86_mmu_lock, key); + +#ifdef CONFIG_SMP + if ((options & OPTION_FLUSH) != 0U) { + tlb_shootdown(); + } +#endif /* CONFIG_SMP */ + return ret; +} + static pentry_t flags_to_entry(uint32_t flags) { pentry_t entry_flags = MMU_P; @@ -671,6 +965,10 @@ static pentry_t flags_to_entry(uint32_t flags) entry_flags |= ENTRY_RW; } + if ((flags & K_MEM_PERM_USER) != 0U) { + entry_flags |= ENTRY_US; + } + if ((flags & K_MEM_PERM_EXEC) == 0U) { entry_flags |= ENTRY_XD; } @@ -678,107 +976,30 @@ static pentry_t flags_to_entry(uint32_t flags) return entry_flags; } -/* map region virt..virt+size to phys with provided arch-neutral flags */ +/* map new region virt..virt+size to phys with provided arch-neutral flags */ int arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags) { - pentry_t entry_flags; - pentry_t *ptables; - - LOG_DBG("%s: %p -> %p (%zu) flags 0x%x", - __func__, (void *)phys, virt, size, flags); - -#ifdef CONFIG_X86_64 - /* There's a gap in the "64-bit" address space, as 4-level paging - * requires bits 48 to 63 to be copies of bit 47. Test this - * by treating as a signed value and shifting. - */ - __ASSERT(((((intptr_t)virt) << 16) >> 16) == (intptr_t)virt, - "non-canonical virtual address mapping %p (size %zu)", - virt, size); -#endif /* CONFIG_X86_64 */ - - /* For now, always map in the kernel's page tables, we're just using - * this for driver mappings. User mode mappings - * (and interactions with KPTI) not implemented yet. - */ - ptables = z_x86_kernel_ptables; - - if ((flags & K_MEM_PERM_USER) != 0U) { - /* TODO: user mode support - * entry_flags |= MMU_US; - */ - return -ENOTSUP; - } - - entry_flags = flags_to_entry(flags); - - for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) { - int ret; - pentry_t entry_val = (phys + offset) | entry_flags; - uint8_t *dest_virt = (uint8_t *)virt + offset; - - ret = page_map_set(ptables, dest_virt, entry_val, - page_pool_get, NULL); - - /* Currently used for new mappings, no TLB flush. Re-visit - * as capabilities increase - */ - - if (ret != 0) { - /* NOTE: Currently we do not un-map a partially - * completed mapping. - */ - return ret; - } - } - - return 0; + return range_map(virt, phys, size, flags_to_entry(flags), MASK_ALL, + OPTION_ALLOC); } #if CONFIG_X86_STACK_PROTECTION -/* Legacy stack guard function. 
This will eventually be replaced in favor - * of memory-mapping stacks (with a non-present mapping immediately below each - * one to catch overflows) instead of using in-place - */ -static void stack_guard_set(void *guard_page) -{ - pentry_t pte = ((uintptr_t)guard_page) | MMU_P | ENTRY_XD; - int ret; - - assert_virt_addr_aligned(guard_page); - - /* Always modify the kernel's page tables since this is for - * supervisor threads or handling syscalls - */ - ret = page_map_set(z_x86_kernel_ptables, guard_page, pte, - page_pool_get, NULL); - /* Literally should never happen */ - __ASSERT(ret == 0, "stack guard mapping failed for %p", guard_page); - (void)ret; -} - void z_x86_set_stack_guard(k_thread_stack_t *stack) { -#ifdef CONFIG_USERSPACE - if (z_stack_is_user_capable(stack)) { - struct z_x86_thread_stack_header *header = - (struct z_x86_thread_stack_header *)stack; - - stack_guard_set(&header->guard_page); - } else -#endif /* CONFIG_USERSPACE */ - { - stack_guard_set(stack); - } + /* Applied to all page tables as this affects supervisor mode. + * XXX: This never gets reset when the thread exits, which can + * cause problems if the memory is later used for something else. + * See #29499 + * + * Guard page is always the first page of the stack object for both + * kernel and thread stacks. + */ + (void)range_map(stack, 0, CONFIG_MMU_PAGE_SIZE, MMU_P | ENTRY_XD, + MASK_PERM, OPTION_FLUSH); } #endif /* CONFIG_X86_STACK_PROTECTION */ #ifdef CONFIG_USERSPACE -/* - * All of the code below will eventually be removed/replaced with a virtual - * address space aware userspace that doesn't do a physical memory map with - * memory domains. - */ static bool page_validate(pentry_t *ptables, uint8_t *addr, bool write) { pentry_t *table = (pentry_t *)ptables; @@ -838,392 +1059,301 @@ int arch_buffer_validate(void *addr, size_t size, int write) return ret; } -/* Fetch pages for per-thread page tables from reserved space within the - * thread stack object +/** +* Duplicate an entire set of page tables * - * For the moment, re-use pool_lock for synchronization + * Uses recursion, but depth at any given moment is limited by the number of + * paging levels. + * + * x86_mmu_lock must be held. + * + * @param dst a zeroed out chunk of memory of sufficient size for the indicated + * paging level. 
+ * @param src some paging structure from within the source page tables to copy + * at the indicated paging level + * @param level Current paging level + * @retval 0 Success + * @retval -ENOMEM Insufficient page pool memory */ -static void *thread_page_pool_get(void *context) +static int copy_page_table(pentry_t *dst, pentry_t *src, int level) { - struct k_thread *thread = context; - uint8_t *stack_object = (uint8_t *)thread->stack_obj; - void *ret; - k_spinlock_key_t key; - - key = k_spin_lock(&pool_lock); - ret = thread->arch.mmu_pos; - - if (thread->arch.mmu_pos >= stack_object + Z_X86_THREAD_PT_AREA) { - ret = NULL; + if (level == (NUM_LEVELS - 1)) { + /* Base case: leaf page table */ + for (int i = 0; i < get_num_entries(level); i++) { + set_leaf_entry(&dst[i], reset_pte(src[i]), true); + } } else { - thread->arch.mmu_pos += CONFIG_MMU_PAGE_SIZE; - memset(ret, 0, CONFIG_MMU_PAGE_SIZE); - } - k_spin_unlock(&pool_lock, key); + /* Recursive case: allocate sub-structures as needed and + * make recursive calls on them + */ + for (int i = 0; i < get_num_entries(level); i++) { + pentry_t *child_dst; + int ret; - return ret; -} + if ((src[i] & MMU_P) == 0) { + /* Non-present, skip */ + continue; + } -#define RAM_BASE ((uintptr_t)CONFIG_SRAM_BASE_ADDRESS) -#define RAM_END (RAM_BASE + (CONFIG_SRAM_SIZE * 1024UL)) + __ASSERT((src[i] & MMU_PS) == 0, + "large page encountered"); -/* Establish a mapping in the thread's page tables */ -static void thread_map(struct k_thread *thread, void *ptr, size_t size, - pentry_t flags, bool flush) -{ - pentry_t *ptables = z_x86_thread_page_tables_get(thread); + child_dst = page_pool_get(); + if (child_dst == NULL) { + return -ENOMEM; + } - assert_region_page_aligned(ptr, size); + /* Page table links are by physical address. RAM + * for page tables is identity-mapped, but double- + * cast needed for PAE case where sizeof(void *) and + * sizeof(pentry_t) are not the same. + */ + dst[i] = ((pentry_t)(uintptr_t)child_dst) | INT_FLAGS; - /* Only mapping system RAM addresses is supported in thread page tables, - * as the thread does not have its own copies of tables outside of it - */ - __ASSERT((uintptr_t)ptr >= RAM_BASE, - "%p below system RAM", ptr); - __ASSERT((uintptr_t)ptr < RAM_END, - "%p above system ram", ptr); - - for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) { - pentry_t pte; - uint8_t *pos; - int ret; - - pos = (uint8_t *)ptr + offset; - - if ((flags & MMU_P) == 0U) { - /* L1TF */ - pte = 0U; - } else { - pte = ((uintptr_t)pos) | flags; + ret = copy_page_table(child_dst, + next_table(src[i], level), + level + 1); + if (ret != 0) { + return ret; + } } - - ret = page_map_set(ptables, pos, pte, thread_page_pool_get, - thread); - __ASSERT(ret == 0, "mapping failed for %p", pos); - (void)ret; - - if (flush) { - tlb_flush_page(pos); - } - } -} - -/* Get the kernel's PTE value for a particular virtual address */ -static pentry_t kernel_page_map_get(void *virt) -{ - pentry_t *table = z_x86_kernel_ptables; - - for (int level = 0; level < NUM_LEVELS; level++) { - pentry_t entry = get_entry(table, virt, level); - - if ((entry & MMU_P) == 0U) { - break; - } - - if (is_leaf(level, entry)) { - __ASSERT((entry & MMU_PS) == 0, "bigpage found"); - return entry; - } - - table = next_table(entry, level); } return 0; } -/* In thread page tables, set mapping for a particular address to whatever - * mapping is set up for that address in the kernel's page tables. 
- */ -static void page_reset(struct k_thread *thread, void *virt) +static void region_map_update(pentry_t *ptables, void *start, + size_t size, pentry_t flags, bool reset) { - pentry_t kern_pte = kernel_page_map_get(virt); - pentry_t *thread_ptables = z_x86_thread_page_tables_get(thread); - int ret; + uint32_t options = OPTION_USER; + k_spinlock_key_t key; -#ifdef CONFIG_X86_KPTI - /* Shared kernel page needs to be mapped in page tables as it contains - * trampoline stack and important data structures. Otherwise, non-User - * pages aren't present. - */ - if ((kern_pte & MMU_US) == 0U && virt != &z_shared_kernel_page_start) { - kern_pte = 0; + if (reset) { + options |= OPTION_RESET; } -#endif /* CONFIG_X86_KPTI */ - - ret = page_map_set(thread_ptables, virt, - kern_pte, thread_page_pool_get, thread); - __ASSERT(ret == 0, "mapping failed for %p", virt); - (void)ret; -} - -#ifdef CONFIG_X86_KPTI -/* KPTI version. The thread-level page tables are ONLY used by user mode - * and very briefly when changing privileges. - * - * We leave any memory addresses outside of system RAM unmapped. - * Any addresses within system RAM are also unmapped unless they have the US - * bit set, or are the trampoline page. - */ -static void setup_thread_tables(struct k_thread *thread, - pentry_t *thread_ptables) -{ - ARG_UNUSED(thread_ptables); - - for (uint8_t *pos = (uint8_t *)RAM_BASE; pos < (uint8_t *)RAM_END; - pos += CONFIG_MMU_PAGE_SIZE) { - page_reset(thread, pos); - } -} -#else -/* get the Nth level paging structure for a particular virtual address */ -static pentry_t *page_table_get(pentry_t *toplevel, void *virt, int level) -{ - pentry_t *table = toplevel; - - __ASSERT(level < NUM_LEVELS, "bad level argument %d", level); - - for (int i = 0; i < level; i++) { - pentry_t entry = get_entry(table, virt, level); - - if ((entry & MMU_P) == 0U) { - return NULL; - } - __ASSERT((entry & MMU_PS) == 0, "bigpage found"); - table = next_table(entry, i); + if (ptables == z_x86_page_tables_get()) { + options |= OPTION_FLUSH; } - return table; -} + key = k_spin_lock(&x86_mmu_lock); + (void)range_map_ptables(ptables, start, 0, size, flags, MASK_PERM, + options); + k_spin_unlock(&x86_mmu_lock, key); -/* Get a pointer to the N-th level entry for a particular virtual address */ -static pentry_t *page_entry_ptr_get(pentry_t *toplevel, void *virt, int level) -{ - pentry_t *table = page_table_get(toplevel, virt, level); - - __ASSERT(table != NULL, "no table mapping for %p at level %d", - virt, level); - return get_entry_ptr(table, virt, level); -} - - /* Non-KPTI version. The thread-level page tables are used even during - * interrupts, exceptions, and syscalls, so we need all mappings. - * Copies will be made of all tables that provide mappings for system RAM, - * otherwise the kernel table will just be linked instead. - */ -static void setup_thread_tables(struct k_thread *thread, - pentry_t *thread_ptables) -{ - /* Copy top-level structure verbatim */ - (void)memcpy(thread_ptables, &z_x86_kernel_ptables[0], table_size(0)); - - /* Proceed through linked structure levels, and for all system RAM - * virtual addresses, create copies of all relevant tables. 
- */ - for (int level = 1; level < NUM_LEVELS; level++) { - uint8_t *start, *end; - size_t increment; - - increment = get_entry_scope(level); - start = (uint8_t *)ROUND_DOWN(RAM_BASE, increment); - end = (uint8_t *)ROUND_UP(RAM_END, increment); - - for (uint8_t *virt = start; virt < end; virt += increment) { - pentry_t *link, *master_table, *user_table; - - /* We're creating a new thread page table, so get the - * pointer to the entry in the previous table to have - * it point to the new location - */ - link = page_entry_ptr_get(thread_ptables, virt, - level - 1); - - /* Master table contents, which we make a copy of */ - master_table = page_table_get(z_x86_kernel_ptables, - virt, level); - - /* Pulled out of reserved memory in the stack object */ - user_table = thread_page_pool_get(thread); - __ASSERT(user_table != NULL, "out of memory") - - (void)memcpy(user_table, master_table, - table_size(level)); - - *link = ((pentry_t)user_table) | INT_FLAGS; - } - } -} -#endif /* CONFIG_X86_KPTI */ - -/* Called on creation of a user thread or when a supervisor thread drops to - * user mode. - * - * Sets up the per-thread page tables, such that when they are activated on - * context switch, everything is rseady to go. thread->arch.ptables is updated - * to the thread-level tables instead of the kernel's page tbales. - * - * Memory for the per-thread page table structures is drawn from the stack - * object, a buffer of size Z_X86_THREAD_PT_AREA starting from the beginning - * of the stack object. - */ -void z_x86_thread_pt_init(struct k_thread *thread) -{ - pentry_t *ptables; - - /* thread_page_pool_get() memory starts at the beginning of the - * stack object - */ - assert_virt_addr_aligned(thread->stack_obj); - thread->arch.mmu_pos = (uint8_t *)thread->stack_obj; - - /* Get memory for the top-level structure */ -#ifndef CONFIG_X86_PAE - ptables = thread_page_pool_get(thread); - __ASSERT(ptables != NULL, "out of memory"); -#else - struct z_x86_thread_stack_header *header = - (struct z_x86_thread_stack_header *)thread->stack_obj; - - ptables = (pentry_t *)&header->kernel_data.ptables; +#ifdef CONFIG_SMP + tlb_shootdown(); #endif - thread->arch.ptables = ((uintptr_t)ptables); - - setup_thread_tables(thread, ptables); - - /* Enable access to the thread's own stack buffer */ - thread_map(thread, (void *)thread->stack_info.start, - ROUND_UP(thread->stack_info.size, - CONFIG_MMU_PAGE_SIZE), - MMU_P | MMU_RW | MMU_US | MMU_XD, false); } -static inline void apply_mem_partition(struct k_thread *thread, - struct k_mem_partition *partition) +static inline void reset_region(pentry_t *ptables, void *start, size_t size) { - thread_map(thread, (void *)partition->start, partition->size, - partition->attr | MMU_P, false); + LOG_DBG("%s(%p, %p, %zu)", __func__, ptables, start, size); + region_map_update(ptables, start, size, 0, true); } -static void reset_mem_partition(struct k_thread *thread, - struct k_mem_partition *partition) +static inline void apply_region(pentry_t *ptables, void *start, + size_t size, pentry_t attr) { - uint8_t *addr = (uint8_t *)partition->start; - size_t size = partition->size; - - assert_region_page_aligned(addr, size); - for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) { - page_reset(thread, addr + offset); - } + LOG_DBG("%s(%p, %p, %zu, " PRI_ENTRY ")", __func__, ptables, start, + size, attr); + region_map_update(ptables, start, size, attr, false); } -void z_x86_apply_mem_domain(struct k_thread *thread, - struct k_mem_domain *mem_domain) +static void 
set_stack_perms(struct k_thread *thread, pentry_t *ptables) { - for (int i = 0, pcount = 0; pcount < mem_domain->num_partitions; i++) { - struct k_mem_partition *partition; - - partition = &mem_domain->partitions[i]; - if (partition->size == 0) { - continue; - } - pcount++; - - apply_mem_partition(thread, partition); - } + LOG_DBG("update stack for thread %p's ptables at %p: %p (size %zu)", + thread, ptables, (void *)thread->stack_info.start, + thread->stack_info.size); + apply_region(ptables, (void *)thread->stack_info.start, + thread->stack_info.size, + MMU_P | MMU_XD | MMU_RW | MMU_US); } /* - * Arch interface implementations for memory domains - * - * In all cases, if one of these arch_mem_domain_* APIs is called on a - * supervisor thread, we don't need to do anything. If the thread later drops - * into user mode the per-thread page tables will be generated and the memory - * domain configuration applied. + * Arch interface implementations for memory domains and userspace */ + +int arch_mem_domain_init(struct k_mem_domain *domain) +{ + int ret; + k_spinlock_key_t key = k_spin_lock(&x86_mmu_lock); + + LOG_DBG("%s(%p)", __func__, domain); +#if __ASSERT_ON + sys_snode_t *node; + + /* Assert that we have not already initialized this domain */ + SYS_SLIST_FOR_EACH_NODE(&x86_domain_list, node) { + struct arch_mem_domain *list_domain = + CONTAINER_OF(node, struct arch_mem_domain, node); + + __ASSERT(list_domain != &domain->arch, + "%s(%p) called multiple times", __func__, domain); + } +#endif /* __ASSERT_ON */ +#ifndef CONFIG_X86_KPTI + /* If we're not using KPTI then we can use the build time page tables + * (which are mutable) as the set of page tables for the default + * memory domain, saving us some memory. + * + * We skip adding this domain to x86_domain_list since we already + * update z_x86_kernel_ptables directly in range_map(). + */ + if (domain == &k_mem_domain_default) { + domain->arch.ptables = z_x86_kernel_ptables; + k_spin_unlock(&x86_mmu_lock, key); + return 0; + } +#endif /* CONFIG_X86_KPTI */ +#ifdef CONFIG_X86_PAE + /* PDPT is stored within the memory domain itself since it is + * much smaller than a full page + */ + (void)memset(domain->arch.pdpt, 0, sizeof(domain->arch.pdpt)); + domain->arch.ptables = domain->arch.pdpt; +#else + /* Allocate a page-sized top-level structure, either a PD or PML4 */ + domain->arch.ptables = page_pool_get(); + if (domain->arch.ptables == NULL) { + k_spin_unlock(&x86_mmu_lock, key); + return -ENOMEM; + } +#endif /* CONFIG_X86_PAE */ + + LOG_DBG("copy_page_table(%p, %p, 0)", domain->arch.ptables, + z_x86_kernel_ptables); + + /* Make a copy of the boot page tables created by gen_mmu.py */ + ret = copy_page_table(domain->arch.ptables, z_x86_kernel_ptables, 0); + if (ret == 0) { + sys_slist_append(&x86_domain_list, &domain->arch.node); + } + k_spin_unlock(&x86_mmu_lock, key); + + return ret; +} + void arch_mem_domain_partition_remove(struct k_mem_domain *domain, uint32_t partition_id) { - sys_dnode_t *node, *next_node; + struct k_mem_partition *partition = &domain->partitions[partition_id]; - /* Removing a partition. Need to reset the relevant memory range - * to the defaults in USER_PDPT for each thread. 
- */ - SYS_DLIST_FOR_EACH_NODE_SAFE(&domain->mem_domain_q, node, next_node) { - struct k_thread *thread = - CONTAINER_OF(node, struct k_thread, mem_domain_info); - - if ((thread->base.user_options & K_USER) == 0) { - continue; - } - - reset_mem_partition(thread, &domain->partitions[partition_id]); - } + /* Reset the partition's region back to defaults */ + reset_region(domain->arch.ptables, (void *)partition->start, + partition->size); } void arch_mem_domain_destroy(struct k_mem_domain *domain) { - for (int i = 0, pcount = 0; pcount < domain->num_partitions; i++) { - struct k_mem_partition *partition; - - partition = &domain->partitions[i]; - if (partition->size == 0) { - continue; - } - pcount++; - - arch_mem_domain_partition_remove(domain, i); - } + /* No-op, this is eventually getting removed in 2.5 */ } +/* Called on thread exit or when moving it to a different memory domain */ void arch_mem_domain_thread_remove(struct k_thread *thread) { struct k_mem_domain *domain = thread->mem_domain_info.mem_domain; - /* Non-user threads don't have per-thread page tables set up */ if ((thread->base.user_options & K_USER) == 0) { return; } - for (int i = 0, pcount = 0; pcount < domain->num_partitions; i++) { - struct k_mem_partition *partition; - - partition = &domain->partitions[i]; - if (partition->size == 0) { - continue; - } - pcount++; - - reset_mem_partition(thread, partition); + if ((thread->base.thread_state & _THREAD_DEAD) == 0) { + /* Thread is migrating to another memory domain and not + * exiting for good; we weren't called from + * z_thread_single_abort(). Resetting the stack region will + * take place in the forthcoming thread_add() call. + */ + return; } + + /* Restore permissions on the thread's stack area since it is no + * longer a member of the domain. + */ + reset_region(domain->arch.ptables, (void *)thread->stack_info.start, + thread->stack_info.size); } void arch_mem_domain_partition_add(struct k_mem_domain *domain, uint32_t partition_id) { - sys_dnode_t *node, *next_node; + struct k_mem_partition *partition = &domain->partitions[partition_id]; - SYS_DLIST_FOR_EACH_NODE_SAFE(&domain->mem_domain_q, node, next_node) { - struct k_thread *thread = - CONTAINER_OF(node, struct k_thread, mem_domain_info); - - if ((thread->base.user_options & K_USER) == 0) { - continue; - } - - apply_mem_partition(thread, &domain->partitions[partition_id]); - } + /* Update the page tables with the partition info */ + apply_region(domain->arch.ptables, (void *)partition->start, + partition->size, partition->attr | MMU_P); } +/* Invoked from memory domain API calls, as well as during thread creation */ void arch_mem_domain_thread_add(struct k_thread *thread) { - if ((thread->base.user_options & K_USER) == 0) { - return; + /* New memory domain we are being added to */ + struct k_mem_domain *domain = thread->mem_domain_info.mem_domain; + /* This is only set for threads that were migrating from some other + * memory domain; new threads this is NULL + */ + pentry_t *old_ptables = (pentry_t *)thread->arch.ptables; + bool is_user = (thread->base.user_options & K_USER) != 0; + bool is_migration = (old_ptables != NULL) && is_user; + + /* Allow US access to the thread's stack in its new domain if + * we are migrating. 
If we are not migrating this is done in + * z_x86_current_stack_perms() + */ + if (is_migration) { + set_stack_perms(thread, domain->arch.ptables); } - z_x86_apply_mem_domain(thread, thread->mem_domain_info.mem_domain); + thread->arch.ptables = (uintptr_t)domain->arch.ptables; + LOG_DBG("set thread %p page tables to %p", thread, + (void *)thread->arch.ptables); + + /* Check if we're doing a migration from a different memory domain + * and have to remove permissions from its old domain. + * + * XXX: The checks we have to do here and in + * arch_mem_domain_thread_remove() are clumsy, it may be worth looking + * into adding a specific arch_mem_domain_thread_migrate() API. + * See #29601 + */ + if (is_migration) { + reset_region(old_ptables, (void *)thread->stack_info.start, + thread->stack_info.size); + } + +#if !defined(CONFIG_X86_KPTI) + /* Need to switch to using these new page tables, in case we drop + * to user mode before we are ever context switched out. + * IPI takes care of this if the thread is currently running on some + * other CPU. + */ + if (thread == _current && thread->arch.ptables != z_x86_cr3_get()) { + z_x86_cr3_set(thread->arch.ptables); + } +#endif /* CONFIG_X86_KPTI */ } int arch_mem_domain_max_partitions_get(void) { return CONFIG_MAX_DOMAIN_PARTITIONS; } + +/* Invoked from z_x86_userspace_enter */ +void z_x86_current_stack_perms(void) +{ + /* Clear any previous context in the stack buffer to prevent + * unintentional data leakage. + */ + (void)memset((void *)_current->stack_info.start, 0xAA, + _current->stack_info.size - _current->stack_info.delta); + + /* Only now is it safe to grant access to the stack buffer since any + * previous context has been erased. + */ + + /* Memory domain access is already programmed into the page tables. + * Need to enable access to this new user thread's stack buffer in + * its domain-specific page tables. + */ + set_stack_perms(_current, z_x86_thread_page_tables_get(_current)); +} #endif /* CONFIG_USERSPACE */ diff --git a/arch/x86/include/x86_mmu.h b/arch/x86/include/x86_mmu.h index 720b5b5a822..84b51e87d57 100644 --- a/arch/x86/include/x86_mmu.h +++ b/arch/x86/include/x86_mmu.h @@ -40,7 +40,8 @@ #define MMU_PCD BITL(4) /** Page Cache Disable */ #define MMU_A BITL(5) /** Accessed */ #define MMU_D BITL(6) /** Dirty */ -#define MMU_PS BITL(7) /** Page Size */ +#define MMU_PS BITL(7) /** Page Size (non PTE)*/ +#define MMU_PAT BITL(7) /** Page Attribute (PTE) */ #define MMU_G BITL(8) /** Global */ #ifdef XD_SUPPORTED #define MMU_XD BITL(63) /** Execute Disable */ @@ -122,18 +123,14 @@ void z_x86_set_stack_guard(k_thread_stack_t *stack); */ extern uint8_t z_shared_kernel_page_start; #endif /* CONFIG_X86_KPTI */ - -/* Set up per-thread page tables just prior to entering user mode */ -void z_x86_thread_pt_init(struct k_thread *thread); - -/* Apply a memory domain policy to a set of thread page tables. - * - * Must be called with z_mem_domain_lock held. - */ -void z_x86_apply_mem_domain(struct k_thread *thread, - struct k_mem_domain *mem_domain); #endif /* CONFIG_USERSPACE */ +#ifdef CONFIG_X86_PAE +#define PTABLES_ALIGN 0x1fU +#else +#define PTABLES_ALIGN 0xfffU +#endif + /* Set CR3 to a physical address. There must be a valid top-level paging * structure here or the CPU will triple fault. The incoming page tables must * have the same kernel mappings wrt supervisor mode. 
Don't use this function @@ -141,6 +138,7 @@ void z_x86_apply_mem_domain(struct k_thread *thread, */ static inline void z_x86_cr3_set(uintptr_t phys) { + __ASSERT((phys & PTABLES_ALIGN) == 0U, "unaligned page tables"); #ifdef CONFIG_X86_64 __asm__ volatile("movq %0, %%cr3\n\t" : : "r" (phys) : "memory"); #else diff --git a/include/arch/x86/ia32/thread.h b/include/arch/x86/ia32/thread.h index 9f69db286c2..6b4e00ea87b 100644 --- a/include/arch/x86/ia32/thread.h +++ b/include/arch/x86/ia32/thread.h @@ -215,17 +215,9 @@ struct _thread_arch { uint8_t flags; #ifdef CONFIG_USERSPACE - /* Physical address of the page tables used by this thread. Supervisor - * threads always use the kernel's page table, user thread use - * per-thread tables stored in the stack object. - */ + /* Physical address of the page tables used by this thread */ uintptr_t ptables; - /* Track available unused space in the stack object used for building - * thread-specific page tables. - */ - uint8_t *mmu_pos; - /* Initial privilege mode stack pointer when doing a system call. * Un-set for supervisor threads. */ diff --git a/include/arch/x86/intel64/thread.h b/include/arch/x86/intel64/thread.h index 3263b3612d8..ad99adc9e58 100644 --- a/include/arch/x86/intel64/thread.h +++ b/include/arch/x86/intel64/thread.h @@ -115,17 +115,9 @@ struct _thread_arch { uint8_t flags; #ifdef CONFIG_USERSPACE - /* Physical address to page tables used by this thread. Supervisor - * threads always use the kernel's page table, user thread use - * per-thread tables stored in the stack object - */ + /* Physical address of the page tables used by this thread */ uintptr_t ptables; - /* Track available unused space in the stack object used for building - * thread-specific page tables. - */ - uint8_t *mmu_pos; - /* Initial privilege mode stack pointer when doing a system call. * Un-set for supervisor threads. */ diff --git a/include/arch/x86/mmustructs.h b/include/arch/x86/mmustructs.h index e237aa32823..2c10f25ce8a 100644 --- a/include/arch/x86/mmustructs.h +++ b/include/arch/x86/mmustructs.h @@ -148,6 +148,8 @@ Z_X86_MMU_XD) #ifndef _ASMLANGUAGE +#include <sys/slist.h> + /* Page table entry data type at all levels. Defined here due to * k_mem_partition_attr_t, eventually move to private x86_mmu.h */ @@ -157,5 +159,21 @@ typedef uint64_t pentry_t; typedef uint32_t pentry_t; #endif typedef pentry_t k_mem_partition_attr_t; + +struct arch_mem_domain { +#ifdef CONFIG_X86_PAE + /* 4-entry, 32-byte top-level PDPT */ + pentry_t pdpt[4]; +#endif + /* Pointer to top-level structure, either a PML4, PDPT, or PD */ + pentry_t *ptables; + + /* Linked list of all active memory domains */ + sys_snode_t node; +#ifdef CONFIG_X86_PAE +} __aligned(32); +#else +}; +#endif /* CONFIG_X86_PAE */ #endif /* _ASMLANGUAGE */ #endif /* ZEPHYR_INCLUDE_ARCH_X86_MMU_H */
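
The ignored-bit backup scheme in x86_mmu.c (MMU_RW_ORIG/MMU_US_ORIG/MMU_XD_ORIG mirroring RW/US/XD at mapping time so a later OPTION_RESET pass can un-apply a memory partition) is easiest to see in isolation. The stand-alone sketch below is not part of the patch: it assumes the backup bits sit in the hardware-ignored PTE bits 9-11 and uses MASK_PERM in place of K_MEM_PARTITION_PERM_MASK, but otherwise follows the same round trip as reset_pte().

/* Illustrative sketch only, not from the patch. Assumed bit positions.
 * Build and run natively: cc -o reset_demo reset_demo.c && ./reset_demo
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t pentry_t;

#define MMU_P        (1ULL << 0)   /* Present */
#define MMU_RW       (1ULL << 1)   /* Writable */
#define MMU_US       (1ULL << 2)   /* User-accessible */
#define MMU_XD       (1ULL << 63)  /* Execute Disable */

/* Assumption: backup copies stored in the hardware-ignored bits 9..11 */
#define MMU_RW_ORIG  (1ULL << 9)
#define MMU_US_ORIG  (1ULL << 10)
#define MMU_XD_ORIG  (1ULL << 11)

#define MASK_PERM    (MMU_RW | MMU_US | MMU_XD)

/* Same round trip as reset_pte() in the patch: drop the live permission
 * bits, then re-derive them from the backups stashed at mapping time.
 */
static pentry_t reset_pte(pentry_t old_val)
{
	pentry_t new_val = old_val & ~MASK_PERM;

	if ((old_val & MMU_RW_ORIG) != 0) {
		new_val |= MMU_RW;
	}
	if ((old_val & MMU_US_ORIG) != 0) {
		new_val |= MMU_US;
	}
	if ((old_val & MMU_XD_ORIG) != 0) {
		new_val |= MMU_XD;
	}
	return new_val;
}

int main(void)
{
	/* As-mapped: present, read-only, supervisor-only, no-execute */
	pentry_t pte = 0x100000ULL | MMU_P | MMU_XD | MMU_XD_ORIG;
	pentry_t granted, restored;

	/* A memory partition grants user read/write (a MASK_PERM update)... */
	granted = (pte & ~MASK_PERM) | MMU_RW | MMU_US;

	/* ...and removing the partition restores the as-mapped permissions */
	restored = reset_pte(granted);

	printf("mapped   %#018" PRIx64 "\n", pte);
	printf("granted  %#018" PRIx64 "\n", granted);
	printf("restored %#018" PRIx64 "\n", restored);
	return restored == pte ? 0 : 1;
}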
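
Likewise, a minimal host-side model of the KPTI handling in set_leaf_entry() and is_flipped_pte(): supervisor-only leaf entries written into user-facing page tables are stored complemented, so they read back as non-present (and with scrambled physical address bits, which cheaply mitigates L1TF), yet the original entry is recoverable by complementing again. This sketch is not the kernel code; the shared trampoline page exception is omitted and the bit positions are illustrative.

/* Illustrative sketch only, not from the patch.
 * Build and run natively: cc -o kpti_demo kpti_demo.c && ./kpti_demo
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t pentry_t;

#define MMU_P    (1ULL << 0)  /* Present */
#define MMU_US   (1ULL << 2)  /* User-accessible */
#define PTE_ZERO (1ULL << 7)  /* PAT bit, never set in an unflipped entry */

/* Supervisor-only leaf entries destined for user-facing page tables are
 * stored complemented; everything else is written through unchanged.
 */
static void set_leaf_entry(pentry_t *entryp, pentry_t val, bool user_table)
{
	if (user_table && (val & MMU_US) == 0 && (val & MMU_P) != 0) {
		*entryp = ~val;
		return;
	}
	*entryp = val;
}

static bool is_flipped_pte(pentry_t pte)
{
	return (pte & MMU_P) == 0 && (pte & PTE_ZERO) != 0;
}

int main(void)
{
	pentry_t kernel_pte = 0x200000ULL | MMU_P;  /* supervisor-only page */
	pentry_t user_copy;

	set_leaf_entry(&user_copy, kernel_pte, true);

	assert((user_copy & MMU_P) == 0);   /* hidden from user mode */
	assert(is_flipped_pte(user_copy));  /* distinguishable from unmapped */
	assert(~user_copy == kernel_pte);   /* complement restores original */

	/* User-accessible entries are passed through unchanged */
	set_leaf_entry(&user_copy, kernel_pte | MMU_US, true);
	assert(user_copy == (kernel_pte | MMU_US));
	return 0;
}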