kernel: mmu: make demand paging work on SMP

This is the minimum for demand paging to work on SMP systems.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Author:    Nicolas Pitre <npitre@baylibre.com> (2024-07-09 18:04:31 -04:00)
Committer: Carles Cufí
Commit:    6b3fff3a2f

@@ -662,15 +662,16 @@ void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_
 		arch_mem_map(dst, phys, size, flags);
 	}
 
-	if (!uninit) {
+out:
+	k_spin_unlock(&z_mm_lock, key);
+
+	if (dst != NULL && !uninit) {
 		/* If we later implement mappings to a copy-on-write
 		 * zero page, won't need this step
 		 */
 		memset(dst, 0, size);
 	}
 
-out:
-	k_spin_unlock(&z_mm_lock, key);
-
 	return dst;
 }
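The reordering above follows from the locking change made later in this patch: z_mm_lock becomes a spinlock that the page fault path also takes, so zeroing a (possibly large, possibly demand-paged) region should not happen with that lock held. A minimal sketch of the resulting shape, where reserve_and_map() is a hypothetical stand-in for the region-reservation logic and mm_lock stands in for z_mm_lock:

#include <string.h>
#include <zephyr/kernel.h>

static struct k_spinlock mm_lock;			/* stand-in for z_mm_lock */
void *reserve_and_map(uintptr_t phys, size_t size);	/* hypothetical helper */

static void *map_and_zero(uintptr_t phys, size_t size)
{
	k_spinlock_key_t key = k_spin_lock(&mm_lock);
	void *dst = reserve_and_map(phys, size);	/* NULL on failure */

	k_spin_unlock(&mm_lock, key);			/* drop the lock first... */

	if (dst != NULL) {
		memset(dst, 0, size);			/* ...then touch the memory */
	}
	return dst;
}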
@@ -1236,14 +1237,18 @@ static inline void do_backing_store_page_out(uintptr_t location)
 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
 }
 
-/* Current implementation relies on interrupt locking to any prevent page table
- * access, which falls over if other CPUs are active. Addressing this is not
- * as simple as using spinlocks as regular memory reads/writes constitute
- * "access" in this sense.
- *
- * Current needs for demand paging are on uniprocessor systems.
+#if defined(CONFIG_SMP) && defined(CONFIG_DEMAND_PAGING_ALLOW_IRQ)
+/*
+ * SMP support is very simple. Some resources such as the scratch page could
+ * be made per CPU, backing store driver execution be confined to the faulting
+ * CPU, statistics be made to cope with access concurrency, etc. But in the
+ * end we're dealing with memory transfer to/from some external storage which
+ * is inherently slow and whose access is most likely serialized anyway.
+ * So let's simply enforce global demand paging serialization across all CPUs
+ * with a mutex as there is no real gain from added parallelism here.
  */
-BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
+static K_MUTEX_DEFINE(z_mm_paging_lock);
+#endif
 
 static void virt_region_foreach(void *addr, size_t size,
 				void (*func)(void *))
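For reference, K_MUTEX_DEFINE() statically defines and initializes a struct k_mutex, and unlike a spinlock its holder is allowed to sleep, which is exactly what a backing-store transfer needs. A minimal usage sketch (names here are illustrative, not from the patch):

#include <zephyr/kernel.h>

K_MUTEX_DEFINE(paging_serial_lock);	/* statically initialized mutex */

static void serialized_transfer(void)
{
	/* Only one thread system-wide runs the transfer at a time; the
	 * holder may block while the slow backing store completes it.
	 */
	k_mutex_lock(&paging_serial_lock, K_FOREVER);
	/* ... page-in/page-out against the backing store ... */
	k_mutex_unlock(&paging_serial_lock);
}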
@@ -1333,16 +1338,21 @@ static int do_mem_evict(void *addr)
 	bool dirty;
 	struct k_mem_page_frame *pf;
 	uintptr_t location;
-	int key, ret;
+	k_spinlock_key_t key;
 	uintptr_t flags, phys;
+	int ret;
 
 #if CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	__ASSERT(!k_is_in_isr(),
 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
 		 __func__);
+#ifdef CONFIG_SMP
+	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
+#else
 	k_sched_lock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	flags = arch_page_info_get(addr, &phys, false);
 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
 		 "address %p isn't mapped", addr);
@@ -1362,19 +1372,23 @@ static int do_mem_evict(void *addr)
 	__ASSERT(ret == 0, "failed to prepare page frame");
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (dirty) {
 		do_backing_store_page_out(location);
 	}
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	page_frame_free_locked(pf);
 out:
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+#ifdef CONFIG_SMP
+	k_mutex_unlock(&z_mm_paging_lock);
+#else
 	k_sched_unlock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 
 	return ret;
 }
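The CONFIG_DEMAND_PAGING_ALLOW_IRQ branches above implement a drop/relock pattern: the spinlock is released across the slow backing-store write so interrupts stay serviceable, then re-taken for the final bookkeeping. A condensed sketch of that pattern, reusing the patch's names (the referenced helpers are assumed to be declared as in the real file):

/* Condensed sketch of the drop/relock pattern in do_mem_evict(). */
static void evict_frame(struct k_mem_page_frame *pf, bool dirty,
			uintptr_t location)
{
	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_spin_unlock(&z_mm_lock, key);	/* IRQs serviceable during slow I/O */
#endif
	if (dirty) {
		do_backing_store_page_out(location);	/* slow transfer */
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = k_spin_lock(&z_mm_lock);	/* re-take for the bookkeeping */
#endif
	page_frame_free_locked(pf);	/* requires z_mm_lock held */
	k_spin_unlock(&z_mm_lock, key);
}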
@@ -1400,11 +1414,12 @@ int k_mem_page_out(void *addr, size_t size)
 
 int k_mem_page_frame_evict(uintptr_t phys)
 {
-	int key, ret;
+	k_spinlock_key_t key;
 	struct k_mem_page_frame *pf;
 	bool dirty;
 	uintptr_t flags;
 	uintptr_t location;
+	int ret;
 
 	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
 		 __func__, phys);
@@ -1417,9 +1432,13 @@ int k_mem_page_frame_evict(uintptr_t phys)
 	__ASSERT(!k_is_in_isr(),
 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
 		 __func__);
+#ifdef CONFIG_SMP
+	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
+#else
 	k_sched_lock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	pf = k_mem_phys_to_page_frame(phys);
 	if (!k_mem_page_frame_is_mapped(pf)) {
 		/* Nothing to do, free page */
@@ -1436,19 +1455,23 @@ int k_mem_page_frame_evict(uintptr_t phys)
 	}
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (dirty) {
 		do_backing_store_page_out(location);
 	}
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	page_frame_free_locked(pf);
 out:
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+#ifdef CONFIG_SMP
+	k_mutex_unlock(&z_mm_paging_lock);
+#else
 	k_sched_unlock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 
 	return ret;
 }
@@ -1549,12 +1572,13 @@ static inline struct k_mem_page_frame *do_eviction_select(bool *dirty)
 static bool do_page_fault(void *addr, bool pin)
 {
 	struct k_mem_page_frame *pf;
-	int key, ret;
+	k_spinlock_key_t key;
 	uintptr_t page_in_location, page_out_location;
 	enum arch_page_location status;
 	bool result;
 	bool dirty = false;
-	struct k_thread *faulting_thread = _current_cpu->current;
+	struct k_thread *faulting_thread;
+	int ret;
 
 	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
 		 addr);
@@ -1568,13 +1592,11 @@ static bool do_page_fault(void *addr, bool pin)
 	 */
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	/* We lock the scheduler so that other threads are never scheduled
-	 * during the page-in/out operation.
-	 *
-	 * We do however re-enable interrupts during the page-in/page-out
-	 * operation if and only if interrupts were enabled when the exception
-	 * was taken; in this configuration page faults in an ISR are a bug;
-	 * all their code/data must be pinned.
+	/*
+	 * We do re-enable interrupts during the page-in/page-out operation
+	 * if and only if interrupts were enabled when the exception was
+	 * taken; in this configuration page faults in an ISR are a bug; all
+	 * their code/data must be pinned.
 	 *
 	 * If interrupts were disabled when the exception was taken, the
 	 * arch code is responsible for keeping them that way when entering
@@ -1584,20 +1606,35 @@ static bool do_page_fault(void *addr, bool pin)
 	 * entire operation. This is far worse for system interrupt latency
 	 * but requires less pinned pages and ISRs may also take page faults.
 	 *
-	 * Support for allowing k_mem_paging_backing_store_page_out() and
+	 * On UP we lock the scheduler so that other threads are never
+	 * scheduled during the page-in/out operation. Support for
+	 * allowing k_mem_paging_backing_store_page_out() and
 	 * k_mem_paging_backing_store_page_in() to also sleep and allow
 	 * other threads to run (such as in the case where the transfer is
-	 * async DMA) is not implemented. Even if limited to thread context,
-	 * arbitrary memory access triggering exceptions that put a thread to
-	 * sleep on a contended page fault operation will break scheduling
-	 * assumptions of cooperative threads or threads that implement
-	 * crticial sections with spinlocks or disabling IRQs.
+	 * async DMA) is not supported on UP. Even if limited to thread
+	 * context, arbitrary memory access triggering exceptions that put
+	 * a thread to sleep on a contended page fault operation will break
+	 * scheduling assumptions of cooperative threads or threads that
+	 * implement critical sections with spinlocks or disabling IRQs.
+	 *
+	 * On SMP, though, exclusivity cannot be assumed solely from being
+	 * a cooperative thread. Another thread with any prio may be running
+	 * on another CPU so exclusion must already be enforced by other
+	 * means. Therefore trying to prevent scheduling on SMP is pointless,
+	 * and k_sched_lock() is equivalent to a no-op on SMP anyway.
+	 * As a result, sleeping/rescheduling in the SMP case is fine.
 	 */
-	k_sched_lock();
 	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
+#ifdef CONFIG_SMP
+	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
+#else
+	k_sched_lock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
+	faulting_thread = _current_cpu->current;
+
 	status = arch_page_location_get(addr, &page_in_location);
 	if (status == ARCH_PAGE_LOCATION_BAD) {
 		/* Return false to treat as a fatal error */
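The comment's UP vs. SMP argument reduces to a build-time choice of serialization primitive: k_sched_lock() cannot stop threads already running on other CPUs, and because a page fault holder may sleep on backing-store I/O, the SMP primitive must be a mutex rather than a spinlock. Expressed as hypothetical helpers (the patch open-codes these #ifdefs at each call site instead):

static inline void paging_serialize_enter(void)
{
#ifdef CONFIG_SMP
	/* Cross-CPU exclusion; the holder may sleep on backing store I/O. */
	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
#else
	/* UP: enough to keep other threads from being scheduled. */
	k_sched_lock();
#endif
}

static inline void paging_serialize_exit(void)
{
#ifdef CONFIG_SMP
	k_mutex_unlock(&z_mm_paging_lock);
#else
	k_sched_unlock();
#endif
}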
@@ -1628,7 +1665,7 @@ static bool do_page_fault(void *addr, bool pin)
 	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
 		 "unexpected status value %d", status);
 
-	paging_stats_faults_inc(faulting_thread, key);
+	paging_stats_faults_inc(faulting_thread, key.key);
 
 	pf = free_page_frame_list_get();
 	if (pf == NULL) {
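The key.key spelling follows from the spinlock API's key type: k_spinlock_key_t is a one-member struct wrapping the raw interrupt-state value, roughly as below, so a caller that previously forwarded the plain int now forwards the member:

/* Paraphrased from Zephyr's include/zephyr/spinlock.h */
struct z_spinlock_key {
	int key;	/* value returned by arch_irq_lock() */
};
typedef struct z_spinlock_key k_spinlock_key_t;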
@@ -1645,7 +1682,7 @@ static bool do_page_fault(void *addr, bool pin)
 	__ASSERT(ret == 0, "failed to prepare page frame");
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 	/* Interrupts are now unlocked if they were not locked when we entered
 	 * this function, and we may service ISRs. The scheduler is still
 	 * locked.
@@ -1657,7 +1694,7 @@ static bool do_page_fault(void *addr, bool pin)
 	do_backing_store_page_in(page_in_location);
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED);
@@ -1672,9 +1709,13 @@ static bool do_page_fault(void *addr, bool pin)
 		k_mem_paging_eviction_add(pf);
 	}
 out:
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+#ifdef CONFIG_SMP
+	k_mutex_unlock(&z_mm_paging_lock);
+#else
 	k_sched_unlock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 
 	return result;
@@ -1722,10 +1763,10 @@ bool k_mem_page_fault(void *addr)
 static void do_mem_unpin(void *addr)
 {
 	struct k_mem_page_frame *pf;
-	unsigned int key;
+	k_spinlock_key_t key;
 	uintptr_t flags, phys;
 
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	flags = arch_page_info_get(addr, &phys, false);
 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
 		 "invalid data page at %p", addr);
@@ -1736,7 +1777,7 @@ static void do_mem_unpin(void *addr)
 			k_mem_paging_eviction_add(pf);
 		}
 	}
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 }
 
 void k_mem_unpin(void *addr, size_t size)