kernel: mmu: make demand paging work on SMP

This is the minimum for demand paging to work on SMP systems.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Author:    Nicolas Pitre <npitre@baylibre.com> (2024-07-09 18:04:31 -04:00)
Committer: Carles Cufí
Commit:    6b3fff3a2f

@@ -662,15 +662,16 @@ void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_
 		arch_mem_map(dst, phys, size, flags);
 	}
 
-	if (!uninit) {
+out:
+	k_spin_unlock(&z_mm_lock, key);
+
+	if (dst != NULL && !uninit) {
 		/* If we later implement mappings to a copy-on-write
 		 * zero page, won't need this step
 		 */
 		memset(dst, 0, size);
 	}
 
-out:
-	k_spin_unlock(&z_mm_lock, key);
-
 	return dst;
 }
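The reordering above follows from the locking change made later in this patch: z_mm_lock becomes a spinlock that the page fault path also takes, so zeroing a (possibly large, possibly demand-paged) region should not happen with that lock held. A minimal sketch of the resulting shape, where reserve_and_map() is a hypothetical stand-in for the region-reservation logic and mm_lock stands in for z_mm_lock:

#include <string.h>
#include <zephyr/kernel.h>

static struct k_spinlock mm_lock;			/* stand-in for z_mm_lock */
void *reserve_and_map(uintptr_t phys, size_t size);	/* hypothetical helper */

static void *map_and_zero(uintptr_t phys, size_t size)
{
	k_spinlock_key_t key = k_spin_lock(&mm_lock);
	void *dst = reserve_and_map(phys, size);	/* NULL on failure */

	k_spin_unlock(&mm_lock, key);			/* drop the lock first... */

	if (dst != NULL) {
		memset(dst, 0, size);			/* ...then touch the memory */
	}
	return dst;
}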
@@ -1236,14 +1237,18 @@ static inline void do_backing_store_page_out(uintptr_t location)
 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
 }
 
-/* Current implementation relies on interrupt locking to any prevent page table
- * access, which falls over if other CPUs are active. Addressing this is not
- * as simple as using spinlocks as regular memory reads/writes constitute
- * "access" in this sense.
- *
- * Current needs for demand paging are on uniprocessor systems.
+#if defined(CONFIG_SMP) && defined(CONFIG_DEMAND_PAGING_ALLOW_IRQ)
+/*
+ * SMP support is very simple. Some resources such as the scratch page could
+ * be made per CPU, backing store driver execution be confined to the faulting
+ * CPU, statistics be made to cope with access concurrency, etc. But in the
+ * end we're dealing with memory transfer to/from some external storage which
+ * is inherently slow and whose access is most likely serialized anyway.
+ * So let's simply enforce global demand paging serialization across all CPUs
+ * with a mutex as there is no real gain from added parallelism here.
  */
-BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
+static K_MUTEX_DEFINE(z_mm_paging_lock);
+#endif
 
 static void virt_region_foreach(void *addr, size_t size,
 				void (*func)(void *))
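For reference, K_MUTEX_DEFINE() statically defines and initializes a struct k_mutex, and unlike a spinlock its holder is allowed to sleep, which is exactly what a backing-store transfer needs. A minimal usage sketch (names here are illustrative, not from the patch):

#include <zephyr/kernel.h>

K_MUTEX_DEFINE(paging_serial_lock);	/* statically initialized mutex */

static void serialized_transfer(void)
{
	/* Only one thread system-wide runs the transfer at a time; the
	 * holder may block while the slow backing store completes it.
	 */
	k_mutex_lock(&paging_serial_lock, K_FOREVER);
	/* ... page-in/page-out against the backing store ... */
	k_mutex_unlock(&paging_serial_lock);
}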
@@ -1333,16 +1338,21 @@ static int do_mem_evict(void *addr)
 	bool dirty;
 	struct k_mem_page_frame *pf;
 	uintptr_t location;
-	int key, ret;
+	k_spinlock_key_t key;
 	uintptr_t flags, phys;
+	int ret;
 
 #if CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	__ASSERT(!k_is_in_isr(),
 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
 		 __func__);
+#ifdef CONFIG_SMP
+	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
+#else
 	k_sched_lock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	flags = arch_page_info_get(addr, &phys, false);
 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
 		 "address %p isn't mapped", addr);
@@ -1362,19 +1372,23 @@ static int do_mem_evict(void *addr)
 	__ASSERT(ret == 0, "failed to prepare page frame");
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (dirty) {
 		do_backing_store_page_out(location);
 	}
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	page_frame_free_locked(pf);
 out:
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+#ifdef CONFIG_SMP
+	k_mutex_unlock(&z_mm_paging_lock);
+#else
 	k_sched_unlock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 
 	return ret;
 }
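The CONFIG_DEMAND_PAGING_ALLOW_IRQ branches above implement a drop/relock pattern: the spinlock is released across the slow backing-store write so interrupts stay serviceable, then re-taken for the final bookkeeping. A condensed sketch of that pattern, reusing the patch's names (the referenced helpers are assumed to be declared as in the real file):

/* Condensed sketch of the drop/relock pattern in do_mem_evict(). */
static void evict_frame(struct k_mem_page_frame *pf, bool dirty,
			uintptr_t location)
{
	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_spin_unlock(&z_mm_lock, key);	/* IRQs serviceable during slow I/O */
#endif
	if (dirty) {
		do_backing_store_page_out(location);	/* slow transfer */
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = k_spin_lock(&z_mm_lock);	/* re-take for the bookkeeping */
#endif
	page_frame_free_locked(pf);	/* requires z_mm_lock held */
	k_spin_unlock(&z_mm_lock, key);
}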
@@ -1400,11 +1414,12 @@ int k_mem_page_out(void *addr, size_t size)
 
 int k_mem_page_frame_evict(uintptr_t phys)
 {
-	int key, ret;
+	k_spinlock_key_t key;
 	struct k_mem_page_frame *pf;
 	bool dirty;
 	uintptr_t flags;
 	uintptr_t location;
+	int ret;
 
 	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
 		 __func__, phys);
@@ -1417,9 +1432,13 @@ int k_mem_page_frame_evict(uintptr_t phys)
 	__ASSERT(!k_is_in_isr(),
 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
 		 __func__);
+#ifdef CONFIG_SMP
+	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
+#else
 	k_sched_lock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	pf = k_mem_phys_to_page_frame(phys);
 	if (!k_mem_page_frame_is_mapped(pf)) {
 		/* Nothing to do, free page */
@@ -1436,19 +1455,23 @@ int k_mem_page_frame_evict(uintptr_t phys)
 	}
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (dirty) {
 		do_backing_store_page_out(location);
 	}
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	page_frame_free_locked(pf);
 out:
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+#ifdef CONFIG_SMP
+	k_mutex_unlock(&z_mm_paging_lock);
+#else
 	k_sched_unlock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 
 	return ret;
 }
@@ -1549,12 +1572,13 @@ static inline struct k_mem_page_frame *do_eviction_select(bool *dirty)
 static bool do_page_fault(void *addr, bool pin)
 {
 	struct k_mem_page_frame *pf;
-	int key, ret;
+	k_spinlock_key_t key;
 	uintptr_t page_in_location, page_out_location;
 	enum arch_page_location status;
 	bool result;
 	bool dirty = false;
-	struct k_thread *faulting_thread = _current_cpu->current;
+	struct k_thread *faulting_thread;
+	int ret;
 
 	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
 		 addr);
@@ -1568,13 +1592,11 @@ static bool do_page_fault(void *addr, bool pin)
 	 */
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	/* We lock the scheduler so that other threads are never scheduled
-	 * during the page-in/out operation.
-	 *
-	 * We do however re-enable interrupts during the page-in/page-out
-	 * operation if and only if interrupts were enabled when the exception
-	 * was taken; in this configuration page faults in an ISR are a bug;
-	 * all their code/data must be pinned.
+	/*
+	 * We do re-enable interrupts during the page-in/page-out operation
+	 * if and only if interrupts were enabled when the exception was
+	 * taken; in this configuration page faults in an ISR are a bug; all
+	 * their code/data must be pinned.
 	 *
 	 * If interrupts were disabled when the exception was taken, the
 	 * arch code is responsible for keeping them that way when entering
@@ -1584,20 +1606,35 @@ static bool do_page_fault(void *addr, bool pin)
 	 * entire operation. This is far worse for system interrupt latency
 	 * but requires less pinned pages and ISRs may also take page faults.
 	 *
-	 * Support for allowing k_mem_paging_backing_store_page_out() and
+	 * On UP we lock the scheduler so that other threads are never
+	 * scheduled during the page-in/out operation. Support for
+	 * allowing k_mem_paging_backing_store_page_out() and
 	 * k_mem_paging_backing_store_page_in() to also sleep and allow
 	 * other threads to run (such as in the case where the transfer is
-	 * async DMA) is not implemented. Even if limited to thread context,
-	 * arbitrary memory access triggering exceptions that put a thread to
-	 * sleep on a contended page fault operation will break scheduling
-	 * assumptions of cooperative threads or threads that implement
-	 * crticial sections with spinlocks or disabling IRQs.
+	 * async DMA) is not supported on UP. Even if limited to thread
+	 * context, arbitrary memory access triggering exceptions that put
+	 * a thread to sleep on a contended page fault operation will break
+	 * scheduling assumptions of cooperative threads or threads that
+	 * implement critical sections with spinlocks or disabling IRQs.
+	 *
+	 * On SMP, though, exclusivity cannot be assumed solely from being
+	 * a cooperative thread. Another thread with any prio may be running
+	 * on another CPU so exclusion must already be enforced by other
+	 * means. Therefore trying to prevent scheduling on SMP is pointless,
+	 * and k_sched_lock() is equivalent to a no-op on SMP anyway.
+	 * As a result, sleeping/rescheduling in the SMP case is fine.
 	 */
-	k_sched_lock();
 	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
+#ifdef CONFIG_SMP
+	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
+#else
+	k_sched_lock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
+	faulting_thread = _current_cpu->current;
+
 	status = arch_page_location_get(addr, &page_in_location);
 	if (status == ARCH_PAGE_LOCATION_BAD) {
 		/* Return false to treat as a fatal error */
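The comment's UP vs. SMP argument reduces to a build-time choice of serialization primitive: k_sched_lock() cannot stop threads already running on other CPUs, and because a page fault holder may sleep on backing-store I/O, the SMP primitive must be a mutex rather than a spinlock. Expressed as hypothetical helpers (the patch open-codes these #ifdefs at each call site instead):

static inline void paging_serialize_enter(void)
{
#ifdef CONFIG_SMP
	/* Cross-CPU exclusion; the holder may sleep on backing store I/O. */
	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
#else
	/* UP: enough to keep other threads from being scheduled. */
	k_sched_lock();
#endif
}

static inline void paging_serialize_exit(void)
{
#ifdef CONFIG_SMP
	k_mutex_unlock(&z_mm_paging_lock);
#else
	k_sched_unlock();
#endif
}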
@@ -1628,7 +1665,7 @@ static bool do_page_fault(void *addr, bool pin)
 	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
 		 "unexpected status value %d", status);
 
-	paging_stats_faults_inc(faulting_thread, key);
+	paging_stats_faults_inc(faulting_thread, key.key);
 
 	pf = free_page_frame_list_get();
 	if (pf == NULL) {
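The key.key spelling follows from the spinlock API's key type: k_spinlock_key_t is a one-member struct wrapping the raw interrupt-state value, roughly as below, so a caller that previously forwarded the plain int now forwards the member:

/* Paraphrased from Zephyr's include/zephyr/spinlock.h */
struct z_spinlock_key {
	int key;	/* value returned by arch_irq_lock() */
};
typedef struct z_spinlock_key k_spinlock_key_t;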
@@ -1645,7 +1682,7 @@ static bool do_page_fault(void *addr, bool pin)
 	__ASSERT(ret == 0, "failed to prepare page frame");
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 	/* Interrupts are now unlocked if they were not locked when we entered
 	 * this function, and we may service ISRs. The scheduler is still
 	 * locked.
@@ -1657,7 +1694,7 @@ static bool do_page_fault(void *addr, bool pin)
 	do_backing_store_page_in(page_in_location);
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED);
@@ -1672,9 +1709,13 @@ static bool do_page_fault(void *addr, bool pin)
 		k_mem_paging_eviction_add(pf);
 	}
 out:
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+#ifdef CONFIG_SMP
+	k_mutex_unlock(&z_mm_paging_lock);
+#else
 	k_sched_unlock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 
 	return result;
@@ -1722,10 +1763,10 @@ bool k_mem_page_fault(void *addr)
 static void do_mem_unpin(void *addr)
 {
 	struct k_mem_page_frame *pf;
-	unsigned int key;
+	k_spinlock_key_t key;
 	uintptr_t flags, phys;
 
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	flags = arch_page_info_get(addr, &phys, false);
 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
 		 "invalid data page at %p", addr);
@@ -1736,7 +1777,7 @@ static void do_mem_unpin(void *addr)
 			k_mem_paging_eviction_add(pf);
 		}
 	}
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 }
 
 void k_mem_unpin(void *addr, size_t size)