From ae865198191b030c20b458dfc728c8294920e0ed Mon Sep 17 00:00:00 2001 From: Daniel Leung Date: Fri, 26 Mar 2021 12:03:42 -0700 Subject: [PATCH] kernel: mmu: collect more demand paging statistics This adds more bits to gather statistics on demand paging, e.g. clean vs dirty pages evicted, # page faults with IRQ locked/unlocked, etc. Also extends this to gather per-thread demand paging statistics. Signed-off-by: Daniel Leung --- arch/Kconfig | 21 +++- include/kernel/thread.h | 9 ++ include/sys/mem_manage.h | 58 ++++++++++ kernel/CMakeLists.txt | 5 + kernel/mmu.c | 103 ++++++++++++------ kernel/paging/statistics.c | 71 ++++++++++++ .../kernel/mem_protect/demand_paging/prj.conf | 1 + 7 files changed, 236 insertions(+), 32 deletions(-) create mode 100644 kernel/paging/statistics.c diff --git a/arch/Kconfig b/arch/Kconfig index cafae92c988..902173042bb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -648,7 +648,7 @@ config KERNEL_VM_SIZE implement a notion of "high" memory in Zephyr to work around physical RAM size larger than the defined bounds of the virtual address space. -config DEMAND_PAGING +menuconfig DEMAND_PAGING bool "Enable demand paging [EXPERIMENTAL]" depends on ARCH_HAS_DEMAND_PAGING help @@ -671,6 +671,25 @@ config DEMAND_PAGING_ALLOW_IRQ If this option is disabled, the page fault servicing logic runs with interrupts disabled for the entire operation. However, ISRs may also page fault. + +config DEMAND_PAGING_STATS + bool "Gather Demand Paging Statistics" + help + This enables gathering various statistics related to demand paging, + e.g. number of pagefaults. This is useful for tuning eviction + algorithms and optimizing the backing store. + + Say N in production systems, as this is not without cost. + +config DEMAND_PAGING_THREAD_STATS + bool "Gather per Thread Demand Paging Statistics" + depends on DEMAND_PAGING_STATS + help + This enables gathering per-thread statistics related to demand + paging. 
+ + Say N in production systems, as this is not without cost. + endif # DEMAND_PAGING endif # MMU diff --git a/include/kernel/thread.h b/include/kernel/thread.h index 6cdef33a128..b615fda17fd 100644 --- a/include/kernel/thread.h +++ b/include/kernel/thread.h @@ -7,6 +7,10 @@ #ifndef ZEPHYR_INCLUDE_KERNEL_THREAD_H_ #define ZEPHYR_INCLUDE_KERNEL_THREAD_H_ +#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS +#include +#endif + /** * @typedef k_thread_entry_t * @brief Thread entry point function type. @@ -279,6 +283,11 @@ struct k_thread { struct _thread_runtime_stats rt_stats; #endif +#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS + /** Paging statistics */ + struct k_mem_paging_stats_t paging_stats; +#endif + /** arch-specifics: must always be at the end */ struct _thread_arch arch; }; diff --git a/include/sys/mem_manage.h b/include/sys/mem_manage.h index 2d70cc2aa7c..27a7523943d 100644 --- a/include/sys/mem_manage.h +++ b/include/sys/mem_manage.h @@ -79,6 +79,34 @@ #include #include +struct k_mem_paging_stats_t { +#ifdef CONFIG_DEMAND_PAGING_STATS + struct { + /** Number of page faults */ + unsigned long cnt; + + /** Number of page faults with IRQ locked */ + unsigned long irq_locked; + + /** Number of page faults with IRQ unlocked */ + unsigned long irq_unlocked; + +#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ + /** Number of page faults while in ISR */ + unsigned long in_isr; +#endif + } pagefaults; + + struct { + /** Number of clean pages selected for eviction */ + unsigned long clean; + + /** Number of dirty pages selected for eviction */ + unsigned long dirty; + } eviction; +#endif /* CONFIG_DEMAND_PAGING_STATS */ +}; + /* Just like Z_MEM_PHYS_ADDR() but with type safety and assertions */ static inline uintptr_t z_mem_phys_addr(void *virt) { @@ -349,6 +377,36 @@ void k_mem_pin(void *addr, size_t size); void k_mem_unpin(void *addr, size_t size); #endif /* CONFIG_DEMAND_PAGING */ +#ifdef CONFIG_DEMAND_PAGING_STATS +/** + * Get the paging statistics since system startup + * + * 
This populates the paging statistics struct being passed in + * as argument. + * + * @param[in,out] stats Paging statistics struct to be filled. + */ +__syscall void k_mem_paging_stats_get(struct k_mem_paging_stats_t *stats); + +#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS +/** + * Get the paging statistics since system startup for a thread + * + * This populates the paging statistics struct being passed in + * as argument for a particular thread. + * + * @param[in] tid Thread ID + * @param[in,out] stats Paging statistics struct to be filled. + */ +__syscall +void k_mem_paging_thread_stats_get(k_tid_t tid, + struct k_mem_paging_stats_t *stats); +#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */ + +#include + +#endif /* CONFIG_DEMAND_PAGING_STATS */ + #ifdef __cplusplus } #endif diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 46a0be3e622..107d1f2da18 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -33,6 +33,11 @@ list(APPEND kernel_files xip.c) endif() +if(CONFIG_DEMAND_PAGING_STATS) +list(APPEND kernel_files + paging/statistics.c) +endif() + add_library(kernel ${kernel_files}) # Kernel files has the macro __ZEPHYR_SUPERVISOR__ set so that it diff --git a/kernel/mmu.c b/kernel/mmu.c index 6e936b179e9..99f347dd04f 100644 --- a/kernel/mmu.c +++ b/kernel/mmu.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL); @@ -563,7 +564,10 @@ void z_mem_manage_init(void) } #ifdef CONFIG_DEMAND_PAGING -static unsigned long z_num_pagefaults; + +#ifdef CONFIG_DEMAND_PAGING_STATS +struct k_mem_paging_stats_t paging_stats; +#endif /* Current implementation relies on interrupt locking to any prevent page table * access, which falls over if other CPUs are active. 
Addressing this is not @@ -786,6 +790,65 @@ out: return ret; } +static inline void paging_stats_faults_inc(struct k_thread *faulting_thread, + int key) +{ +#ifdef CONFIG_DEMAND_PAGING_STATS + bool is_irq_unlocked = arch_irq_unlocked(key); + + paging_stats.pagefaults.cnt++; + + if (is_irq_unlocked) { + paging_stats.pagefaults.irq_unlocked++; + } else { + paging_stats.pagefaults.irq_locked++; + } + +#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS + faulting_thread->paging_stats.pagefaults.cnt++; + + if (is_irq_unlocked) { + faulting_thread->paging_stats.pagefaults.irq_unlocked++; + } else { + faulting_thread->paging_stats.pagefaults.irq_locked++; + } +#else + ARG_UNUSED(faulting_thread); +#endif + +#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ + if (k_is_in_isr()) { + paging_stats.pagefaults.in_isr++; + +#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS + faulting_thread->paging_stats.pagefaults.in_isr++; +#endif + } +#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ +#endif /* CONFIG_DEMAND_PAGING_STATS */ +} + +static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread, + bool dirty) +{ +#ifdef CONFIG_DEMAND_PAGING_STATS + if (dirty) { + paging_stats.eviction.dirty++; + } else { + paging_stats.eviction.clean++; + } +#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS + if (dirty) { + faulting_thread->paging_stats.eviction.dirty++; + } else { + faulting_thread->paging_stats.eviction.clean++; + } +#else + ARG_UNUSED(faulting_thread); +#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */ +#endif /* CONFIG_DEMAND_PAGING_STATS */ +} + static bool do_page_fault(void *addr, bool pin) { struct z_page_frame *pf; @@ -794,6 +857,7 @@ static bool do_page_fault(void *addr, bool pin) enum arch_page_location status; bool result; bool dirty = false; + struct k_thread *faulting_thread = _current_cpu->current; __ASSERT(page_frames_initialized, "page fault at %p happened too early", addr); @@ -802,13 +866,7 @@ static bool do_page_fault(void *addr, bool pin) /* * TODO: Add performance accounting: - * - Number 
of pagefaults - * * gathered on a per-thread basis: - * . Pagefaults with IRQs locked in faulting thread (bad) - * . Pagefaults with IRQs unlocked in faulting thread - * * Pagefaults in ISRs (if allowed) * - z_eviction_select() metrics - * * Clean vs dirty page eviction counts * * execution time histogram * * periodic timer execution time histogram (if implemented) * - z_backing_store_page_out() execution time histogram @@ -853,6 +911,9 @@ static bool do_page_fault(void *addr, bool pin) goto out; } result = true; + + paging_stats_faults_inc(faulting_thread, key); + if (status == ARCH_PAGE_LOCATION_PAGED_IN) { if (pin) { /* It's a physical memory address */ @@ -874,6 +935,8 @@ static bool do_page_fault(void *addr, bool pin) __ASSERT(pf != NULL, "failed to get a page frame"); LOG_DBG("evicting %p at 0x%lx", pf->addr, z_page_frame_to_phys(pf)); + + paging_stats_eviction_inc(faulting_thread, dirty); } ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location); __ASSERT(ret == 0, "failed to prepare page frame"); @@ -946,30 +1009,7 @@ void k_mem_pin(void *addr, size_t size) bool z_page_fault(void *addr) { - bool ret; - - ret = do_page_fault(addr, false); - if (ret) { - /* Wasn't an error, increment page fault count */ - int key; - - key = irq_lock(); - z_num_pagefaults++; - irq_unlock(key); - } - return ret; -} - -unsigned long z_num_pagefaults_get(void) -{ - unsigned long ret; - int key; - - key = irq_lock(); - ret = z_num_pagefaults; - irq_unlock(key); - - return ret; + return do_page_fault(addr, false); } static void do_mem_unpin(void *addr) @@ -995,4 +1035,5 @@ void k_mem_unpin(void *addr, size_t size) addr); virt_region_foreach(addr, size, do_mem_unpin); } + #endif /* CONFIG_DEMAND_PAGING */ diff --git a/kernel/paging/statistics.c b/kernel/paging/statistics.c new file mode 100644 index 00000000000..cc0a909cb4a --- /dev/null +++ b/kernel/paging/statistics.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021 Intel Corporation + * + * SPDX-License-Identifier: 
Apache-2.0 + */ + +#include +#include +#include +#include +#include + +extern struct k_mem_paging_stats_t paging_stats; + +unsigned long z_num_pagefaults_get(void) +{ + unsigned long ret; + int key; + + key = irq_lock(); + ret = paging_stats.pagefaults.cnt; + irq_unlock(key); + + return ret; +} + +void z_impl_k_mem_paging_stats_get(struct k_mem_paging_stats_t *stats) +{ + if (stats == NULL) { + return; + } + + /* Copy statistics */ + memcpy(stats, &paging_stats, sizeof(paging_stats)); +} + +#ifdef CONFIG_USERSPACE +static inline +void z_vrfy_k_mem_paging_stats_get(struct k_mem_paging_stats_t *stats) +{ + Z_OOPS(Z_SYSCALL_MEMORY_WRITE(stats, sizeof(*stats))); + z_impl_k_mem_paging_stats_get(stats); +} +#include +#endif /* CONFIG_USERSPACE */ + +#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS +void z_impl_k_mem_paging_thread_stats_get(k_tid_t tid, + struct k_mem_paging_stats_t *stats) +{ + if ((tid == NULL) || (stats == NULL)) { + return; + } + + /* Copy statistics */ + memcpy(stats, &tid->paging_stats, sizeof(tid->paging_stats)); +} + +#ifdef CONFIG_USERSPACE +static inline +void z_vrfy_k_mem_paging_thread_stats_get(k_tid_t tid, + struct k_mem_paging_stats_t *stats) +{ + Z_OOPS(Z_SYSCALL_OBJ(tid, K_OBJ_THREAD)); + Z_OOPS(Z_SYSCALL_MEMORY_WRITE(stats, sizeof(*stats))); + z_impl_k_mem_paging_thread_stats_get(tid, stats); +} +#include +#endif /* CONFIG_USERSPACE */ + +#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */ diff --git a/tests/kernel/mem_protect/demand_paging/prj.conf b/tests/kernel/mem_protect/demand_paging/prj.conf index 9467c292689..06c2fde9b1f 100644 --- a/tests/kernel/mem_protect/demand_paging/prj.conf +++ b/tests/kernel/mem_protect/demand_paging/prj.conf @@ -1 +1,2 @@ CONFIG_ZTEST=y +CONFIG_DEMAND_PAGING_STATS=y