x86: implement demand paging APIs
All arch_ APIs and macros are implemented, and the page fault
handling code will call into the core kernel.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
parent b0d608e3c9
commit ed22064e27
5 changed files with 206 additions and 0 deletions
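
For orientation before the diff: these are the arch-level demand paging entry
points the changes below implement. The prototypes are copied from the diff
itself; their calling contracts are defined by the core kernel's demand paging
interface, not by this commit.

	/* x86 implementations added below; contracts come from the core
	 * kernel's demand paging layer.
	 */
	void arch_mem_page_out(void *addr, uintptr_t location);  /* evict: PTE records store location */
	void arch_mem_page_in(void *addr, uintptr_t phys);       /* restore: PTE records phys address */
	void arch_mem_scratch(uintptr_t phys);                   /* map a frame at the scratch page */
	uintptr_t arch_page_info_get(void *addr, uintptr_t *phys,
				     bool clear_accessed);       /* query ARCH_DATA_PAGE_* state */
	enum arch_page_location arch_page_location_get(void *addr,
						       uintptr_t *location); /* paged in/out/unmapped */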

@@ -61,6 +61,7 @@ config X86
 	select ARCH_HAS_GDBSTUB if !X86_64
 	select ARCH_HAS_TIMING_FUNCTIONS
 	select ARCH_HAS_THREAD_LOCAL_STORAGE
+	select ARCH_HAS_DEMAND_PAGING
 	help
 	  x86 architecture

@@ -10,6 +10,7 @@
 #include <exc_handle.h>
 #include <logging/log.h>
 #include <x86_mmu.h>
+#include <mmu.h>
 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

 #if defined(CONFIG_BOARD_QEMU_X86) || defined(CONFIG_BOARD_QEMU_X86_64)

@@ -359,6 +360,44 @@ static const struct z_exc_handle exceptions[] = {

 void z_x86_page_fault_handler(z_arch_esf_t *esf)
 {
+#ifdef CONFIG_DEMAND_PAGING
+	if ((esf->errorCode & PF_P) == 0) {
+		/* Page was non-present at time exception happened.
+		 * Get faulting virtual address from CR2 register
+		 */
+		void *virt = z_x86_cr2_get();
+		bool was_valid_access;
+
+#ifdef CONFIG_X86_KPTI
+		/* Protection ring is lowest 2 bits in interrupted CS */
+		bool was_user = ((esf->cs & 0x3) != 0U);
+
+		/* Need to check if the interrupted context was a user thread
+		 * that hit a non-present page that was flipped due to KPTI in
+		 * the thread's page tables, in which case this is an access
+		 * violation and we should treat this as an error.
+		 *
+		 * We're probably not locked, but if there is a race, we will
+		 * be fine, the kernel page fault code will later detect that
+		 * the page is present in the kernel's page tables and the
+		 * instruction will just be re-tried, producing another fault.
+		 */
+		if (was_user &&
+		    !z_x86_kpti_is_access_ok(virt, get_ptables(esf))) {
+			was_valid_access = false;
+		} else
+#endif /* CONFIG_X86_KPTI */
+		{
+			was_valid_access = z_page_fault(virt);
+		}
+		if (was_valid_access) {
+			/* Page fault handled, re-try */
+			return;
+		}
+	}
+#endif /* CONFIG_DEMAND_PAGING */
+
 #if !defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_COREDUMP)
 	z_x86_exception_vector = IV_PAGE_FAULT;
 #endif
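
A note on the PF_P test above: on a page fault the CPU pushes an error code
whose low bits describe the access. The layout below follows the Intel SDM;
fatal.c carries its own PF_* definitions (only PF_P is tested by the new
code), and the helper at the end is purely illustrative, not part of the
commit.

	#include <stdint.h>
	#include <stdbool.h>

	/* x86 page fault error code bits (Intel SDM vol. 3A, sec. 4.7) */
	#define PF_P    (1U << 0) /* 0: page not present  1: protection violation */
	#define PF_WR   (1U << 1) /* 0: read              1: write */
	#define PF_US   (1U << 2) /* 0: supervisor mode   1: user mode */
	#define PF_RSVD (1U << 3) /* reserved bit set in a paging structure */
	#define PF_ID   (1U << 4) /* fault was an instruction fetch */

	/* Hypothetical helper: demand paging only intervenes when PF_P is
	 * clear, i.e. the page was simply not present, which may just mean
	 * it is paged out rather than an invalid access.
	 */
	static inline bool may_be_paged_out(uint32_t error_code)
	{
		return (error_code & PF_P) == 0U;
	}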

@@ -19,6 +19,7 @@
 #include <kernel_internal.h>
 #include <mmu.h>
 #include <drivers/interrupt_controller/loapic.h>
+#include <mmu.h>

 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

@@ -1720,3 +1721,153 @@ void arch_reserved_pages_update(void)
 	}
 }
 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
+
+#ifdef CONFIG_DEMAND_PAGING
+#define PTE_MASK (paging_levels[PTE_LEVEL].mask)
+
+void arch_mem_page_out(void *addr, uintptr_t location)
+{
+	pentry_t mask = PTE_MASK | MMU_P | MMU_A;
+
+	/* Accessed bit set to guarantee the entry is not completely 0 in
+	 * case of location value 0. A totally 0 PTE is un-mapped.
+	 */
+	range_map(addr, location, CONFIG_MMU_PAGE_SIZE, MMU_A, mask,
+		  OPTION_FLUSH);
+}
+
+void arch_mem_page_in(void *addr, uintptr_t phys)
+{
+	pentry_t mask = PTE_MASK | MMU_P | MMU_D | MMU_A;
+
+	range_map(addr, phys, CONFIG_MMU_PAGE_SIZE, MMU_P, mask,
+		  OPTION_FLUSH);
+}
+
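
The MMU_A trick in arch_mem_page_out() deserves spelling out: the backing
store location goes into the address bits of a now non-present PTE, and the
accessed bit keeps the entry non-zero even when location is 0. A standalone
sketch of the resulting encoding follows; the bit positions are the
architectural x86 ones, but the helper functions are hypothetical, not part
of the commit.

	#include <stdint.h>
	#include <stdbool.h>

	typedef uint64_t pentry_t;

	#define MMU_P ((pentry_t)1 << 0) /* Present */
	#define MMU_A ((pentry_t)1 << 5) /* Accessed */

	/* After arch_mem_page_out(addr, location): present bit clear,
	 * accessed bit set, address bits hold the backing store location.
	 */
	static pentry_t paged_out_pte(uintptr_t location)
	{
		return ((pentry_t)location & ~(pentry_t)0xFFF) | MMU_A;
	}

	/* What arch_page_location_get() later recovers via get_entry_phys():
	 * the address bits of the (page-aligned) entry.
	 */
	static uintptr_t pte_location(pentry_t pte)
	{
		return (uintptr_t)(pte & ~(pentry_t)0xFFF);
	}

	static bool pte_is_present(pentry_t pte)
	{
		return (pte & MMU_P) != 0;
	}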
+void arch_mem_scratch(uintptr_t phys)
+{
+	page_map_set(z_x86_page_tables_get(), Z_SCRATCH_PAGE,
+		     phys | MMU_P | MMU_RW | MMU_XD, NULL, MASK_ALL,
+		     OPTION_FLUSH);
+}
+
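
arch_mem_scratch() exists so the core kernel can reach the contents of an
arbitrary page frame: the frame gets a temporary mapping at the fixed
Z_SCRATCH_PAGE virtual slot. Below is a hedged sketch of an eviction path
that would use it. backing_store_write() is a hypothetical stand-in for the
pager's storage driver, and the real kernel sequences this far more
carefully (locking, and TLB handling on SMP); Z_SCRATCH_PAGE,
CONFIG_MMU_PAGE_SIZE and the arch_ calls are the commit's own symbols.

	#include <stddef.h>
	#include <stdint.h>

	/* Hypothetical backing store writer, not this commit's API */
	extern void backing_store_write(uintptr_t location, const void *src,
					size_t size);

	/* Sketch: copy a victim frame out through the scratch mapping, then
	 * retarget the evicted page's PTE at its backing store location.
	 */
	static void evict_page(void *vaddr, uintptr_t frame_phys,
			       uintptr_t location)
	{
		/* Make the physical frame addressable at Z_SCRATCH_PAGE */
		arch_mem_scratch(frame_phys);

		/* Save its contents to the backing store */
		backing_store_write(location, (void *)Z_SCRATCH_PAGE,
				    CONFIG_MMU_PAGE_SIZE);

		/* Replace the data page's mapping with the store location */
		arch_mem_page_out(vaddr, location);
	}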
+uintptr_t arch_page_info_get(void *addr, uintptr_t *phys, bool clear_accessed)
+{
+	pentry_t all_pte, mask;
+	uint32_t options;
+
+	/* What to change, if anything, in the page_map_set() calls */
+	if (clear_accessed) {
+		mask = MMU_A;
+		options = OPTION_FLUSH;
+	} else {
+		/* In this configuration page_map_set() just queries the
+		 * page table and makes no changes
+		 */
+		mask = 0;
+		options = 0U;
+	}
+
+	page_map_set(z_x86_kernel_ptables, addr, 0, &all_pte, mask, options);
+
+	/* Un-mapped PTEs are completely zeroed. No need to report anything
+	 * else in this case.
+	 */
+	if (all_pte == 0) {
+		return ARCH_DATA_PAGE_NOT_MAPPED;
+	}
+
+#if defined(CONFIG_USERSPACE) && !defined(CONFIG_X86_COMMON_PAGE_TABLE)
+	/* Don't bother looking at other page tables if non-present as we
+	 * are not required to report accurate accessed/dirty in this case
+	 * and all mappings are otherwise the same.
+	 */
+	if ((all_pte & MMU_P) != 0) {
+		sys_snode_t *node;
+
+		/* IRQs are locked, safe to do this */
+		SYS_SLIST_FOR_EACH_NODE(&x86_domain_list, node) {
+			pentry_t cur_pte;
+			struct arch_mem_domain *domain =
+				CONTAINER_OF(node, struct arch_mem_domain,
+					     node);
+
+			page_map_set(domain->ptables, addr, 0, &cur_pte,
+				     mask, options | OPTION_USER);
+
+			/* Logical OR of relevant PTE in all page tables.
+			 * addr/location and present state should be identical
+			 * among them.
+			 */
+			all_pte |= cur_pte;
+		}
+	}
+#endif /* USERSPACE && ~X86_COMMON_PAGE_TABLE */
+
+	/* NOTE: We are truncating the PTE on PAE systems, whose pentry_t
+	 * are larger than a uintptr_t.
+	 *
+	 * We currently aren't required to report back XD state (bit 63),
+	 * and Zephyr just doesn't support large physical memory on 32-bit
+	 * systems; PAE was only implemented for XD support.
+	 */
+	if (phys != NULL) {
+		*phys = (uintptr_t)get_entry_phys(all_pte, PTE_LEVEL);
+	}
+
+	/* We don't filter out any other bits in the PTE and the kernel
+	 * ignores them. For the case of ARCH_DATA_PAGE_NOT_MAPPED,
+	 * we use a bit which is never set in a real PTE (the PAT bit) in
+	 * the current system.
+	 *
+	 * The other ARCH_DATA_PAGE_* macros are defined to their
+	 * corresponding bits in the PTE.
+	 */
+	return (uintptr_t)all_pte;
+}
+
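
The return value packs raw PTE bits; the ARCH_DATA_PAGE_* macros added later
in this commit give them portable names. An illustrative caller, not from the
commit, showing how an eviction scan might interpret the result:

	/* Illustrative only: inspect one page's state for an eviction scan */
	static void inspect_page(void *addr)
	{
		uintptr_t phys;
		uintptr_t info = arch_page_info_get(addr, &phys, true);

		if (info == ARCH_DATA_PAGE_NOT_MAPPED) {
			return;		/* no PTE at this address */
		}
		if ((info & ARCH_DATA_PAGE_LOADED) == 0) {
			return;		/* paged out; phys holds the store location */
		}
		if ((info & ARCH_DATA_PAGE_ACCESSED) != 0) {
			/* Recently used; a poor eviction candidate. Since
			 * clear_accessed was true, the A bit was just reset,
			 * so the next scan observes fresh usage.
			 */
		}
		if ((info & ARCH_DATA_PAGE_DIRTY) != 0) {
			/* Contents must be written back before reuse */
		}
	}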
+enum arch_page_location arch_page_location_get(void *addr, uintptr_t *location)
+{
+	pentry_t pte;
+	int level;
+
+	/* TODO: since we only have to query the current set of page tables,
+	 * could optimize this with recursive page table mapping
+	 */
+	pentry_get(&level, &pte, z_x86_page_tables_get(), addr);
+
+	if (pte == 0) {
+		/* Not mapped */
+		return ARCH_PAGE_LOCATION_BAD;
+	}
+
+	__ASSERT(level == PTE_LEVEL, "bigpage found at %p", addr);
+	*location = (uintptr_t)get_entry_phys(pte, PTE_LEVEL);
+
+	if ((pte & MMU_P) != 0) {
+		return ARCH_PAGE_LOCATION_PAGED_IN;
+	} else {
+		return ARCH_PAGE_LOCATION_PAGED_OUT;
+	}
+}
+
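
arch_page_location_get() answers one question for the fault-handling path:
where do this page's contents currently live? A short illustrative caller,
again not from the commit:

	/* Illustrative only: classify a page by where its data is */
	static const char *page_whereabouts(void *addr)
	{
		uintptr_t location;

		switch (arch_page_location_get(addr, &location)) {
		case ARCH_PAGE_LOCATION_PAGED_IN:
			return "RAM";		/* location = physical address */
		case ARCH_PAGE_LOCATION_PAGED_OUT:
			return "backing store";	/* location = store offset */
		default:
			return "unmapped";	/* ARCH_PAGE_LOCATION_BAD */
		}
	}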
+#ifdef CONFIG_X86_KPTI
+bool z_x86_kpti_is_access_ok(void *addr, pentry_t *ptables)
+{
+	pentry_t pte;
+	int level;
+
+	pentry_get(&level, &pte, ptables, addr);
+
+	/* Might as well also check if it's un-mapped, normally we don't
+	 * fetch the PTE from the page tables until we are inside
+	 * z_page_fault() and call arch_page_fault_status_get()
+	 */
+	if (level != PTE_LEVEL || pte == 0 || is_flipped_pte(pte)) {
+		return false;
+	}
+
+	return true;
+}
+#endif /* CONFIG_X86_KPTI */
+#endif /* CONFIG_DEMAND_PAGING */

@@ -169,6 +169,13 @@ void z_x86_set_stack_guard(k_thread_stack_t *stack);
  * IDT, etc)
  */
 extern uint8_t z_shared_kernel_page_start;
+
+#ifdef CONFIG_DEMAND_PAGING
+/* Called from page fault handler. ptables here is the page tables for the
+ * faulting user thread and not the current set of page tables
+ */
+extern bool z_x86_kpti_is_access_ok(void *virt, pentry_t *ptables);
+#endif /* CONFIG_DEMAND_PAGING */
 #endif /* CONFIG_X86_KPTI */
 #endif /* CONFIG_USERSPACE */

@@ -24,6 +24,14 @@
 #define Z_X86_MMU_XD 0
 #endif

+/* For these we'll just use the same bits in the PTE */
+#define ARCH_DATA_PAGE_DIRTY		((uintptr_t)BIT(6))
+#define ARCH_DATA_PAGE_LOADED		((uintptr_t)BIT(0))
+#define ARCH_DATA_PAGE_ACCESSED	((uintptr_t)BIT(5))
+
+/* Use a PAT bit for this one since it's never set in a mapped PTE */
+#define ARCH_DATA_PAGE_NOT_MAPPED	((uintptr_t)BIT(7))
+
 /* Always true with 32-bit page tables, don't enable
  * CONFIG_EXECUTE_XOR_WRITE and expect it to work for you
  */