x86: mmu: clarify physical/virtual conversions

The page table implementation requires conversion between virtual
and physical addresses when creating and walking page tables. Add
phys_addr() and virt_addr() functions instead of hard-casting these
values, plus a macro for doing the same in ASM code.

Currently, all pages are identity-mapped, so VIRT_OFFSET = 0, but
this will continue to work if physical and virtual addresses differ.

The 32-bit ASM code was also updated. In the 64-bit code, comments
were left instead, as long mode semantics don't allow use of the
Z_X86_PHYS_ADDR macro; this can be revisited later.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
commit 0a791b7a09
Andrew Boie, 2020-12-14 17:18:12 -08:00; committed by Anas Nashif
8 changed files with 70 additions and 23 deletions
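To make the offset arithmetic concrete, here is a minimal standalone
sketch of the conversion this patch standardizes. The base addresses
below are hypothetical example values; a real build takes
CONFIG_KERNEL_VM_BASE and CONFIG_SRAM_BASE_ADDRESS from Kconfig.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical example values; with identity mapping the two
     * bases are equal and the offset is 0.
     */
    #define KERNEL_VM_BASE    0x80000000UL /* base kernel virtual address */
    #define SRAM_BASE_ADDRESS 0x00100000UL /* base physical RAM address */
    #define VIRT_OFFSET       (KERNEL_VM_BASE - SRAM_BASE_ADDRESS)

    int main(void)
    {
        /* A paging structure's virtual address, as C code sees it */
        uintptr_t virt = KERNEL_VM_BASE + 0x5000;
        /* What actually gets programmed into CR3 or a table entry */
        uintptr_t phys = virt - VIRT_OFFSET;

        /* Round trip: virt 0x80005000 -> phys 0x105000 -> virt 0x80005000 */
        printf("virt 0x%lx -> phys 0x%lx -> virt 0x%lx\n",
               (unsigned long)virt, (unsigned long)phys,
               (unsigned long)(phys + VIRT_OFFSET));
        return 0;
    }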

@@ -169,7 +169,7 @@ static inline uintptr_t get_cr3(const z_arch_esf_t *esf)
 static inline pentry_t *get_ptables(const z_arch_esf_t *esf)
 {
-	return (pentry_t *)get_cr3(esf);
+	return z_x86_virt_addr(get_cr3(esf));
 }
 
 #ifdef CONFIG_X86_64

@@ -16,6 +16,7 @@
 #include <kernel_arch_data.h>
 #include <arch/cpu.h>
 #include <arch/x86/multiboot.h>
+#include <x86_mmu.h>
 
 /* exports (private APIs) */
@@ -231,7 +232,7 @@ __csSet:
 #ifdef CONFIG_X86_MMU
 	/* Install page tables */
-	movl $z_x86_kernel_ptables, %eax
+	movl $Z_X86_PHYS_ADDR(z_x86_kernel_ptables), %eax
 	movl %eax, %cr3
 
 #ifdef CONFIG_X86_PAE

@@ -151,7 +151,7 @@ struct task_state_segment _df_tss = {
 	.es = DATA_SEG,
 	.ss = DATA_SEG,
 	.eip = (uint32_t)df_handler_top,
-	.cr3 = (uint32_t)&z_x86_kernel_ptables
+	.cr3 = (uint32_t)Z_X86_PHYS_ADDR(z_x86_kernel_ptables)
 };
 
 static __used void df_handler_bottom(void)
@@ -199,7 +199,7 @@ static FUNC_NORETURN __used void df_handler_top(void)
 	_main_tss.es = DATA_SEG;
 	_main_tss.ss = DATA_SEG;
 	_main_tss.eip = (uint32_t)df_handler_bottom;
-	_main_tss.cr3 = (uint32_t)(&z_x86_kernel_ptables);
+	_main_tss.cr3 = z_x86_phys_addr(z_x86_kernel_ptables);
 	_main_tss.eflags = 0U;
 
 	/* NT bit is set in EFLAGS so we will task switch back to _main_tss

@@ -9,6 +9,7 @@
 #include <offsets_short.h>
 #include <syscall.h>
 #include <sys/mem_manage.h>
+#include <x86_mmu.h>
 
 /* Exports */
 GTEXT(z_x86_syscall_entry_stub)
@@ -50,7 +51,7 @@ SECTION_FUNC(TEXT, z_x86_trampoline_to_kernel)
 	pushl %edi
 
 	/* Switch to kernel page table */
-	movl $z_x86_kernel_ptables, %esi
+	movl $Z_X86_PHYS_ADDR(z_x86_kernel_ptables), %esi
 	movl %esi, %cr3
 
 	/* Save old trampoline stack pointer in %edi */
@@ -155,7 +156,7 @@ SECTION_FUNC(TEXT, z_x86_syscall_entry_stub)
 	pushl %edi
 
 	/* Switch to kernel page table */
-	movl $z_x86_kernel_ptables, %esi
+	movl $Z_X86_PHYS_ADDR(z_x86_kernel_ptables), %esi
 	movl %esi, %cr3
 
 	/* Save old trampoline stack pointer in %edi */

@@ -39,7 +39,9 @@
 	movl %eax, %cr4
 	clts
 
-	/* Page tables created at build time by gen_mmu.py */
+	/* Page tables created at build time by gen_mmu.py
+	 * NOTE: Presumes phys=virt
+	 */
 	movl $z_x86_kernel_ptables, %eax
 	movl %eax, %cr3
@@ -61,6 +63,7 @@
 	movq %rax, %cr4
 	clts
 
+	/* NOTE: Presumes phys=virt */
 	movq $z_x86_kernel_ptables, %rax
 	movq %rax, %cr3
@@ -473,7 +476,7 @@ except: /*
 	jz 1f
 	swapgs
 #ifdef CONFIG_X86_KPTI
-	/* Load kernel's page table */
+	/* Load kernel's page table. NOTE: Presumes phys=virt */
 	movq $z_x86_kernel_ptables, %r11
 	movq %r11, %cr3
 #endif /* CONFIG_X86_KPTI */
@@ -628,7 +631,7 @@ irq:
 	jz 1f
 	swapgs
 #ifdef CONFIG_X86_KPTI
-	/* Load kernel's page table */
+	/* Load kernel's page table. NOTE: presumes phys=virt */
 	movq $z_x86_kernel_ptables, %rsi
 	movq %rsi, %cr3
 #endif /* CONFIG_X86_KPTI */

@@ -85,6 +85,8 @@ z_x86_syscall_entry_stub:
 	/* Load kernel's page table */
 	pushq %rax
+
+	/* NOTE: Presumes phys=virt */
 	movq $z_x86_kernel_ptables, %rax
 	movq %rax, %cr3
 	popq %rax

@@ -296,7 +296,7 @@ static inline uintptr_t get_entry_phys(pentry_t entry, int level)
 /* Return the virtual address of a linked table stored in the provided entry */
 static inline pentry_t *next_table(pentry_t entry, int level)
 {
-	return (pentry_t *)(get_entry_phys(entry, level));
+	return z_x86_virt_addr(get_entry_phys(entry, level));
 }
 
 /* Number of table entries at this level */
@@ -381,7 +381,7 @@ static inline bool is_flipped_pte(pentry_t pte)
 #if defined(CONFIG_SMP)
 void z_x86_tlb_ipi(const void *arg)
 {
-	uintptr_t ptables;
+	uintptr_t ptables_phys;
 
 	ARG_UNUSED(arg);
 
@@ -389,13 +389,13 @@ void z_x86_tlb_ipi(const void *arg)
 	/* We're always on the kernel's set of page tables in this context
 	 * if KPTI is turned on
 	 */
-	ptables = z_x86_cr3_get();
-	__ASSERT(ptables == (uintptr_t)&z_x86_kernel_ptables, "");
+	ptables_phys = z_x86_cr3_get();
+	__ASSERT(ptables_phys == z_x86_phys_addr(&z_x86_kernel_ptables), "");
 #else
 	/* We might have been moved to another memory domain, so always invoke
 	 * z_x86_thread_page_tables_get() instead of using current CR3 value.
 	 */
-	ptables = (uintptr_t)z_x86_thread_page_tables_get(_current);
+	ptables_phys = z_x86_phys_addr(z_x86_thread_page_tables_get(_current));
 #endif
 
 	/*
 	 * In the future, we can consider making this smarter, such as
@@ -405,7 +405,7 @@ void z_x86_tlb_ipi(const void *arg)
 	 */
 	LOG_DBG("%s on CPU %d\n", __func__, arch_curr_cpu()->id);
 
-	z_x86_cr3_set(ptables);
+	z_x86_cr3_set(ptables_phys);
 }
 
 /* NOTE: This is not synchronous and the actual flush takes place some short
@@ -774,9 +774,8 @@ static inline pentry_t reset_pte(pentry_t old_val)
 static inline pentry_t pte_finalize_value(pentry_t val, bool user_table)
 {
 #ifdef CONFIG_X86_KPTI
-	/* Ram is identity-mapped at boot, so phys=virt for this pinned page */
 	static const uintptr_t shared_phys_addr =
-		(uintptr_t)&z_shared_kernel_page_start;
+		(uintptr_t)Z_X86_PHYS_ADDR(&z_shared_kernel_page_start);
 
 	if (user_table && (val & MMU_US) == 0 && (val & MMU_P) != 0 &&
 	    get_entry_phys(val, PTE_LEVEL) != shared_phys_addr) {
@@ -981,7 +980,9 @@ static int page_map_set(pentry_t *ptables, void *virt, pentry_t entry_val,
 			if (new_table == NULL) {
 				return -ENOMEM;
 			}
-			*entryp = ((pentry_t)(uintptr_t)new_table) | INT_FLAGS;
+			*entryp = ((pentry_t)z_x86_phys_addr(new_table) |
+				   INT_FLAGS);
 			table = new_table;
 		} else {
 			/* We fail an assertion here due to no support for
@@ -1513,7 +1514,8 @@ static int copy_page_table(pentry_t *dst, pentry_t *src, int level)
 		 * cast needed for PAE case where sizeof(void *) and
 		 * sizeof(pentry_t) are not the same.
 		 */
-		dst[i] = ((pentry_t)(uintptr_t)child_dst) | INT_FLAGS;
+		dst[i] = ((pentry_t)z_x86_phys_addr(child_dst) |
+			  INT_FLAGS);
 
 		ret = copy_page_table(child_dst,
 				      next_table(src[i], level),
@@ -1699,7 +1701,7 @@ void arch_mem_domain_thread_add(struct k_thread *thread)
 	/* This is only set for threads that were migrating from some other
 	 * memory domain; new threads this is NULL
 	 */
-	pentry_t *old_ptables = (pentry_t *)thread->arch.ptables;
+	pentry_t *old_ptables = z_x86_virt_addr(thread->arch.ptables);
 	bool is_user = (thread->base.user_options & K_USER) != 0;
 	bool is_migration = (old_ptables != NULL) && is_user;
 
@@ -1711,7 +1713,7 @@ void arch_mem_domain_thread_add(struct k_thread *thread)
 		set_stack_perms(thread, domain->arch.ptables);
 	}
 
-	thread->arch.ptables = (uintptr_t)domain->arch.ptables;
+	thread->arch.ptables = z_x86_phys_addr(domain->arch.ptables);
 	LOG_DBG("set thread %p page tables to %p", thread,
 		(void *)thread->arch.ptables);
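The x86_mmu.c changes above all reduce to two patterns: convert a
physical table link to a virtual pointer before dereferencing it, and
convert a virtual pointer to a physical link before storing it in an
entry. A minimal self-contained sketch of both patterns, with
hypothetical mask and flag values (the real next_table() and
page_map_set() are in the diff above):

    #include <stdint.h>

    typedef uint64_t pentry_t;

    #define VIRT_OFFSET 0x7ff00000UL         /* hypothetical virt-phys delta */
    #define ADDR_MASK   0x000ffffffffff000UL /* hypothetical PTE address bits */
    #define INT_FLAGS   0x3UL                /* hypothetical present|writable */

    static void *virt_addr(uintptr_t phys)
    {
        return (void *)(phys + VIRT_OFFSET);
    }

    static uintptr_t phys_addr(void *virt)
    {
        return (uintptr_t)virt - VIRT_OFFSET;
    }

    /* Walking: entries store physical links; convert before dereferencing */
    static pentry_t *next_table(pentry_t entry)
    {
        return virt_addr((uintptr_t)(entry & ADDR_MASK));
    }

    /* Creating: store the new table's physical address, never its pointer */
    static void link_table(pentry_t *entryp, pentry_t *new_table)
    {
        *entryp = (pentry_t)phys_addr(new_table) | INT_FLAGS;
    }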

@@ -65,6 +65,43 @@
 #define PF_PK  BIT(5)  /* 1 protection-key violation */
 #define PF_SGX BIT(15) /* 1 SGX-specific access control requirements */
 
+/*
+ * NOTE: All page table links are by physical, not virtual address.
+ * For now, we have a hard requirement that the memory addresses of paging
+ * structures must be convertible with a simple mathematical operation,
+ * by applying the difference in the base kernel virtual and physical
+ * addresses.
+ *
+ * Arbitrary mappings would induce a chicken-and-the-egg problem when walking
+ * page tables. The codebase does not yet use techniques like recursive page
+ * table mapping to alleviate this. It's simplest to just ensure the page
+ * pool's pages can always be converted with simple math and a cast.
+ *
+ * The following conversion functions and macros are exclusively for use when
+ * walking and creating page tables.
+ */
+#ifdef CONFIG_MMU
+#define Z_X86_VIRT_OFFSET (CONFIG_KERNEL_VM_BASE - CONFIG_SRAM_BASE_ADDRESS)
+#else
+#define Z_X86_VIRT_OFFSET 0
+#endif
+
+/* ASM code */
+#define Z_X86_PHYS_ADDR(virt) ((virt) - Z_X86_VIRT_OFFSET)
+
+#ifndef _ASMLANGUAGE
+/* Installing new paging structures */
+static inline uintptr_t z_x86_phys_addr(void *virt)
+{
+	return ((uintptr_t)virt - Z_X86_VIRT_OFFSET);
+}
+
+/* Examining page table links */
+static inline void *z_x86_virt_addr(uintptr_t phys)
+{
+	return (void *)(phys + Z_X86_VIRT_OFFSET);
+}
+
 #ifdef CONFIG_EXCEPTION_DEBUG
 /**
  * Dump out page table entries for a particular virtual memory address
@@ -172,7 +209,7 @@ static inline uintptr_t z_x86_cr3_get(void)
 /* Return the virtual address of the page tables installed in this CPU in CR3 */
 static inline pentry_t *z_x86_page_tables_get(void)
 {
-	return (pentry_t *)z_x86_cr3_get();
+	return z_x86_virt_addr(z_x86_cr3_get());
 }
 
 /* Return cr2 value, which contains the page fault linear address.
@@ -205,7 +242,7 @@ static inline pentry_t *z_x86_thread_page_tables_get(struct k_thread *thread)
 		 * the kernel's page tables and not the page tables associated
 		 * with their memory domain.
 		 */
-		return (pentry_t *)(thread->arch.ptables);
+		return z_x86_virt_addr(thread->arch.ptables);
 	}
 #endif
 	return z_x86_kernel_ptables;
@@ -219,4 +256,5 @@ void z_x86_tlb_ipi(const void *arg);
 #ifdef CONFIG_X86_COMMON_PAGE_TABLE
 void z_x86_swap_update_common_page_table(struct k_thread *incoming);
 #endif
+#endif /* _ASMLANGUAGE */
 #endif /* ZEPHYR_ARCH_X86_INCLUDE_X86_MMU_H */
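Taken together, the convention after this change is: CR3 and page
table entries always hold physical addresses, and C code converts at
that boundary. A self-contained sketch of the boundary, where cr3 is
modeled as a plain variable and the helper names mirror, but are not,
the ones this patch adds:

    #include <stdint.h>

    typedef uint64_t pentry_t;

    #define VIRT_OFFSET 0x7ff00000UL /* hypothetical virt-to-phys delta */

    static uintptr_t cr3; /* stand-in for the real control register */

    static uintptr_t phys_addr(void *virt)
    {
        return (uintptr_t)virt - VIRT_OFFSET;
    }

    static void *virt_addr(uintptr_t phys)
    {
        return (void *)(phys + VIRT_OFFSET);
    }

    /* Installing page tables: hardware needs the physical address */
    static void install_ptables(pentry_t *ptables_virt)
    {
        cr3 = phys_addr(ptables_virt);
    }

    /* Examining page tables: C code needs a dereferenceable virtual pointer */
    static pentry_t *current_ptables(void)
    {
        return virt_addr(cr3);
    }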