x86: paging code rewrite
The x86 paging code has been rewritten to support another paging mode
and non-identity virtual mappings.

- Paging code now uses an array of paging level characteristics and
  walks tables using for loops. This is opposed to having different
  functions for every paging level and lots of #ifdefs. The code is
  now more concise and adding new paging modes should be trivial.
- We now support 32-bit, PAE, and IA-32e page tables.
- The page tables created by gen_mmu.py are now installed at early
  boot. There are no longer separate "flat" page tables. These tables
  are mutable at any time.
- The x86_mmu code now has a private header. Many definitions that did
  not need to be in public scope have been moved out of mmustructs.h
  and either placed in the C file or in the private header.
- Improvements to dumping page table information, with the physical
  mapping and flags all shown.
- arch_mem_map() implemented.
- x86 userspace/memory domain code ported to use the new
  infrastructure.
- Add logic for the physical -> virtual instruction pointer
  transition, including cleaning up identity mappings after this
  takes place.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
commit 38e17b68e3 (parent ddb63c404f)
26 changed files with 1574 additions and 1751 deletions
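The first bullet of the commit message — replacing per-level functions and #ifdefs with a table-driven walk — is the heart of the rewrite. Below is a minimal sketch of that approach; the names (struct paging_level, walk()) and the IA-32e level characteristics are illustrative assumptions, not the actual Zephyr implementation.

```c
/* Sketch of a loop-driven page table walk using an array of paging
 * level characteristics. Assumes a 64-bit build with identity-mapped
 * table memory; real code must translate physical table addresses to
 * virtual ones before dereferencing.
 */
#include <stdint.h>
#include <stddef.h>

typedef uint64_t pentry_t;

struct paging_level {
	unsigned int shift;   /* address bits consumed below this level */
	unsigned int entries; /* number of entries in one table */
};

/* Example: IA-32e (4-level) characteristics, toplevel first */
static const struct paging_level levels[] = {
	{ 39, 512 }, /* PML4 */
	{ 30, 512 }, /* PDPT */
	{ 21, 512 }, /* PD */
	{ 12, 512 }, /* PT */
};

#define NUM_LEVELS (sizeof(levels) / sizeof(levels[0]))
#define ENTRY_ADDR_MASK 0x000FFFFFFFFFF000ULL /* assumed entry address mask */

/* Walk the tables for a virtual address; returns a pointer to the leaf
 * entry, or NULL if some level is not present (P bit clear). Adding a
 * new paging mode only means providing a different levels[] array.
 */
static pentry_t *walk(pentry_t *top, uintptr_t virt)
{
	pentry_t *table = top;

	for (size_t lvl = 0; lvl < NUM_LEVELS; lvl++) {
		size_t idx = (virt >> levels[lvl].shift) &
			     (levels[lvl].entries - 1);
		pentry_t *entry = &table[idx];

		if ((*entry & 1ULL) == 0ULL) {
			return NULL; /* not present */
		}
		if (lvl == NUM_LEVELS - 1) {
			return entry; /* leaf PTE */
		}
		/* Descend to the child table */
		table = (pentry_t *)(uintptr_t)(*entry & ENTRY_ADDR_MASK);
	}
	return NULL;
}
```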
@@ -46,6 +46,7 @@ config X86_64
	select USE_SWITCH
	select USE_SWITCH_SUPPORTED
	select SCHED_IPI_SUPPORTED
	select X86_MMU

config X86_KERNEL_OFFSET
	int "Kernel offset from beginning of RAM"

@@ -76,6 +76,15 @@ config X86_USERSPACE
	  supporting user-level threads that are protected from each other and
	  from crashing the kernel.

config X86_PAE
	bool "Use PAE page tables"
	default y
	depends on X86_MMU
	help
	  If enabled, use PAE-style page tables instead of 32-bit page tables.
	  The advantage is support for the Execute Disable bit, at a cost of
	  more memory for paging structures.

menu "Architecture Floating Point Options"
	depends on CPU_HAS_FPU
@@ -9,6 +9,7 @@
#include <kernel_internal.h>
#include <exc_handle.h>
#include <logging/log.h>
#include <x86_mmu.h>
LOG_MODULE_DECLARE(os);

#if defined(CONFIG_BOARD_QEMU_X86) || defined(CONFIG_BOARD_QEMU_X86_64)

@@ -71,18 +72,24 @@ bool z_x86_check_stack_bounds(uintptr_t addr, size_t size, uint16_t cs)
		start = (uintptr_t)Z_KERNEL_STACK_BUFFER(
			z_interrupt_stacks[cpu_id]);
		end = start + CONFIG_ISR_STACK_SIZE;
	} else if ((cs & 0x3U) != 0U ||
		   (_current->base.user_options & K_USER) == 0) {
		/* Thread was in user mode, or is not a user mode thread.
		 * The normal stack buffer is what we will check.
#ifdef CONFIG_USERSPACE
	} else if ((cs & 0x3U) == 0 &&
		   (_current->base.user_options & K_USER) != 0) {
		/* The low two bits of the CS register is the privilege
		 * level. It will be 0 in supervisor mode and 3 in user mode
		 * corresponding to ring 0 / ring 3.
		 *
		 * If we get here, we must have been doing a syscall, check
		 * privilege elevation stack bounds
		 */
		start = _current->stack_info.start - CONFIG_MMU_PAGE_SIZE;
		end = _current->stack_info.start;
#endif /* CONFIG_USERSPACE */
	} else {
		/* Normal thread operation, check its stack buffer */
		start = _current->stack_info.start;
		end = Z_STACK_PTR_ALIGN(_current->stack_info.start +
					_current->stack_info.size);
	} else {
		/* User thread was doing a syscall, check kernel stack bounds */
		start = _current->stack_info.start - MMU_PAGE_SIZE;
		end = _current->stack_info.start;
					_current->stack_info.size);
	}

	return (addr <= start) || (addr + size > end);

@@ -146,19 +153,27 @@ static void unwind_stack(uintptr_t base_ptr, uint16_t cs)
	}
#endif /* CONFIG_X86_EXCEPTION_STACK_TRACE */

static inline struct x86_page_tables *get_ptables(const z_arch_esf_t *esf)
static inline uintptr_t get_cr3(const z_arch_esf_t *esf)
{
#if defined(CONFIG_USERSPACE) && defined(CONFIG_X86_KPTI)
	/* If the interrupted thread was in user mode, we did a page table
	 * switch when we took the exception via z_x86_trampoline_to_kernel
	 */
	if ((esf->cs & 0x3) != 0) {
		return z_x86_thread_page_tables_get(_current);
		return _current->arch.ptables;
	}
#else
	ARG_UNUSED(esf);
#endif
	return z_x86_page_tables_get();
	/* Return the current CR3 value, it didn't change when we took
	 * the exception
	 */
	return z_x86_cr3_get();
}

static inline pentry_t *get_ptables(const z_arch_esf_t *esf)
{
	return z_mem_virt_addr(get_cr3(esf));
}

#ifdef CONFIG_X86_64

@@ -172,8 +187,8 @@ static void dump_regs(const z_arch_esf_t *esf)
		esf->r8, esf->r9, esf->r10, esf->r11);
	LOG_ERR("R12: 0x%016lx R13: 0x%016lx R14: 0x%016lx R15: 0x%016lx",
		esf->r12, esf->r13, esf->r14, esf->r15);
	LOG_ERR("RSP: 0x%016lx RFLAGS: 0x%016lx CS: 0x%04lx CR3: %p", esf->rsp,
		esf->rflags, esf->cs & 0xFFFFU, get_ptables(esf));
	LOG_ERR("RSP: 0x%016lx RFLAGS: 0x%016lx CS: 0x%04lx CR3: 0x%016lx",
		esf->rsp, esf->rflags, esf->cs & 0xFFFFU, get_cr3(esf));

#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	LOG_ERR("call trace:");

@@ -190,8 +205,8 @@ static void dump_regs(const z_arch_esf_t *esf)
		esf->eax, esf->ebx, esf->ecx, esf->edx);
	LOG_ERR("ESI: 0x%08x, EDI: 0x%08x, EBP: 0x%08x, ESP: 0x%08x",
		esf->esi, esf->edi, esf->ebp, esf->esp);
	LOG_ERR("EFLAGS: 0x%08x CS: 0x%04x CR3: %p", esf->eflags,
		esf->cs & 0xFFFFU, get_ptables(esf));
	LOG_ERR("EFLAGS: 0x%08x CS: 0x%04x CR3: 0x%08lx", esf->eflags,
		esf->cs & 0xFFFFU, get_cr3(esf));

#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	LOG_ERR("call trace:");

@@ -309,7 +324,7 @@ static void dump_page_fault(z_arch_esf_t *esf)
	}

#ifdef CONFIG_X86_MMU
	z_x86_dump_mmu_flags(get_ptables(esf), cr2);
	z_x86_dump_mmu_flags(get_ptables(esf), (void *)cr2);
#endif /* CONFIG_X86_MMU */
}
#endif /* CONFIG_EXCEPTION_DEBUG */
@@ -16,6 +16,7 @@
#include <kernel_arch_data.h>
#include <arch/cpu.h>
#include <arch/x86/multiboot.h>
#include <sys/mem_manage.h>

/* exports (private APIs) */

@@ -41,7 +42,55 @@
	GTEXT(_sys_resume_from_deep_sleep)
#endif

.macro install_page_tables
#ifdef CONFIG_X86_MMU
	/* Enable paging. If virtual memory is enabled, the instruction pointer
	 * is currently at a physical address. There is an identity mapping
	 * for all RAM, plus a virtual mapping of RAM starting at
	 * CONFIG_KERNEL_VM_BASE using the same paging structures.
	 *
	 * Until we enable these page tables, only physical memory addresses
	 * work.
	 */
	movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
	movl %eax, %cr3

#ifdef CONFIG_X86_PAE
	/* Enable PAE */
	movl %cr4, %eax
	orl $CR4_PAE, %eax
	movl %eax, %cr4

	/* IA32_EFER NXE bit set */
	movl $0xC0000080, %ecx
	rdmsr
	orl $0x800, %eax
	wrmsr
#endif /* CONFIG_X86_PAE */

	/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
	movl %cr0, %eax
	orl $(CR0_PG | CR0_WP), %eax
	movl %eax, %cr0

#if CONFIG_KERNEL_VM_BASE != CONFIG_SRAM_BASE_ADDRESS
	/* Jump to a virtual address, which works because the identity and
	 * virtual mappings both are to the same physical address.
	 */
	lea vm_enter, %eax
	jmp *%eax
vm_enter:
	/* We are now executing in virtual memory. We'll un-map the identity
	 * mappings later once we are in the C domain
	 */
#endif /* CONFIG_KERNEL_VM_BASE != CONFIG_SRAM_BASE_ADDRESS */
#endif /* CONFIG_X86_MMU */
.endm

SECTION_FUNC(TEXT_START, __start)
#ifndef CONFIG_XIP
	install_page_tables
#endif /* CONFIG_XIP */

#include "../common.S"

@@ -64,11 +113,7 @@ SECTION_FUNC(TEXT_START, __start)
	 */
#if CONFIG_SET_GDT
	lgdt _gdt_rom /* load 32-bit operand size GDT */
#endif

#ifdef CONFIG_SET_GDT
	/* If we set our own GDT, update the segment registers as well.
	 */
	movw $DATA_SEG, %ax /* data segment selector (entry = 3) */

@@ -84,7 +129,6 @@ SECTION_FUNC(TEXT_START, __start)
__csSet:
#endif /* CONFIG_SET_GDT */

#if !defined(CONFIG_FPU)
	/*
	 * Force an #NM exception for floating point instructions

@@ -206,6 +250,10 @@ __csSet:
	call _x86_data_copy
#endif /* CONFIG_USERSPACE */

	/* Have to do this here, the page tables aren't loaded into RAM
	 * until after the data copy
	 */
	install_page_tables
#endif /* CONFIG_XIP */

	/*

@@ -308,30 +356,6 @@ dataWords:
	ret
#endif /* CONFIG_XIP */

#ifdef CONFIG_X86_MMU
z_x86_enable_paging:
	/* load the page directory address into the registers */
	movl $z_x86_kernel_ptables, %eax
	movl %eax, %cr3

	/* Enable PAE */
	movl %cr4, %eax
	orl $CR4_PAE, %eax
	movl %eax, %cr4

	/* IA32_EFER NXE bit set */
	movl $0xC0000080, %ecx
	rdmsr
	orl $0x800, %eax
	wrmsr

	/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
	movl %cr0, %eax
	orl $(CR0_PG | CR0_WP), %eax
	movl %eax, %cr0

	ret
#endif /* CONFIG_X86_MMU */

#if defined(CONFIG_SSE)
@@ -18,6 +18,9 @@
#include <inttypes.h>
#include <exc_handle.h>
#include <logging/log.h>
#include <x86_mmu.h>
#include <sys/mem_manage.h>

LOG_MODULE_DECLARE(os);

#ifdef CONFIG_DEBUG_COREDUMP

@@ -148,7 +151,7 @@ struct task_state_segment _df_tss = {
	.es = DATA_SEG,
	.ss = DATA_SEG,
	.eip = (uint32_t)df_handler_top,
	.cr3 = (uint32_t)&z_x86_kernel_ptables
	.cr3 = Z_MEM_PHYS_ADDR((uint32_t)&z_x86_kernel_ptables)
};

static __used void df_handler_bottom(void)

@@ -196,7 +199,7 @@ static FUNC_NORETURN __used void df_handler_top(void)
	_main_tss.es = DATA_SEG;
	_main_tss.ss = DATA_SEG;
	_main_tss.eip = (uint32_t)df_handler_bottom;
	_main_tss.cr3 = (uint32_t)&z_x86_kernel_ptables;
	_main_tss.cr3 = z_mem_phys_addr(&z_x86_kernel_ptables);
	_main_tss.eflags = 0U;

	/* NT bit is set in EFLAGS so we will task switch back to _main_tss
@@ -16,6 +16,7 @@
#include <ksched.h>
#include <arch/x86/mmustructs.h>
#include <kswap.h>
#include <x86_mmu.h>

/* forward declaration */
@@ -8,6 +8,7 @@
#include <arch/cpu.h>
#include <offsets_short.h>
#include <syscall.h>
#include <sys/mem_manage.h>

/* Exports */
GTEXT(z_x86_syscall_entry_stub)

@@ -49,7 +50,7 @@ SECTION_FUNC(TEXT, z_x86_trampoline_to_kernel)
	pushl %edi

	/* Switch to kernel page table */
	movl $z_x86_kernel_ptables, %esi
	movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
	movl %esi, %cr3

	/* Save old trampoline stack pointer in %edi */

@@ -154,7 +155,7 @@ SECTION_FUNC(TEXT, z_x86_syscall_entry_stub)
	pushl %edi

	/* Switch to kernel page table */
	movl $z_x86_kernel_ptables, %esi
	movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
	movl %esi, %cr3

	/* Save old trampoline stack pointer in %edi */

@@ -304,7 +305,10 @@ SECTION_FUNC(TEXT, z_x86_userspace_enter)
	 * want to leak any information.
	 */
	mov %edi, %esp
	subl $Z_X86_PDPT_SIZE, %esp
#ifdef CONFIG_X86_PAE
	/* Skip over the toplevel PDPT stored here */
	subl $0x20, %esp
#endif /* CONFIG_X86_PAE */

	/* Stash some registers we are going to need to erase the user
	 * stack.
@@ -9,7 +9,7 @@
#include <kernel_structs.h>
#include <kernel_internal.h>
#include <arch/x86/multiboot.h>
#include <arch/x86/mmustructs.h>
#include <x86_mmu.h>
#include <drivers/interrupt_controller/loapic.h>

/*

@@ -80,8 +80,6 @@ struct x86_tss64 tss3 = {
};
#endif

extern struct x86_page_tables z_x86_flat_ptables;

struct x86_cpuboot x86_cpuboot[] = {
	{
		.tr = X86_KERNEL_CPU0_TR,

@@ -89,9 +87,6 @@ struct x86_cpuboot x86_cpuboot[] = {
		.sp = (uint64_t) z_interrupt_stacks[0] +
		      Z_KERNEL_STACK_SIZE_ADJUST(CONFIG_ISR_STACK_SIZE),
		.fn = z_x86_prep_c,
#ifdef CONFIG_X86_MMU
		.ptables = &z_x86_flat_ptables,
#endif
	},
#if CONFIG_MP_NUM_CPUS > 1
	{

@@ -127,9 +122,6 @@ void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
	x86_cpuboot[cpu_num].sp = (uint64_t) Z_KERNEL_STACK_BUFFER(stack) + sz;
	x86_cpuboot[cpu_num].fn = fn;
	x86_cpuboot[cpu_num].arg = arg;
#ifdef CONFIG_X86_MMU
	x86_cpuboot[cpu_num].ptables = &z_x86_kernel_ptables;
#endif /* CONFIG_X86_MMU */

	z_loapic_ipi(apic_id, LOAPIC_ICR_IPI_INIT, 0);
	k_busy_wait(10000);
@@ -10,6 +10,7 @@
#include <offsets_short.h>
#include <drivers/interrupt_controller/loapic.h>
#include <arch/cpu.h>
#include <sys/mem_manage.h>

.macro read_tsc var_name
	push %rax

@@ -21,8 +22,69 @@
	pop %rax
.endm

/*
 * Definitions/macros for enabling paging
 */

/* Long mode, no-execute, syscall */
#define EFER_BITS (X86_EFER_MSR_LME | X86_EFER_MSR_NXE | X86_EFER_MSR_SCE)

/* Paging, write-protect */
#define CR0_BITS (CR0_PG | CR0_WP)

/* PAE, SSE */
#define CR4_BITS (CR4_PAE | CR4_OSFXSR)

.macro set_efer
	movl $X86_EFER_MSR, %ecx
	rdmsr
	orl $EFER_BITS, %eax
	wrmsr
.endm

.macro install_pagetables_32
	movl %cr4, %eax
	orl $CR4_BITS, %eax
	movl %eax, %cr4
	clts

	/* Page tables created at build time by gen_mmu.py */
	movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
	movl %eax, %cr3

	set_efer

	movl %cr0, %eax
	orl $CR0_BITS, %eax
	movl %eax, %cr0
.endm

.macro install_pagetables_64
	/* Here, we are already in long mode with paging enabled and
	 * just need to switch to our own page tables, but let's be
	 * paranoid and ensure CR4, CR0, and EFER_MSR are set up
	 * exactly how we expect. Logic is the same as install_pagetables_32
	 */
	movq %cr4, %rax
	orq $CR4_BITS, %rax
	movq %rax, %cr4
	clts

	movq $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
	movq %rax, %cr3

	set_efer

	movq %cr0, %rax
	/* Use 32-bit instructions due to assembler fussiness with large
	 * immediate values with `orq`, CR0_PG is bit 31. We don't ever set any
	 * high bits in cr0 anyway.
	 */
	orl $CR0_BITS, %eax
	movq %rax, %cr0
.endm

.section .locore,"ax"
.code32

#if CONFIG_MP_NUM_CPUS > 1

@@ -79,7 +141,6 @@ unknown_loapic_id:
.code32
.globl __start
__start:

	/*
	 * kernel execution begins here in 32-bit mode, with flat-mode
	 * descriptors in all segment registers, interrupts disabled.

@@ -98,7 +159,6 @@ __start:
	 * next, clear the BSS. note we're still in 32-bit mode,
	 * so the BSS must fit entirely in the first 4GB of RAM.
	 */

	cld
	xorl %eax, %eax
	movl $__bss_start, %edi

@@ -108,37 +168,17 @@ __start:
	movl $x86_cpuboot, %ebp /* BSP is always logical CPU id 0 */
	movl %ebx, __x86_cpuboot_t_arg_OFFSET(%ebp) /* multiboot info */

	/*
	 * transition to long mode, reload the segment registers,
	 * and configure per-CPU stuff: GS, task register, stack.
	 */

go64:	movl %cr4, %eax /* enable PAE and SSE */
	orl $(CR4_PAE | CR4_OSFXSR), %eax
	movl %eax, %cr4
	clts

#ifdef CONFIG_X86_MMU
	movl __x86_cpuboot_t_ptables_OFFSET(%ebp), %eax
#else
	movl $z_x86_flat_ptables, %eax
#endif
	movl %eax, %cr3

	movl $X86_EFER_MSR, %ecx /* enable long mode, no-execute, syscall */
	rdmsr
	orl $(X86_EFER_MSR_LME | X86_EFER_MSR_NXE | X86_EFER_MSR_SCE), %eax
	wrmsr

	movl %cr0, %eax /* enable paging */
	orl $(CR0_PG | CR0_WP), %eax
	movl %eax, %cr0

go64:	/* Install page tables and transition to long mode */
	install_pagetables_32
	jmpl $X86_KERNEL_CS, $enter_code64

	/* Long mode entry point. Arrive here from the code
	 * immediately above (shared between main CPU startup and AP
	 * startup), or from EFI entry in __start64
	 * startup), or from EFI entry in __start64.
	 *
	 * Here we reload the segment registers,
	 * and configure per-CPU stuff: GS, task register, stack.
	 */
.code64
enter_code64:

@@ -200,20 +240,7 @@ __start64:
	lidt idt80
	lgdt gdt80

	/* These state and flag settings really should be done later,
	 * in the shared startup path, they aren't required for mode
	 * transition and having them in the 32 bit stub means they
	 * have to be duplicated here.
	 */
	movq %cr4, %rax
	orq $(CR4_PAE | CR4_OSFXSR), %rax
	movq %rax, %cr4
	clts
	movq $X86_EFER_MSR, %rcx
	rdmsr
	orq $(X86_EFER_MSR_NXE | X86_EFER_MSR_SCE), %rax
	wrmsr
	cld
	install_pagetables_64

	/* Disable 8259 PIT. Almost certainly not needed on modern
	 * UEFI platforms taking this code path, but...

@@ -949,44 +976,6 @@ idt80: /* LIDT descriptor for 64 bit mode */
	.word (idt_end - idt - 1)
	.quad idt

/* Initial page tables for long mode entry. This generates a second
 * level page full of 512 1G PTE entries of the form:
 *
 * 0x000000nnn0000083
 *
 * Where nnn is an identity-mapped 1G page index in the range
 * 0x000-0x1ff, and 0x83 marks a present, 1G, read/write page
 * entry. It's split up somewhat awkwardly to get around gas's
 * recursion limits in macro expansion.
 *
 * This maps the first 512GB of memory space by default, which will
 * hopefully be enough to reach everything we need before we can
 * bootstrap the real page tables later.
 */
.macro populate_ptable base, count=64
	.long 0x00000083
	.long 64 - \count + \base
	.long 0x40000083
	.long 64 - \count + \base
	.long 0x80000083
	.long 64 - \count + \base
	.long 0xC0000083
	.long 64 - \count + \base
	.if \count > 1
	populate_ptable \base, (\count - 1)
	.endif
.endm

.align 4096
.globl z_x86_flat_ptables
z_x86_flat_ptables:
	.long pdp + 0x03 /* 0x03 = R/W, P */
	.long 0
	.fill 4088, 1, 0
pdp:
	populate_ptable 0
	populate_ptable 64
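A worked example of what the (now-removed) populate_ptable macro above emits — my arithmetic, derived from the macro and its comment, not text from the diff:

```c
/* The first expansion, with base=0 and count=64, writes a high dword
 * of 64 - 64 + 0 = 0, so the first four 64-bit entries are:
 *
 *   0x0000000000000083  -> phys 0x00000000 (GB 0), present, RW, 1G (PS)
 *   0x0000000040000083  -> phys 0x40000000 (GB 1)
 *   0x0000000080000083  -> phys 0x80000000 (GB 2)
 *   0x00000000C0000083  -> phys 0xC0000000 (GB 3)
 *
 * Each recursion step increments the high dword, advancing the
 * physical address by 4 GB, until the two invocations together have
 * emitted 512 identity-mapped 1 GB entries (512 GB total).
 */
```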
.section .gdt,"ad"

/*
@@ -8,6 +8,7 @@
#include <kernel_structs.h>
#include <kernel_internal.h>
#include <offsets_short.h>
#include <x86_mmu.h>

extern void x86_sse_init(struct k_thread *); /* in locore.S */
@@ -7,6 +7,7 @@
#include <arch/cpu.h>
#include <offsets_short.h>
#include <syscall.h>
#include <sys/mem_manage.h>

#ifdef CONFIG_X86_KPTI
/* Copy interrupt return stack context to the trampoline stack, switch back

@@ -83,7 +84,7 @@ z_x86_syscall_entry_stub:

	/* Load kernel's page table */
	pushq %rax
	movq $z_x86_kernel_ptables, %rax
	movq $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
	movq %rax, %cr3
	popq %rax
	movq $0, -8(%rsp) /* Delete stashed RAX data */
@@ -36,7 +36,6 @@ GEN_OFFSET_SYM(_thread_arch_t, excNestCount);
#ifdef CONFIG_USERSPACE
GEN_OFFSET_SYM(_thread_arch_t, psp);
GEN_OFFSET_SYM(_thread_arch_t, ptables);
GEN_ABSOLUTE_SYM(Z_X86_PDPT_SIZE, sizeof(struct x86_mmu_pdpt));
#endif

GEN_OFFSET_SYM(_thread_arch_t, preempFloatReg);

@@ -65,9 +64,4 @@ GEN_OFFSET_SYM(z_arch_esf_t, errorCode);
GEN_OFFSET_SYM(z_arch_esf_t, eip);
GEN_OFFSET_SYM(z_arch_esf_t, cs);
GEN_OFFSET_SYM(z_arch_esf_t, eflags);

/* size of the MMU_REGION structure. Used by linker scripts */
GEN_ABSOLUTE_SYM(__MMU_REGION_SIZEOF, sizeof(struct mmu_region));

#endif /* _X86_OFFSETS_INC_ */
@@ -49,9 +49,6 @@ GEN_OFFSET_SYM(x86_cpuboot_t, gs_base);
GEN_OFFSET_SYM(x86_cpuboot_t, sp);
GEN_OFFSET_SYM(x86_cpuboot_t, fn);
GEN_OFFSET_SYM(x86_cpuboot_t, arg);
#ifdef CONFIG_X86_MMU
GEN_OFFSET_SYM(x86_cpuboot_t, ptables);
#endif /* CONFIG_X86_MMU */
GEN_ABSOLUTE_SYM(__X86_CPUBOOT_SIZEOF, sizeof(x86_cpuboot_t));

#endif /* _X86_OFFSETS_INC_ */
@@ -8,6 +8,7 @@
#include <kernel_internal.h>
#include <arch/x86/acpi.h>
#include <arch/x86/multiboot.h>
#include <x86_mmu.h>

extern FUNC_NORETURN void z_cstart(void);
extern void x86_64_irq_init(void);

@@ -25,6 +26,10 @@ FUNC_NORETURN void z_x86_prep_c(void *arg)
	z_x86_early_serial_init();
#endif

#ifdef CONFIG_MMU
	z_x86_mmu_init();
#endif

#ifdef CONFIG_X86_64
	x86_64_irq_init();
#endif

@@ -35,10 +40,6 @@ FUNC_NORETURN void z_x86_prep_c(void *arg)
	ARG_UNUSED(info);
#endif

#ifdef CONFIG_X86_MMU
	z_x86_paging_init();
#endif

#if CONFIG_X86_STACK_PROTECTION
	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
		z_x86_set_stack_guard(z_interrupt_stacks[i]);
@@ -9,19 +9,20 @@
#include <syscall_handler.h>
#include <kernel_arch_func.h>
#include <ksched.h>
#include <x86_mmu.h>

#ifndef CONFIG_X86_KPTI
/* Change to new set of page tables. ONLY intended for use from
 * z_x88_swap_update_page_tables(). This changes CR3, no memory access
 * afterwards is legal unless it is known for sure that the relevant
 * mappings are identical wrt supervisor mode until we iret out.
/* Set CR3 to a physical address. There must be a valid top-level paging
 * structure here or the CPU will triple fault. The incoming page tables must
 * have the same kernel mappings wrt supervisor mode. Don't use this function
 * unless you know exactly what you are doing.
 */
static inline void page_tables_set(struct x86_page_tables *ptables)
static inline void cr3_set(uintptr_t phys)
{
#ifdef CONFIG_X86_64
	__asm__ volatile("movq %0, %%cr3\n\t" : : "r" (ptables) : "memory");
	__asm__ volatile("movq %0, %%cr3\n\t" : : "r" (phys) : "memory");
#else
	__asm__ volatile("movl %0, %%cr3\n\t" : : "r" (ptables) : "memory");
	__asm__ volatile("movl %0, %%cr3\n\t" : : "r" (phys) : "memory");
#endif
}

@@ -43,7 +44,7 @@ static inline void page_tables_set(struct x86_page_tables *ptables)
 */
void z_x86_swap_update_page_tables(struct k_thread *incoming)
{
	struct x86_page_tables *ptables;
	uintptr_t ptables_phys;

#ifndef CONFIG_X86_64
	/* 64-bit uses syscall/sysret which switches stacks manually,

@@ -57,10 +58,10 @@ void z_x86_swap_update_page_tables(struct k_thread *incoming)
	/* Check first that we actually need to do this, since setting
	 * CR3 involves an expensive full TLB flush.
	 */
	ptables = z_x86_thread_page_tables_get(incoming);
	ptables_phys = incoming->arch.ptables;

	if (ptables != z_x86_page_tables_get()) {
		page_tables_set(ptables);
	if (ptables_phys != z_x86_cr3_get()) {
		cr3_set(ptables_phys);
	}
}
#endif /* CONFIG_X86_KPTI */

@@ -99,7 +100,7 @@ void *z_x86_userspace_prepare_thread(struct k_thread *thread)
		z_x86_thread_pt_init(thread);
		initial_entry = drop_to_user;
	} else {
		thread->arch.ptables = &z_x86_kernel_ptables;
		thread->arch.ptables = z_mem_phys_addr(&z_x86_kernel_ptables);
		initial_entry = z_thread_entry;
	}

@@ -115,7 +116,7 @@ FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
	 * started in user mode already had this done via z_setup_new_thread()
	 */
	if (_current->mem_domain_info.mem_domain != NULL) {
		z_x86_apply_mem_domain(_current->arch.ptables,
		z_x86_apply_mem_domain(_current,
				       _current->mem_domain_info.mem_domain);
	}
[File diff suppressed because it is too large]
@@ -25,9 +25,6 @@ struct x86_cpuboot {
	uint64_t sp;            /* initial stack pointer */
	arch_cpustart_t fn;     /* kernel entry function */
	void *arg;              /* argument for above function */
#ifdef CONFIG_X86_MMU
	struct x86_page_tables *ptables; /* Runtime page tables to install */
#endif /* CONFIG_X86_MMU */
};

typedef struct x86_cpuboot x86_cpuboot_t;
@@ -44,20 +44,6 @@ extern FUNC_NORETURN void z_x86_prep_c(void *arg);
void z_x86_early_serial_init(void);
#endif /* CONFIG_X86_VERY_EARLY_CONSOLE */

#ifdef CONFIG_X86_MMU
/* Create all page tables with boot configuration and enable paging */
void z_x86_paging_init(void);

static inline struct x86_page_tables *
z_x86_thread_page_tables_get(struct k_thread *thread)
{
#ifdef CONFIG_USERSPACE
	return thread->arch.ptables;
#else
	return &z_x86_kernel_ptables;
#endif
}
#endif /* CONFIG_X86_MMU */

/* Called upon CPU exception that is unhandled and hence fatal; dump
 * interesting info and call z_x86_fatal_error()

@@ -102,19 +88,10 @@ extern FUNC_NORETURN void z_x86_userspace_enter(k_thread_entry_t user_entry,
 */
void *z_x86_userspace_prepare_thread(struct k_thread *thread);

void z_x86_thread_pt_init(struct k_thread *thread);

void z_x86_apply_mem_domain(struct x86_page_tables *ptables,
			    struct k_mem_domain *mem_domain);

#endif /* CONFIG_USERSPACE */

void z_x86_do_kernel_oops(const z_arch_esf_t *esf);

#ifdef CONFIG_X86_STACK_PROTECTION
void z_x86_set_stack_guard(k_thread_stack_t *stack);
#endif

#endif /* !_ASMLANGUAGE */

#endif /* ZEPHYR_ARCH_X86_INCLUDE_KERNEL_ARCH_FUNC_H_ */
arch/x86/include/x86_mmu.h (new file, 164 lines)
@@ -0,0 +1,164 @@
/*
 * Copyright (c) 2011-2014 Wind River Systems, Inc.
 * Copyright (c) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Internal memory management interfaces implemented in x86_mmu.c.
 * None of these are application-facing, use only if you know what you are
 * doing!
 */

#ifndef ZEPHYR_ARCH_X86_INCLUDE_X86_MMU_H
#define ZEPHYR_ARCH_X86_INCLUDE_X86_MMU_H

#include <kernel.h>
#include <arch/x86/mmustructs.h>

#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define XD_SUPPORTED
#define BITL		BIT64
#define PRI_ENTRY	"0x%016llx"
#else
#define BITL		BIT
#define PRI_ENTRY	"0x%08x"
#endif

/*
 * Common flags in the same bit position regardless of which structure level,
 * although not every flag is supported at every level, and some may be
 * ignored depending on the state of other bits (such as P or PS)
 *
 * These flags indicate bit position, and can be used for setting flags or
 * masks as needed.
 */

#define MMU_P		BITL(0)		/** Present */
#define MMU_RW		BITL(1)		/** Read-Write */
#define MMU_US		BITL(2)		/** User-Supervisor */
#define MMU_PWT		BITL(3)		/** Page Write Through */
#define MMU_PCD		BITL(4)		/** Page Cache Disable */
#define MMU_A		BITL(5)		/** Accessed */
#define MMU_D		BITL(6)		/** Dirty */
#define MMU_PS		BITL(7)		/** Page Size */
#define MMU_G		BITL(8)		/** Global */
#ifdef XD_SUPPORTED
#define MMU_XD		BITL(63)	/** Execute Disable */
#else
#define MMU_XD		0
#endif

#ifdef CONFIG_EXCEPTION_DEBUG
/**
 * Dump out page table entries for a particular virtual memory address
 *
 * For the provided memory address, dump out interesting information about
 * its mapping to the error log
 *
 * @param ptables Page tables to walk
 * @param virt Virtual address to inspect
 */
void z_x86_dump_mmu_flags(pentry_t *ptables, void *virt);

/**
 * Fetch the page table entry for a virtual memory address
 *
 * @param paging_level [out] what paging level the entry was found at.
 *                     0=toplevel
 * @param val Value stored in page table entry, with address and flags
 * @param ptables Toplevel pointer to page tables
 * @param virt Virtual address to lookup
 */
void z_x86_pentry_get(int *paging_level, pentry_t *val, pentry_t *ptables,
		      void *virt);

/**
 * Debug function for dumping out page tables
 *
 * Iterates through the entire linked set of page table structures,
 * dumping out codes for the configuration of each table entry.
 *
 * Entry codes:
 *
 *   . - not present
 *   w - present, writable, not executable
 *   a - present, writable, executable
 *   r - present, read-only, not executable
 *   x - present, read-only, executable
 *
 * Entry codes in uppercase indicate that user mode may access.
 *
 * Color is used to indicate the physical mapping characteristics:
 *
 *   yellow - Identity mapping (virt = phys)
 *   green - Fixed virtual memory mapping (virt = phys + constant)
 *   magenta - entry is child page table
 *   cyan - General mapped memory
 *
 * @param ptables Top-level pointer to the page tables, as programmed in CR3
 */
void z_x86_dump_page_tables(pentry_t *ptables);
#endif /* CONFIG_EXCEPTION_DEBUG */

#ifdef CONFIG_HW_STACK_PROTECTION
/* Legacy function - set identity-mapped MMU stack guard page to RO in the
 * kernel's page tables to prevent writes and generate an exception
 */
void z_x86_set_stack_guard(k_thread_stack_t *stack);
#endif

#ifdef CONFIG_USERSPACE
#ifdef CONFIG_X86_KPTI
/* Defined in linker script. Contains all the data that must be mapped
 * in a KPTI table even though US bit is not set (trampoline stack, GDT,
 * IDT, etc)
 */
extern uint8_t z_shared_kernel_page_start;
#endif /* CONFIG_X86_KPTI */

/* Set up per-thread page tables just prior to entering user mode */
void z_x86_thread_pt_init(struct k_thread *thread);

/* Apply a memory domain policy to a set of thread page tables */
void z_x86_apply_mem_domain(struct k_thread *thread,
			    struct k_mem_domain *mem_domain);
#endif /* CONFIG_USERSPACE */

/* Return cr3 value, which is the physical (not virtual) address of the
 * current set of page tables
 */
static inline uintptr_t z_x86_cr3_get(void)
{
	uintptr_t cr3;
#ifdef CONFIG_X86_64
	__asm__ volatile("movq %%cr3, %0\n\t" : "=r" (cr3));
#else
	__asm__ volatile("movl %%cr3, %0\n\t" : "=r" (cr3));
#endif
	return cr3;
}

/* Return the virtual address of the page tables installed in this CPU in CR3 */
static inline pentry_t *z_x86_page_tables_get(void)
{
	return z_mem_virt_addr(z_x86_cr3_get());
}

/* Kernel's page table. This is in CR3 for all supervisor threads.
 * if KPTI is enabled, we switch to this when handling exceptions or syscalls
 */
extern pentry_t z_x86_kernel_ptables;

/* Get the page tables used by this thread during normal execution */
static inline pentry_t *z_x86_thread_page_tables_get(struct k_thread *thread)
{
#ifdef CONFIG_USERSPACE
	return z_mem_virt_addr(thread->arch.ptables);
#else
	return &z_x86_kernel_ptables;
#endif
}

/* Early-boot paging setup tasks, called from prep_c */
void z_x86_mmu_init(void);
#endif /* ZEPHYR_ARCH_X86_INCLUDE_X86_MMU_H */
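Several hunks in this commit convert raw table addresses through Z_MEM_PHYS_ADDR(), z_mem_phys_addr(), and z_mem_virt_addr(). A minimal sketch of what these conversions amount to, assuming the mapping offset is simply the difference between CONFIG_KERNEL_VM_BASE and CONFIG_SRAM_BASE_ADDRESS — consistent with the crt0 check `#if CONFIG_KERNEL_VM_BASE != CONFIG_SRAM_BASE_ADDRESS` and the "virt = phys + constant" mapping described in the z_x86_dump_page_tables() documentation, but not the actual <sys/mem_manage.h> definitions:

```c
#include <stdint.h>

/* Assumed fixed offset between the kernel's virtual mapping of RAM and
 * its physical location; illustrative only.
 */
#define Z_MEM_VM_OFFSET \
	(CONFIG_KERNEL_VM_BASE - CONFIG_SRAM_BASE_ADDRESS)

/* virt -> phys: strip the virtual mapping offset */
#define Z_MEM_PHYS_ADDR(virt)	((uintptr_t)(virt) - Z_MEM_VM_OFFSET)
/* phys -> virt: add the virtual mapping offset back */
#define Z_MEM_VIRT_ADDR(phys)	((uintptr_t)(phys) + Z_MEM_VM_OFFSET)

static inline uintptr_t z_mem_phys_addr(void *virt)
{
	return Z_MEM_PHYS_ADDR(virt);
}

static inline void *z_mem_virt_addr(uintptr_t phys)
{
	return (void *)Z_MEM_VIRT_ADDR(phys);
}
```

This is why CR3 and thread->arch.ptables now hold physical addresses (the MMU consumes physical table addresses), while C code that dereferences tables first converts back with z_mem_virt_addr().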
@@ -51,9 +51,12 @@
#define RAMABLE_REGION RAM
#endif

#ifdef CONFIG_X86_MMU
#define MMU_PAGE_SIZE	KB(4)
#define MMU_PAGE_ALIGN	. = ALIGN(MMU_PAGE_SIZE);
/* Used to align areas with separate memory permission characteristics
 * so that the page permissions can be set in the MMU. Without this,
 * the kernel is just one blob with the same RWX permissions on all RAM
 */
#ifdef CONFIG_SRAM_REGION_PERMISSIONS
#define MMU_PAGE_ALIGN	. = ALIGN(CONFIG_MMU_PAGE_SIZE);
#else
#define MMU_PAGE_ALIGN
#endif

@@ -317,9 +320,11 @@ SECTIONS
	__data_rom_start = LOADADDR(_DATA_SECTION_NAME);

#include <linker/common-ram.ld>
#include <linker/kobject.ld>
#include <linker/cplusplus-ram.ld>
#include <arch/x86/pagetables.ld>

/* Must be last in RAM */
#include <linker/kobject.ld>

	MMU_PAGE_ALIGN
	__data_ram_end = .;
@@ -215,11 +215,16 @@ struct _thread_arch {
	uint8_t flags;

#ifdef CONFIG_USERSPACE
	/* Pointer to page tables used by this thread. Supervisor threads
	 * always use the kernel's page table, user thread use per-thread
	 * tables stored in the stack object
	/* Physical address of the page tables used by this thread. Supervisor
	 * threads always use the kernel's page table, user thread use
	 * per-thread tables stored in the stack object.
	 */
	struct x86_page_tables *ptables;
	uintptr_t ptables;

	/* Track available unused space in the stack object used for building
	 * thread-specific page tables.
	 */
	uint8_t *mmu_pos;

	/* Initial privilege mode stack pointer when doing a system call.
	 * Un-set for supervisor threads.
@@ -9,9 +9,12 @@
#define ROMABLE_REGION RAM
#define RAMABLE_REGION RAM

#ifdef CONFIG_X86_MMU
#define MMU_PAGE_SIZE	KB(4)
#define MMU_PAGE_ALIGN	. = ALIGN(MMU_PAGE_SIZE);
/* Used to align areas with separate memory permission characteristics
 * so that the page permissions can be set in the MMU. Without this,
 * the kernel is just one blob with the same RWX permissions on all RAM
 */
#ifdef CONFIG_SRAM_REGION_PERMISSIONS
#define MMU_PAGE_ALIGN	. = ALIGN(CONFIG_MMU_PAGE_SIZE);
#else
#define MMU_PAGE_ALIGN
#endif

@@ -167,10 +170,11 @@ SECTIONS
#include <snippets-ram-sections.ld>
#include <linker/common-ram.ld>
#include <linker/cplusplus-ram.ld>
#include <arch/x86/pagetables.ld>

/* Must be last in RAM */
#include <linker/kobject.ld>

	. = ALIGN(8);
	MMU_PAGE_ALIGN
	_image_ram_end = .;
	_end = .;
@@ -41,6 +41,7 @@
#ifndef _ASMLANGUAGE

#include <zephyr/types.h>
#include <arch/x86/mmustructs.h>

/*
 * 64-bit Task State Segment. One defined per CPU.

@@ -114,11 +115,16 @@ struct _thread_arch {
	uint8_t flags;

#ifdef CONFIG_USERSPACE
	/* Pointer to page tables used by this thread. Supervisor threads
	 * always use the kernel's page table, user thread use per-thread
	 * tables stored in the stack object
	/* Physical address to page tables used by this thread. Supervisor
	 * threads always use the kernel's page table, user thread use
	 * per-thread tables stored in the stack object
	 */
	struct x86_page_tables *ptables;
	uintptr_t ptables;

	/* Track available unused space in the stack object used for building
	 * thread-specific page tables.
	 */
	uint8_t *mmu_pos;

	/* Initial privilege mode stack pointer when doing a system call.
	 * Un-set for supervisor threads.
@ -1,429 +1,161 @@
|
|||
/*
|
||||
* Copyright (c) 2011-2014 Wind River Systems, Inc.
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifndef ZEPHYR_INCLUDE_ARCH_X86_MMUSTRUCTS_H_
|
||||
#define ZEPHYR_INCLUDE_ARCH_X86_MMUSTRUCTS_H_
|
||||
#ifndef ZEPHYR_INCLUDE_ARCH_X86_MMU_H
|
||||
#define ZEPHYR_INCLUDE_ARCH_X86_MMU_H
|
||||
|
||||
#include <sys/util.h>
|
||||
|
||||
#define MMU_PAGE_SIZE 4096UL
|
||||
#define MMU_PAGE_MASK 0xfffU
|
||||
#define MMU_PAGE_SHIFT 12U
|
||||
#define PAGES(x) ((x) << (MMU_PAGE_SHIFT))
|
||||
#define MMU_ARE_IN_SAME_PAGE(a, b) \
|
||||
(((uint32_t)(a) & ~MMU_PAGE_MASK) == ((uint32_t)(b) & ~MMU_PAGE_MASK))
|
||||
#define MMU_IS_ON_PAGE_BOUNDARY(a) (!((uint32_t)(a) & MMU_PAGE_MASK))
|
||||
/* Macros for reserving space for page tables
|
||||
*
|
||||
* Z_X86_NUM_TABLE_PAGES. In order to produce a set of page tables which has
|
||||
* virtual mappings for all system RAM, Z_X86_NUM_TABLE_PAGES is the number of
|
||||
* memory pages required. If CONFIG_X86_PAE is enabled, an additional 0x20
|
||||
* bytes are required for the toplevel 4-entry PDPT.
|
||||
*
|
||||
* Z_X86_INITIAL_PAGETABLE_SIZE is the total amount of memory in bytes
|
||||
* required, for any paging mode.
|
||||
*
|
||||
* These macros are currently used for two purposes:
|
||||
* - Reserving memory in the stack for thread-level page tables (slated
|
||||
* for eventual removal when USERSPACE is reworked to fully utilize
|
||||
* virtual memory and page tables are maintained at the process level)
|
||||
* - Reserving room for dummy pagetable memory for the first link, so that
|
||||
* memory addresses are not disturbed by the insertion of the real page
|
||||
* tables created by gen_mmu.py in the second link phase.
|
||||
*/
|
||||
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
||||
#ifdef CONFIG_X86_64
|
||||
#define Z_X86_NUM_PML4_ENTRIES 512U
|
||||
#define Z_X86_NUM_PDPT_ENTRIES 512U
|
||||
#else
|
||||
#define Z_X86_NUM_PDPT_ENTRIES 4U
|
||||
#endif /* CONFIG_X86_64 */
|
||||
#define Z_X86_NUM_PD_ENTRIES 512U
|
||||
#define Z_X86_NUM_PT_ENTRIES 512U
|
||||
#else
|
||||
#define Z_X86_NUM_PD_ENTRIES 1024U
|
||||
#define Z_X86_NUM_PT_ENTRIES 1024U
|
||||
#endif /* !CONFIG_X86_64 && !CONFIG_X86_PAE */
|
||||
/* Memory range covered by an instance of various table types */
|
||||
#define Z_X86_PT_AREA ((uintptr_t)(CONFIG_MMU_PAGE_SIZE * \
|
||||
Z_X86_NUM_PT_ENTRIES))
|
||||
#define Z_X86_PD_AREA (Z_X86_PT_AREA * Z_X86_NUM_PD_ENTRIES)
|
||||
#ifdef CONFIG_X86_64
|
||||
#define Z_X86_PDPT_AREA (Z_X86_PD_AREA * Z_X86_NUM_PDPT_ENTRIES)
|
||||
#endif
|
||||
|
||||
#define PHYS_RAM_ADDR DT_REG_ADDR(DT_CHOSEN(zephyr_sram))
|
||||
#define PHYS_RAM_SIZE DT_REG_SIZE(DT_CHOSEN(zephyr_sram))
|
||||
|
||||
/* Define a range [Z_X86_PT_START, Z_X86_PT_END) which is the memory range
|
||||
* covered by all the page tables needed for system RAM
|
||||
*/
|
||||
#define Z_X86_PT_START ((uintptr_t)ROUND_DOWN(PHYS_RAM_ADDR, Z_X86_PT_AREA))
|
||||
#define Z_X86_PT_END ((uintptr_t)ROUND_UP(PHYS_RAM_ADDR + PHYS_RAM_SIZE, \
|
||||
Z_X86_PT_AREA))
|
||||
|
||||
/* Number of page tables needed to cover system RAM. Depends on the specific
|
||||
* bounds of system RAM, but roughly 1 page table per 2MB of RAM
|
||||
*/
|
||||
#define Z_X86_NUM_PT ((Z_X86_PT_END - Z_X86_PT_START) / Z_X86_PT_AREA)
|
||||
|
||||
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
||||
/* Same semantics as above, but for the page directories needed to cover
|
||||
* system RAM.
|
||||
*/
|
||||
#define Z_X86_PD_START ((uintptr_t)ROUND_DOWN(PHYS_RAM_ADDR, Z_X86_PD_AREA))
|
||||
#define Z_X86_PD_END ((uintptr_t)ROUND_UP(PHYS_RAM_ADDR + PHYS_RAM_SIZE, \
|
||||
Z_X86_PD_AREA))
|
||||
/* Number of page directories needed to cover system RAM. Depends on the
|
||||
* specific bounds of system RAM, but roughly 1 page directory per 1GB of RAM
|
||||
*/
|
||||
#define Z_X86_NUM_PD ((Z_X86_PD_END - Z_X86_PD_START) / Z_X86_PD_AREA)
|
||||
#else
|
||||
/* 32-bit page tables just have one toplevel page directory */
|
||||
#define Z_X86_NUM_PD 1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Same semantics as above, but for the page directory pointer tables needed
|
||||
* to cover system RAM. On 32-bit there is just one 4-entry PDPT.
|
||||
*/
|
||||
#define Z_X86_PDPT_START ((uintptr_t)ROUND_DOWN(PHYS_RAM_ADDR, \
|
||||
Z_X86_PDPT_AREA))
|
||||
#define Z_X86_PDPT_END ((uintptr_t)ROUND_UP(PHYS_RAM_ADDR + PHYS_RAM_SIZE, \
|
||||
Z_X86_PDPT_AREA))
|
||||
/* Number of PDPTs needed to cover system RAM. Depends on the
|
||||
* specific bounds of system RAM, but roughly 1 PDPT per 512GB of RAM
|
||||
*/
|
||||
#define Z_X86_NUM_PDPT ((Z_X86_PDPT_END - Z_X86_PDPT_START) / Z_X86_PDPT_AREA)
|
||||
|
||||
/* All pages needed for page tables, using computed values plus one more for
|
||||
* the top-level PML4
|
||||
*/
|
||||
#define Z_X86_NUM_TABLE_PAGES (Z_X86_NUM_PT + Z_X86_NUM_PD + \
|
||||
Z_X86_NUM_PDPT + 1)
|
||||
#else /* !CONFIG_X86_64 */
|
||||
/* Number of pages we need to reserve in the stack for per-thread page tables */
|
||||
#define Z_X86_NUM_TABLE_PAGES (Z_X86_NUM_PT + Z_X86_NUM_PD)
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
/* Toplevel PDPT wasn't included as it is not a page in size */
|
||||
#define Z_X86_INITIAL_PAGETABLE_SIZE ((Z_X86_NUM_TABLE_PAGES * \
|
||||
CONFIG_MMU_PAGE_SIZE) + 0x20)
|
||||
#else
|
||||
#define Z_X86_INITIAL_PAGETABLE_SIZE (Z_X86_NUM_TABLE_PAGES * \
|
||||
CONFIG_MMU_PAGE_SIZE)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Common flags in the same bit position regardless of which structure level,
|
||||
* although not every flag is supported at every level, and some may be
|
||||
* ignored depending on the state of other bits (such as P or PS)
|
||||
* K_MEM_PARTITION_* defines
|
||||
*
|
||||
* These flags indicate bit position, and can be used for setting flags or
|
||||
* masks as needed.
|
||||
* Slated for removal when virtual memory is implemented, memory
|
||||
* mapping APIs will replace memory domains.
|
||||
*/
|
||||
|
||||
#define Z_X86_MMU_P BIT64(0) /** Present */
|
||||
#define Z_X86_MMU_RW BIT64(1) /** Read-Write */
|
||||
#define Z_X86_MMU_US BIT64(2) /** User-Supervisor */
|
||||
#define Z_X86_MMU_PWT BIT64(3) /** Page Write Through */
|
||||
#define Z_X86_MMU_PCD BIT64(4) /** Page Cache Disable */
|
||||
#define Z_X86_MMU_A BIT64(5) /** Accessed */
|
||||
#define Z_X86_MMU_D BIT64(6) /** Dirty */
|
||||
#define Z_X86_MMU_PS BIT64(7) /** Page Size */
|
||||
#define Z_X86_MMU_G BIT64(8) /** Global */
|
||||
#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
|
||||
#define Z_X86_MMU_XD BIT64(63) /** Execute Disable */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define Z_X86_MMU_PROT_KEY_MASK 0x7800000000000000ULL
|
||||
#else
|
||||
#define Z_X86_MMU_XD 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Structure-specific flags / masks
|
||||
/* Always true with 32-bit page tables, don't enable
|
||||
* CONFIG_EXECUTE_XOR_WRITE and expect it to work for you
|
||||
*/
|
||||
#define Z_X86_MMU_PDPTE_PAT BIT64(12)
|
||||
#define Z_X86_MMU_PDE_PAT BIT64(12)
|
||||
#define Z_X86_MMU_PTE_PAT BIT64(7) /** Page Attribute Table */
|
||||
|
||||
/* The true size of the mask depends on MAXADDR, which is found at run-time.
|
||||
* As a simplification, roll the area for the memory address, and the
|
||||
* reserved or ignored regions immediately above it, into a single area.
|
||||
* This will work as expected if valid memory addresses are written.
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
#define Z_X86_MMU_PML4E_PDPT_MASK 0x7FFFFFFFFFFFF000ULL
|
||||
#endif
|
||||
#define Z_X86_MMU_PDPTE_PD_MASK 0x7FFFFFFFFFFFF000ULL
|
||||
#ifdef CONFIG_X86_64
|
||||
#define Z_X86_MMU_PDPTE_1G_MASK 0x07FFFFFFC0000000ULL
|
||||
#endif
|
||||
#define Z_X86_MMU_PDE_PT_MASK 0x7FFFFFFFFFFFF000ULL
|
||||
#define Z_X86_MMU_PDE_2MB_MASK 0x07FFFFFFFFC00000ULL
|
||||
#define Z_X86_MMU_PTE_ADDR_MASK 0x07FFFFFFFFFFF000ULL
|
||||
|
||||
/*
|
||||
* These flags indicate intention when setting access properties.
|
||||
*/
|
||||
|
||||
#define MMU_ENTRY_NOT_PRESENT 0ULL
|
||||
#define MMU_ENTRY_PRESENT Z_X86_MMU_P
|
||||
|
||||
#define MMU_ENTRY_READ 0ULL
|
||||
#define MMU_ENTRY_WRITE Z_X86_MMU_RW
|
||||
|
||||
#define MMU_ENTRY_SUPERVISOR 0ULL
|
||||
#define MMU_ENTRY_USER Z_X86_MMU_US
|
||||
|
||||
#define MMU_ENTRY_WRITE_BACK 0ULL
|
||||
#define MMU_ENTRY_WRITE_THROUGH Z_X86_MMU_PWT
|
||||
|
||||
#define MMU_ENTRY_CACHING_ENABLE 0ULL
|
||||
#define MMU_ENTRY_CACHING_DISABLE Z_X86_MMU_PCD
|
||||
|
||||
#define MMU_ENTRY_NOT_ACCESSED 0ULL
|
||||
#define MMU_ENTRY_ACCESSED Z_X86_MMU_A
|
||||
|
||||
#define MMU_ENTRY_NOT_DIRTY 0ULL
|
||||
#define MMU_ENTRY_DIRTY Z_X86_MMU_D
|
||||
|
||||
#define MMU_ENTRY_NOT_GLOBAL 0ULL
|
||||
#define MMU_ENTRY_GLOBAL Z_X86_MMU_G
|
||||
|
||||
#define MMU_ENTRY_EXECUTE_DISABLE Z_X86_MMU_XD
|
||||
#define MMU_ENTRY_EXECUTE_ENABLE 0ULL
|
||||
|
||||
/* memory partition arch/soc independent attribute */
|
||||
#define K_MEM_PARTITION_P_RW_U_RW (MMU_ENTRY_WRITE | \
|
||||
MMU_ENTRY_USER | \
|
||||
MMU_ENTRY_EXECUTE_DISABLE)
|
||||
|
||||
#define K_MEM_PARTITION_P_RW_U_NA (MMU_ENTRY_WRITE | \
|
||||
MMU_ENTRY_SUPERVISOR | \
|
||||
MMU_ENTRY_EXECUTE_DISABLE)
|
||||
|
||||
#define K_MEM_PARTITION_P_RO_U_RO (MMU_ENTRY_READ | \
|
||||
MMU_ENTRY_USER | \
|
||||
MMU_ENTRY_EXECUTE_DISABLE)
|
||||
|
||||
#define K_MEM_PARTITION_P_RO_U_NA (MMU_ENTRY_READ | \
|
||||
MMU_ENTRY_SUPERVISOR | \
|
||||
MMU_ENTRY_EXECUTE_DISABLE)
|
||||
|
||||
/* Execution-allowed attributes */
|
||||
#define K_MEM_PARTITION_P_RWX_U_RWX (MMU_ENTRY_WRITE | MMU_ENTRY_USER)
|
||||
|
||||
#define K_MEM_PARTITION_P_RWX_U_NA (MMU_ENTRY_WRITE | MMU_ENTRY_SUPERVISOR)
|
||||
|
||||
#define K_MEM_PARTITION_P_RX_U_RX (MMU_ENTRY_READ | MMU_ENTRY_USER)
|
||||
|
||||
#define K_MEM_PARTITION_P_RX_U_NA (MMU_ENTRY_READ | MMU_ENTRY_SUPERVISOR)
|
||||
|
||||
#define K_MEM_PARTITION_IS_EXECUTABLE(attr) (((attr) & Z_X86_MMU_XD) == 0)
|
||||
#define K_MEM_PARTITION_IS_WRITABLE(attr) (((attr) & Z_X86_MMU_RW) != 0)
|
||||
|
||||
/* memory partition arch/soc independent attribute */
|
||||
#define K_MEM_PARTITION_P_RW_U_RW (Z_X86_MMU_RW | Z_X86_MMU_US | \
|
||||
Z_X86_MMU_XD)
|
||||
#define K_MEM_PARTITION_P_RW_U_NA (Z_X86_MMU_RW | Z_X86_MMU_XD)
|
||||
#define K_MEM_PARTITION_P_RO_U_RO (Z_X86_MMU_US | Z_X86_MMU_XD)
|
||||
#define K_MEM_PARTITION_P_RO_U_NA Z_X86_MMU_XD
|
||||
/* Execution-allowed attributes */
|
||||
#define K_MEM_PARTITION_P_RWX_U_RWX (Z_X86_MMU_RW | Z_X86_MMU_US)
|
||||
#define K_MEM_PARTITION_P_RWX_U_NA Z_X96_MMU_RW
|
||||
#define K_MEM_PARTITION_P_RX_U_RX Z_X86_MMU_US
|
||||
#define K_MEM_PARTITION_P_RX_U_NA (0)
|
||||
/* memory partition access permission mask */
|
||||
#define K_MEM_PARTITION_PERM_MASK (Z_X86_MMU_RW | Z_X86_MMU_US | \
|
||||
Z_X86_MMU_XD)
|
||||
#define K_MEM_PARTITION_PERM_MASK (Z_X86_MMU_RW | Z_X86_MMU_US | \
|
||||
Z_X86_MMU_XD)
|
||||
|
||||
#ifndef _ASMLANGUAGE
|
||||
#include <sys/__assert.h>
|
||||
#include <zephyr/types.h>
|
||||
|
||||
/* Structure used by gen_mmu.py to create page directories and page tables.
|
||||
* In order to populate this structure use macro MMU_BOOT_REGION.
|
||||
/* Page table entry data type at all levels. Defined here due to
|
||||
* k_mem_partition_attr_t, eventually move to private x86_mmu.h
|
||||
*/
|
||||
struct mmu_region {
|
||||
uintptr_t address; /*Start address of the memory region */
|
||||
size_t size; /* Size of the memory region*/
|
||||
uint64_t flags; /* Permissions needed for this region*/
|
||||
};
|
||||
|
||||
/* permission_flags are calculated using the macros
|
||||
* region_size has to be provided in bytes
|
||||
* for read write access = MMU_ENTRY_READ/MMU_ENTRY_WRITE
|
||||
* for supervisor/user mode access = MMU_ENTRY_SUPERVISOR/MMU_ENTRY_USER
|
||||
*
|
||||
* Preprocessor indirection layers used to ensure __COUNTER__ is expanded
|
||||
* properly.
|
||||
*/
|
||||
|
||||
#define __MMU_BOOT_REGION(id, addr, region_size, permission_flags) \
|
||||
static const Z_STRUCT_SECTION_ITERABLE(mmu_region, region_##id) = \
|
||||
{ \
|
||||
.address = (uintptr_t)(addr), \
|
||||
.size = (size_t)(region_size), \
|
||||
.flags = (permission_flags), \
|
||||
}
|
||||
|
||||
#define Z_MMU_BOOT_REGION(id, addr, region_size, permission_flags) \
|
||||
__MMU_BOOT_REGION(id, addr, region_size, permission_flags)
|
||||
|
||||
#define MMU_BOOT_REGION(addr, region_size, permission_flags) \
|
||||
Z_MMU_BOOT_REGION(__COUNTER__, addr, region_size, permission_flags)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define Z_X86_NUM_PML4_ENTRIES 512U
|
||||
#define Z_X86_NUM_PDPT_ENTRIES 512U
|
||||
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
||||
typedef uint64_t pentry_t;
|
||||
#else
|
||||
#define Z_X86_NUM_PDPT_ENTRIES 4U
|
||||
typedef uint32_t pentry_t;
|
||||
#endif
|
||||
#define Z_X86_NUM_PD_ENTRIES 512U
|
||||
#define Z_X86_NUM_PT_ENTRIES 512U
|
||||
|
||||
/* Memory range covered by an instance of various table types */
|
||||
#define Z_X86_PT_AREA (MMU_PAGE_SIZE * Z_X86_NUM_PT_ENTRIES)
|
||||
#define Z_X86_PD_AREA (Z_X86_PT_AREA * Z_X86_NUM_PD_ENTRIES)
|
||||
#define Z_X86_PDPT_AREA (Z_X86_PD_AREA * Z_X86_NUM_PDPT_ENTRIES)
|
||||
|
||||
typedef uint64_t k_mem_partition_attr_t;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
struct x86_mmu_pml4 {
|
||||
uint64_t entry[Z_X86_NUM_PML4_ENTRIES];
|
||||
};
|
||||
#endif
|
||||
|
||||
struct x86_mmu_pdpt {
|
||||
uint64_t entry[Z_X86_NUM_PDPT_ENTRIES];
|
||||
};
|
||||
|
||||
struct x86_mmu_pd {
|
||||
uint64_t entry[Z_X86_NUM_PD_ENTRIES];
|
||||
};
|
||||
|
||||
struct x86_mmu_pt {
|
||||
uint64_t entry[Z_X86_NUM_PT_ENTRIES];
|
||||
};
|
||||
|
||||
struct x86_page_tables {
|
||||
#ifdef CONFIG_X86_64
|
||||
struct x86_mmu_pml4 pml4;
|
||||
#else
|
||||
struct x86_mmu_pdpt pdpt;
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Inline functions for getting the next linked structure
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
static inline uint64_t *z_x86_pml4_get_pml4e(struct x86_mmu_pml4 *pml4,
|
||||
uintptr_t addr)
|
||||
{
|
||||
int index = (addr >> 39U) & (Z_X86_NUM_PML4_ENTRIES - 1);
|
||||
|
||||
return &pml4->entry[index];
|
||||
}
|
||||
|
||||
static inline struct x86_mmu_pdpt *z_x86_pml4e_get_pdpt(uint64_t pml4e)
|
||||
{
|
||||
uintptr_t addr = pml4e & Z_X86_MMU_PML4E_PDPT_MASK;
|
||||
|
||||
return (struct x86_mmu_pdpt *)addr;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline uint64_t *z_x86_pdpt_get_pdpte(struct x86_mmu_pdpt *pdpt,
|
||||
uintptr_t addr)
|
||||
{
|
||||
int index = (addr >> 30U) & (Z_X86_NUM_PDPT_ENTRIES - 1);
|
||||
|
||||
return &pdpt->entry[index];
|
||||
}
|
||||
|
||||
static inline struct x86_mmu_pd *z_x86_pdpte_get_pd(uint64_t pdpte)
|
||||
{
|
||||
uintptr_t addr = pdpte & Z_X86_MMU_PDPTE_PD_MASK;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
__ASSERT((pdpte & Z_X86_MMU_PS) == 0, "PDPT is for 1GB page");
|
||||
#endif
|
||||
return (struct x86_mmu_pd *)addr;
|
||||
}
|
||||
|
||||
static inline uint64_t *z_x86_pd_get_pde(struct x86_mmu_pd *pd, uintptr_t addr)
|
||||
{
|
||||
int index = (addr >> 21U) & (Z_X86_NUM_PD_ENTRIES - 1);
|
||||
|
||||
return &pd->entry[index];
|
||||
}
|
||||
|
||||
static inline struct x86_mmu_pt *z_x86_pde_get_pt(uint64_t pde)
|
||||
{
|
||||
uintptr_t addr = pde & Z_X86_MMU_PDE_PT_MASK;
|
||||
|
||||
__ASSERT((pde & Z_X86_MMU_PS) == 0, "pde is for 2MB page");
|
||||
|
||||
return (struct x86_mmu_pt *)addr;
|
||||
}
|
||||
|
||||
static inline uint64_t *z_x86_pt_get_pte(struct x86_mmu_pt *pt, uintptr_t addr)
|
||||
{
|
||||
int index = (addr >> 12U) & (Z_X86_NUM_PT_ENTRIES - 1);
|
||||
|
||||
return &pt->entry[index];
|
||||
}
|
||||
|
||||
/*
|
||||
* Inline functions for obtaining page table structures from the top-level
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static inline struct x86_mmu_pml4 *
|
||||
z_x86_get_pml4(struct x86_page_tables *ptables)
|
||||
{
|
||||
return &ptables->pml4;
|
||||
}
|
||||
|
||||
static inline uint64_t *z_x86_get_pml4e(struct x86_page_tables *ptables,
|
||||
uintptr_t addr)
|
||||
{
|
||||
return z_x86_pml4_get_pml4e(z_x86_get_pml4(ptables), addr);
|
||||
}
|
||||
|
||||
static inline struct x86_mmu_pdpt *
|
||||
z_x86_get_pdpt(struct x86_page_tables *ptables, uintptr_t addr)
|
||||
{
|
||||
return z_x86_pml4e_get_pdpt(*z_x86_get_pml4e(ptables, addr));
|
||||
}
|
||||
#else
|
||||
static inline struct x86_mmu_pdpt *
|
||||
z_x86_get_pdpt(struct x86_page_tables *ptables, uintptr_t addr)
|
||||
{
|
||||
ARG_UNUSED(addr);
|
||||
|
||||
return &ptables->pdpt;
|
||||
}
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
static inline uint64_t *z_x86_get_pdpte(struct x86_page_tables *ptables,
|
||||
uintptr_t addr)
|
||||
{
|
||||
return z_x86_pdpt_get_pdpte(z_x86_get_pdpt(ptables, addr), addr);
|
||||
}
|
||||
|
||||
static inline struct x86_mmu_pd *
|
||||
z_x86_get_pd(struct x86_page_tables *ptables, uintptr_t addr)
|
||||
{
|
||||
return z_x86_pdpte_get_pd(*z_x86_get_pdpte(ptables, addr));
|
||||
}
|
||||
|
||||
static inline uint64_t *z_x86_get_pde(struct x86_page_tables *ptables,
|
||||
uintptr_t addr)
|
||||
{
|
||||
return z_x86_pd_get_pde(z_x86_get_pd(ptables, addr), addr);
|
||||
}
|
||||
|
||||
static inline struct x86_mmu_pt *
|
||||
z_x86_get_pt(struct x86_page_tables *ptables, uintptr_t addr)
|
||||
{
|
||||
return z_x86_pde_get_pt(*z_x86_get_pde(ptables, addr));
|
||||
}
|
||||
|
||||
static inline uint64_t *z_x86_get_pte(struct x86_page_tables *ptables,
|
||||
uintptr_t addr)
|
||||
{
|
||||
return z_x86_pt_get_pte(z_x86_get_pt(ptables, addr), addr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Dump out page table entries for a particular memory address
|
||||
*
|
||||
* For the provided memory address, dump out the P, W, XD, US flags
|
||||
* at each paging level to the error log.
|
||||
*/
|
||||
void z_x86_dump_mmu_flags(struct x86_page_tables *ptables, uintptr_t addr);
|
||||
|
||||
/**
|
||||
* Debug function for dumping out page tables
|
||||
*
|
||||
* Iterates through the entire linked set of page table structures,
|
||||
* dumping out codes for the configuration of each table entry.
|
||||
*
|
||||
* Entry codes:
|
||||
*
|
||||
* . - not present
|
||||
* w - present, writable, not executable
|
||||
* a - present, writable, executable
|
||||
* r - present, read-only, not executable
|
||||
* x - present, read-only, executable
|
||||
*
|
||||
* Entry codes in uppercase indicate that user mode may access.
|
||||
*
|
||||
* @param ptables Top-level pointer to the page tables, as programmed in CR3
|
||||
*/
|
||||
void z_x86_dump_page_tables(struct x86_page_tables *ptables);
static inline struct x86_page_tables *z_x86_page_tables_get(void)
{
	struct x86_page_tables *ret;

#ifdef CONFIG_X86_64
	__asm__ volatile("movq %%cr3, %0\n\t" : "=r" (ret));
#else
	__asm__ volatile("movl %%cr3, %0\n\t" : "=r" (ret));
#endif

	return ret;
}
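/* Example use, as a sketch: sanity-check that the CPU is currently
 * running on the kernel's page tables (z_x86_kernel_ptables is declared
 * just below):
 *
 *	__ASSERT(z_x86_page_tables_get() == &z_x86_kernel_ptables,
 *		 "unexpected page tables installed");
 */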
/* Kernel's page tables. Always active when threads are running in supervisor
 * mode, or handling an interrupt.
 *
 * If KPTI is not enabled, this is used as a template to create per-thread
 * page tables for when threads run in user mode.
 */
extern struct x86_page_tables z_x86_kernel_ptables;
#ifdef CONFIG_X86_KPTI
/* Separate page tables for user mode threads. This is never installed into
 * the CPU; instead it is used as a template for creating per-thread page
 * tables.
 */
extern struct x86_page_tables z_x86_user_ptables;
#define USER_PTABLES z_x86_user_ptables
#else
#define USER_PTABLES z_x86_kernel_ptables
#endif

/**
 * @brief Fetch page table flags for a particular page
 *
 * Given a memory address, return the flags for the containing page's
 * PDE and PTE entries. Intended for debugging.
 *
 * @param ptables Which set of page tables to use
 * @param addr Memory address to examine
 * @param pde_flags Output parameter for page directory entry flags
 * @param pte_flags Output parameter for page table entry flags
 */
void z_x86_mmu_get_flags(struct x86_page_tables *ptables, void *addr,
			 uint64_t *pde_flags, uint64_t *pte_flags);
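/* Hypothetical debug usage, dumping the flags for some address of
 * interest (the printk formatting is illustrative):
 *
 *	uint64_t pde_flags, pte_flags;
 *
 *	z_x86_mmu_get_flags(&z_x86_kernel_ptables, addr,
 *			    &pde_flags, &pte_flags);
 *	printk("PDE flags 0x%llx PTE flags 0x%llx\n",
 *	       (unsigned long long)pde_flags,
 *	       (unsigned long long)pte_flags);
 */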
/**
 * @brief Set flags in the MMU page tables
 *
 * Modify bits in the existing page tables for a particular memory
 * range, which must be page-aligned.
 *
 * @param ptables Which set of page tables to use
 * @param ptr Starting memory address, which must be page-aligned
 * @param size Size of the region, which must be a multiple of the page size
 * @param flags Value of bits to set in the page table entries
 * @param mask Mask indicating which particular bits in the page table entries
 *             to modify
 * @param flush Whether to flush the TLB for the modified pages, only needed
 *              when modifying the active page tables
 */
void z_x86_mmu_set_flags(struct x86_page_tables *ptables, void *ptr,
			 size_t size, uint64_t flags, uint64_t mask,
			 bool flush);
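/* Usage sketch: write-protect one page-aligned page in the live kernel
 * tables. Passing flags=0 with mask=Z_X86_MMU_RW clears the writable
 * bit; flush=true because the tables are active. Z_X86_MMU_RW is
 * assumed to be the read/write flag defined earlier in this header.
 *
 *	z_x86_mmu_set_flags(&z_x86_kernel_ptables, page_aligned_buf,
 *			    CONFIG_MMU_PAGE_SIZE, 0, Z_X86_MMU_RW, true);
 */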
int z_x86_mmu_validate(struct x86_page_tables *ptables, void *addr,
		       size_t size, bool write);

void z_x86_add_mmu_region(uintptr_t addr, size_t size, uint64_t flags);

typedef pentry_t k_mem_partition_attr_t;
#endif /* _ASMLANGUAGE */

#endif /* ZEPHYR_INCLUDE_ARCH_X86_MMUSTRUCTS_H_ */
#endif /* ZEPHYR_INCLUDE_ARCH_X86_MMU_H */
39 include/arch/x86/pagetables.ld Normal file
@ -0,0 +1,39 @@
/*
 * Copyright (c) 2020 Intel Corp.
 * SPDX-License-Identifier: Apache-2.0
 */

/* Page tables. These are produced by arch/x86/gen_mmu.py based on
 * data in zephyr_prebuilt.elf (the result of linker pass 1).
 * For the pass 1 build, an equal-sized dummy area is provided so as
 * not to shift the memory addresses that occur after this.
 */
#ifdef CONFIG_MMU
	SECTION_DATA_PROLOGUE(pagetables,,)
	{
		. = ALIGN(4096);
		z_x86_pagetables_start = .;
#ifdef LINKER_PASS2
		KEEP(*(pagetables)) /* gen_mmu.py */
#else
		KEEP(*(.dummy_pagetables)) /* from x86_mmu.c, just an empty array */
#endif /* LINKER_PASS2 */

		/* The top-level paging structure is the last thing in this section */
#ifdef CONFIG_X86_PAE
		/* 4-entry PDPT */
		z_x86_kernel_ptables = . - 32;
#else
		/* Page directory or PML4 */
		z_x86_kernel_ptables = . - 4096;
#endif /* CONFIG_X86_PAE */
	} GROUP_DATA_LINK_IN(RAMABLE_REGION, ROMABLE_REGION)

#ifdef LINKER_PASS2
	/DISCARD/ :
	{
		/* We have the real ones in this build */
		*(.dummy_pagetables)
	}
#endif /* LINKER_PASS2 */
#endif /* CONFIG_MMU */
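/* For reference, the pass-1 placeholder in x86_mmu.c might look like the
 * following sketch. Z_X86_INITIAL_PTABLE_SIZE is a hypothetical name for
 * the build-computed reservation, not necessarily the real identifier:
 *
 *	static __aligned(4096) Z_GENERIC_SECTION(.dummy_pagetables)
 *	char dummy_pagetables[Z_X86_INITIAL_PTABLE_SIZE];
 */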
@ -19,13 +19,16 @@
 * user mode. For each thread, we have:
 *
 * - On 32-bit
 *   - a toplevel PD
 * - On 32-bit (PAE)
 *   - a toplevel PDPT
 *   - a set of PDs for the memory range covered by system RAM
 * - On 64-bit
 *   - a toplevel PML4
 *   - a set of PDPTs for the memory range covered by system RAM
 *   - a set of PDs for the memory range covered by system RAM
 * - On all modes:
 *   - a set of page directories for the memory range covered by system RAM
 *   - a set of page tables for the memory range covered by system RAM
 *   - a set of PTs for the memory range covered by system RAM
 *
 * Directories and tables for memory ranges outside of system RAM will be
 * shared and not thread-specific.
@ -44,73 +47,21 @@
 *
 * The PDPT is a fairly small singleton on x86 PAE (32 bytes) and also must
 * be aligned to 32 bytes, so we place it at the highest addresses of the
 * page reserved for the privilege elevation stack. On 64-bit all table
 * entities up to and including the PML4 are page-sized.
 * page reserved for the privilege elevation stack. On 64-bit or legacy
 * 32-bit, all table entities up to and including the PML4 are page-sized.
 *
 * The page directories and tables require page alignment, so we put them as
 * additional fields in the stack object, using the macros below to compute
 * how many pages we need.
 */

#define PHYS_RAM_ADDR	DT_REG_ADDR(DT_CHOSEN(zephyr_sram))
#define PHYS_RAM_SIZE	DT_REG_SIZE(DT_CHOSEN(zephyr_sram))

/* Define a range [Z_X86_PT_START, Z_X86_PT_END) which is the memory range
 * covered by all the page tables needed for system RAM.
 */
#define Z_X86_PT_START	((uintptr_t)ROUND_DOWN(PHYS_RAM_ADDR, Z_X86_PT_AREA))
#define Z_X86_PT_END	((uintptr_t)ROUND_UP(PHYS_RAM_ADDR + PHYS_RAM_SIZE, \
					     Z_X86_PT_AREA))

/* Number of page tables needed to cover system RAM. Depends on the specific
 * bounds of system RAM, but roughly 1 page table per 2MB of RAM.
 */
#define Z_X86_NUM_PT	((Z_X86_PT_END - Z_X86_PT_START) / Z_X86_PT_AREA)
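/* Worked example with assumed board values: PHYS_RAM_ADDR = 0x100000,
 * PHYS_RAM_SIZE = 12MB, and Z_X86_PT_AREA = 2MB:
 *
 *   Z_X86_PT_START = ROUND_DOWN(0x100000, 2MB) = 0x000000
 *   Z_X86_PT_END   = ROUND_UP(0xD00000, 2MB)   = 0xE00000
 *   Z_X86_NUM_PT   = 0xE00000 / 0x200000       = 7
 */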
/* Same semantics as above, but for the page directories needed to cover
 * system RAM.
 */
#define Z_X86_PD_START	((uintptr_t)ROUND_DOWN(PHYS_RAM_ADDR, Z_X86_PD_AREA))
#define Z_X86_PD_END	((uintptr_t)ROUND_UP(PHYS_RAM_ADDR + PHYS_RAM_SIZE, \
					     Z_X86_PD_AREA))
/* Number of page directories needed to cover system RAM. Depends on the
 * specific bounds of system RAM, but roughly 1 page directory per 1GB of RAM.
 */
#define Z_X86_NUM_PD	((Z_X86_PD_END - Z_X86_PD_START) / Z_X86_PD_AREA)

#ifdef CONFIG_X86_64
/* Same semantics as above, but for the page directory pointer tables needed
 * to cover system RAM. On 32-bit there is just one 4-entry PDPT.
 */
#define Z_X86_PDPT_START	((uintptr_t)ROUND_DOWN(PHYS_RAM_ADDR, \
						       Z_X86_PDPT_AREA))
#define Z_X86_PDPT_END	((uintptr_t)ROUND_UP(PHYS_RAM_ADDR + PHYS_RAM_SIZE, \
					     Z_X86_PDPT_AREA))
/* Number of PDPTs needed to cover system RAM. Depends on the specific
 * bounds of system RAM, but roughly 1 PDPT per 512GB of RAM.
 */
#define Z_X86_NUM_PDPT	((Z_X86_PDPT_END - Z_X86_PDPT_START) / Z_X86_PDPT_AREA)

/* All pages needed for page tables, using computed values plus one more for
 * the top-level PML4.
 */
#define Z_X86_NUM_TABLE_PAGES	(Z_X86_NUM_PT + Z_X86_NUM_PD + \
				 Z_X86_NUM_PDPT + 1)
#else /* !CONFIG_X86_64 */
/* Number of pages we need to reserve in the stack for per-thread page
 * tables.
 */
#define Z_X86_NUM_TABLE_PAGES	(Z_X86_NUM_PT + Z_X86_NUM_PD)
#endif /* CONFIG_X86_64 */
#else /* !CONFIG_USERSPACE */
/* If we're not implementing user mode, then the MMU tables don't get changed
 * on context switch and we don't need any per-thread page tables.
 */
#define Z_X86_NUM_TABLE_PAGES	0UL
#endif /* CONFIG_USERSPACE */

#define Z_X86_THREAD_PT_AREA	(Z_X86_NUM_TABLE_PAGES * MMU_PAGE_SIZE)
#define Z_X86_THREAD_PT_AREA	(Z_X86_NUM_TABLE_PAGES * \
				 (uintptr_t)CONFIG_MMU_PAGE_SIZE)
#else
#define Z_X86_THREAD_PT_AREA	0UL
#endif
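/* Continuing the worked example above on 32-bit PAE, where Z_X86_PD_AREA
 * is 1GB: Z_X86_NUM_PD = 1, so Z_X86_NUM_TABLE_PAGES = 7 + 1 = 8 and
 * Z_X86_THREAD_PT_AREA = 8 * 4096 = 32KB reserved per thread stack object.
 */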
#if defined(CONFIG_HW_STACK_PROTECTION) || defined(CONFIG_USERSPACE)
#define Z_X86_STACK_BASE_ALIGN	MMU_PAGE_SIZE
#define Z_X86_STACK_BASE_ALIGN	CONFIG_MMU_PAGE_SIZE
#else
#define Z_X86_STACK_BASE_ALIGN	ARCH_STACK_PTR_ALIGN
#endif

@ -120,7 +71,7 @@
 * the access control granularity and we don't want other kernel data to
 * unintentionally fall in the latter part of the page
 */
#define Z_X86_STACK_SIZE_ALIGN	MMU_PAGE_SIZE
#define Z_X86_STACK_SIZE_ALIGN	CONFIG_MMU_PAGE_SIZE
#else
#define Z_X86_STACK_SIZE_ALIGN	ARCH_STACK_PTR_ALIGN
#endif
@ -136,7 +87,7 @@ struct z_x86_kernel_stack_data {
 * are page-aligned and we just reserve room for them in
 * Z_X86_THREAD_PT_AREA.
 */
	struct x86_page_tables ptables;
	uint8_t ptables[0x20];
} __aligned(0x20);
#endif /* !CONFIG_X86_64 */
@ -180,14 +131,14 @@ struct z_x86_thread_stack_header {
#endif

#ifdef CONFIG_HW_STACK_PROTECTION
	char guard_page[MMU_PAGE_SIZE];
	char guard_page[CONFIG_MMU_PAGE_SIZE];
#endif

#ifdef CONFIG_USERSPACE
#ifdef CONFIG_X86_64
	char privilege_stack[MMU_PAGE_SIZE];
	char privilege_stack[CONFIG_MMU_PAGE_SIZE];
#else
	char privilege_stack[MMU_PAGE_SIZE -
	char privilege_stack[CONFIG_MMU_PAGE_SIZE -
			     sizeof(struct z_x86_kernel_stack_data)];

	struct z_x86_kernel_stack_data kernel_data;

@ -204,8 +155,8 @@ struct z_x86_thread_stack_header {
	sizeof(struct z_x86_thread_stack_header)

#ifdef CONFIG_HW_STACK_PROTECTION
#define ARCH_KERNEL_STACK_RESERVED	MMU_PAGE_SIZE
#define ARCH_KERNEL_STACK_OBJ_ALIGN	MMU_PAGE_SIZE
#define ARCH_KERNEL_STACK_RESERVED	CONFIG_MMU_PAGE_SIZE
#define ARCH_KERNEL_STACK_OBJ_ALIGN	CONFIG_MMU_PAGE_SIZE
#else
#define ARCH_KERNEL_STACK_RESERVED	0
#define ARCH_KERNEL_STACK_OBJ_ALIGN	ARCH_STACK_PTR_ALIGN