x86: ia32: mark symbols for boot and pinned regions

This marks code and data within x86/ia32 so that they reside in the
boot and pinned regions. This is a step toward enabling demand paging
for the whole kernel.

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
Author: Daniel Leung, 2021-02-25 16:42:53 -08:00
Committed by: Kumar Gala
Commit: 2c2d313cb9
13 changed files with 134 additions and 27 deletions
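
As background for the diff below, here is a minimal sketch of what the
boot/pinned tags are assumed to expand to. The section names, macro
bodies, and the example function are illustrative assumptions for this
note, not the actual Zephyr definitions (which live in the linker
section tag headers).

/* Assumed expansion: each tag places the symbol into a dedicated linker
 * output section, so boot-only code can be discarded after startup and
 * pinned code/data stays resident (never paged out) under demand paging.
 */
#define __boot_func      __attribute__((__section__(".boot_text")))
#define __pinned_func    __attribute__((__section__(".pinned_text")))
#define __pinned_bss     __attribute__((__section__(".pinned_bss")))
#define __pinned_data    __attribute__((__section__(".pinned_data")))
#define __pinned_rodata  __attribute__((__section__(".pinned_rodata")))
#define __pinned_noinit  __attribute__((__section__(".pinned_noinit")))

/* Hypothetical example: anything on the fault-handling path must itself
 * be pinned, or servicing a page fault could fault again.
 */
__pinned_func
void example_fault_path_helper(void)
{
	/* code and data referenced here must never be paged out */
}

The assembly-side changes below follow the same idea: SECTION_FUNC() and
SECTION_VAR() with BOOT_TEXT, PINNED_TEXT, or PINNED_RODATA place the
entry point, the exception/interrupt stubs, and the GDT into the
corresponding regions.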


@ -7,6 +7,7 @@
#include <tracing/tracing.h>
#include <arch/cpu.h>
__pinned_func
void arch_cpu_idle(void)
{
sys_trace_idle();
@ -15,6 +16,7 @@ void arch_cpu_idle(void)
"hlt\n\t");
}
__pinned_func
void arch_cpu_atomic_idle(unsigned int key)
{
sys_trace_idle();


@ -52,6 +52,7 @@ static inline uintptr_t esf_get_code(const z_arch_esf_t *esf)
}
#ifdef CONFIG_THREAD_STACK_INFO
__pinned_func
bool z_x86_check_stack_bounds(uintptr_t addr, size_t size, uint16_t cs)
{
uintptr_t start, end;
@ -105,6 +106,7 @@ struct stack_frame {
#define MAX_STACK_FRAMES 8
__pinned_func
static void unwind_stack(uintptr_t base_ptr, uint16_t cs)
{
struct stack_frame *frame;
@ -174,6 +176,7 @@ static inline pentry_t *get_ptables(const z_arch_esf_t *esf)
}
#ifdef CONFIG_X86_64
__pinned_func
static void dump_regs(const z_arch_esf_t *esf)
{
LOG_ERR("RAX: 0x%016lx RBX: 0x%016lx RCX: 0x%016lx RDX: 0x%016lx",
@ -196,6 +199,7 @@ static void dump_regs(const z_arch_esf_t *esf)
#endif
}
#else /* 32-bit */
__pinned_func
static void dump_regs(const z_arch_esf_t *esf)
{
LOG_ERR("EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x",
@ -215,6 +219,7 @@ static void dump_regs(const z_arch_esf_t *esf)
}
#endif /* CONFIG_X86_64 */
__pinned_func
static void log_exception(uintptr_t vector, uintptr_t code)
{
switch (vector) {
@ -284,6 +289,7 @@ static void log_exception(uintptr_t vector, uintptr_t code)
}
}
__pinned_func
static void dump_page_fault(z_arch_esf_t *esf)
{
uintptr_t err;
@ -317,6 +323,7 @@ static void dump_page_fault(z_arch_esf_t *esf)
}
#endif /* CONFIG_EXCEPTION_DEBUG */
__pinned_func
FUNC_NORETURN void z_x86_fatal_error(unsigned int reason,
const z_arch_esf_t *esf)
{
@ -339,6 +346,7 @@ FUNC_NORETURN void z_x86_fatal_error(unsigned int reason,
CODE_UNREACHABLE;
}
__pinned_func
FUNC_NORETURN void z_x86_unhandled_cpu_exception(uintptr_t vector,
const z_arch_esf_t *esf)
{
@ -358,6 +366,7 @@ static const struct z_exc_handle exceptions[] = {
};
#endif
__pinned_func
void z_x86_page_fault_handler(z_arch_esf_t *esf)
{
#ifdef CONFIG_DEMAND_PAGING
@ -433,6 +442,7 @@ void z_x86_page_fault_handler(z_arch_esf_t *esf)
CODE_UNREACHABLE;
}
__pinned_func
void z_x86_do_kernel_oops(const z_arch_esf_t *esf)
{
uintptr_t reason;


@ -97,7 +97,7 @@ vm_enter:
#endif /* CONFIG_X86_MMU */
.endm
SECTION_FUNC(TEXT, __start)
SECTION_FUNC(BOOT_TEXT, __start)
#include "../common.S"
@ -290,14 +290,6 @@ z_x86_idt:
#ifdef CONFIG_SET_GDT
/* GDT should be aligned on 8-byte boundary for best processor
* performance, see Section 3.5.1 of IA architecture SW developer
* manual, Vol 3.
*/
.balign 8
/*
* The following 3 GDT entries implement the so-called "basic
* flat model", i.e. a single code segment descriptor and a single
@ -314,10 +306,17 @@ z_x86_idt:
* and fails to set it.
*/
SECTION_VAR(PINNED_RODATA, _gdt_rom)
#ifndef CONFIG_GDT_DYNAMIC
_gdt:
#endif
_gdt_rom:
/* GDT should be aligned on 8-byte boundary for best processor
* performance, see Section 3.5.1 of IA architecture SW developer
* manual, Vol 3.
*/
.balign 8
/* Entry 0 (selector=0x0000): The "NULL descriptor". The CPU never
 * actually looks at this entry, so we stuff the 6-byte pseudo


@ -53,7 +53,7 @@
*
*/
SECTION_FUNC(TEXT, _exception_enter)
SECTION_FUNC(PINNED_TEXT, _exception_enter)
/*
* The gen_idt tool creates an interrupt-gate descriptor for
@ -234,7 +234,7 @@ nestedException:
/* Pop of EFLAGS will re-enable interrupts and restore direction flag */
KPTI_IRET
SECTION_FUNC(TEXT, _kernel_oops_handler)
SECTION_FUNC(PINNED_TEXT, _kernel_oops_handler)
push $0 /* dummy error code */
push $z_x86_do_kernel_oops
jmp _exception_enter


@ -29,6 +29,7 @@ unsigned int z_x86_exception_vector;
__weak void z_debug_fatal_hook(const z_arch_esf_t *esf) { ARG_UNUSED(esf); }
__pinned_func
void z_x86_spurious_irq(const z_arch_esf_t *esf)
{
int vector = z_irq_controller_isr_vector_get();
@ -40,6 +41,7 @@ void z_x86_spurious_irq(const z_arch_esf_t *esf)
z_x86_fatal_error(K_ERR_SPURIOUS_IRQ, esf);
}
__pinned_func
void arch_syscall_oops(void *ssf)
{
struct _x86_syscall_stack_frame *ssf_ptr =
@ -62,6 +64,7 @@ NANO_CPU_INT_REGISTER(_kernel_oops_handler, NANO_SOFT_IRQ,
Z_X86_OOPS_VECTOR / 16, Z_X86_OOPS_VECTOR, 3);
#if CONFIG_EXCEPTION_DEBUG
__pinned_func
FUNC_NORETURN static void generic_exc_handle(unsigned int vector,
const z_arch_esf_t *pEsf)
{
@ -73,6 +76,7 @@ FUNC_NORETURN static void generic_exc_handle(unsigned int vector,
}
#define _EXC_FUNC(vector) \
__pinned_func \
FUNC_NORETURN __used static void handle_exc_##vector(const z_arch_esf_t *pEsf) \
{ \
generic_exc_handle(vector, pEsf); \
@ -116,13 +120,13 @@ EXC_FUNC_NOCODE(IV_MACHINE_CHECK, 0);
_EXCEPTION_CONNECT_CODE(z_x86_page_fault_handler, IV_PAGE_FAULT, 0);
#ifdef CONFIG_X86_ENABLE_TSS
static __noinit volatile z_arch_esf_t _df_esf;
static __pinned_noinit volatile z_arch_esf_t _df_esf;
/* Very tiny stack; just enough for the bogus error code pushed by the CPU
* and a frame pointer push by the compiler. All df_handler_top does is
* shuffle some data around with 'mov' statements and then 'iret'.
*/
static __noinit char _df_stack[8];
static __pinned_noinit char _df_stack[8];
static FUNC_NORETURN __used void df_handler_top(void);
@ -155,6 +159,7 @@ struct task_state_segment _df_tss = {
Z_MEM_PHYS_ADDR(POINTER_TO_UINT(&z_x86_kernel_ptables[0]))
};
__pinned_func
static __used void df_handler_bottom(void)
{
/* We're back in the main hardware task on the interrupt stack */
@ -173,6 +178,7 @@ static __used void df_handler_bottom(void)
z_x86_fatal_error(reason, (z_arch_esf_t *)&_df_esf);
}
__pinned_func
static FUNC_NORETURN __used void df_handler_top(void)
{
/* State of the system when the double-fault forced a task switch


@ -62,7 +62,7 @@
*
* void _interrupt_enter(void *isr, void *isr_param);
*/
SECTION_FUNC(TEXT, _interrupt_enter)
SECTION_FUNC(PINNED_TEXT, _interrupt_enter)
/*
* Note that the processor has pushed both the EFLAGS register
* and the logical return address (cs:eip) onto the stack prior
@ -349,14 +349,14 @@ handle_idle:
* thus z_SpuriousIntNoErrCodeHandler()/z_SpuriousIntHandler() shall be
* invoked with interrupts disabled.
*/
SECTION_FUNC(TEXT, z_SpuriousIntNoErrCodeHandler)
SECTION_FUNC(PINNED_TEXT, z_SpuriousIntNoErrCodeHandler)
pushl $0 /* push dummy err code onto stk */
/* fall through to z_SpuriousIntHandler */
SECTION_FUNC(TEXT, z_SpuriousIntHandler)
SECTION_FUNC(PINNED_TEXT, z_SpuriousIntHandler)
cld /* Clear direction flag */
@ -384,7 +384,7 @@ SECTION_FUNC(TEXT, z_SpuriousIntHandler)
/* handler doesn't return */
#if CONFIG_IRQ_OFFLOAD
SECTION_FUNC(TEXT, _irq_sw_handler)
SECTION_FUNC(PINNED_TEXT, _irq_sw_handler)
push $0
push $z_irq_do_offload
jmp _interrupt_enter


@ -38,6 +38,7 @@ void *__attribute__((section(".spurNoErrIsr")))
MK_ISR_NAME(z_SpuriousIntNoErrCodeHandler) =
&z_SpuriousIntNoErrCodeHandler;
__pinned_func
void arch_isr_direct_footer_swap(unsigned int key)
{
(void)z_swap_irqlock(key);
@ -67,7 +68,10 @@ struct dyn_irq_info {
* which is used by common_dynamic_handler() to fetch the appropriate
* information out of this much smaller table
*/
__pinned_bss
static struct dyn_irq_info dyn_irq_list[CONFIG_X86_DYNAMIC_IRQ_STUBS];
__pinned_bss
static unsigned int next_irq_stub;
/* Memory address pointing to where in ROM the code for the dynamic stubs are.
@ -167,6 +171,7 @@ static unsigned int priority_to_free_vector(unsigned int requested_priority)
* @param stub_idx Stub number to fetch the corresponding stub function
* @return Pointer to the stub code to install into the IDT
*/
__pinned_func
static void *get_dynamic_stub(int stub_idx)
{
uint32_t offset;
@ -233,6 +238,7 @@ int arch_irq_connect_dynamic(unsigned int irq, unsigned int priority,
*
* @param stub_idx Index into the dyn_irq_list array
*/
__pinned_func
void z_x86_dynamic_irq_handler(uint8_t stub_idx)
{
dyn_irq_list[stub_idx].handler(dyn_irq_list[stub_idx].param);


@ -16,15 +16,20 @@ NANO_CPU_INT_REGISTER(_irq_sw_handler, NANO_SOFT_IRQ,
CONFIG_IRQ_OFFLOAD_VECTOR / 16,
CONFIG_IRQ_OFFLOAD_VECTOR, 0);
__pinned_bss
static irq_offload_routine_t offload_routine;
__pinned_bss
static const void *offload_param;
/* Called by asm stub */
__pinned_func
void z_irq_do_offload(void)
{
offload_routine(offload_param);
}
__pinned_func
void arch_irq_offload(irq_offload_routine_t routine, const void *parameter)
{
unsigned int key;


@ -65,7 +65,7 @@
* unsigned int arch_swap (unsigned int eflags);
*/
SECTION_FUNC(TEXT, arch_swap)
SECTION_FUNC(PINNED_TEXT, arch_swap)
#if defined(CONFIG_INSTRUMENT_THREAD_SWITCHING)
pushl %eax
call z_thread_mark_switched_out
@ -400,7 +400,7 @@ CROHandlingDone:
* @return this routine does NOT return.
*/
SECTION_FUNC(TEXT, z_x86_thread_entry_wrapper)
SECTION_FUNC(PINNED_TEXT, z_x86_thread_entry_wrapper)
movl $0, (%esp)
jmp *%edi
#endif /* _THREAD_WRAPPER_REQUIRED */


@ -39,7 +39,7 @@ GDATA(_k_syscall_table)
* 4 isr or exc handler
* 0 return address
*/
SECTION_FUNC(TEXT, z_x86_trampoline_to_kernel)
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_kernel)
/* Check interrupted code segment to see if we came from ring 3
* and hence on the trampoline stack
*/
@ -99,7 +99,7 @@ SECTION_FUNC(TEXT, z_x86_trampoline_to_kernel)
*
* This function is conditionally macroed to KPTI_IRET/KPTI_IRET_USER
*/
SECTION_FUNC(TEXT, z_x86_trampoline_to_user)
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user)
/* Check interrupted code segment to see if we came from ring 3
* and hence on the trampoline stack
*/
@ -108,7 +108,7 @@ SECTION_FUNC(TEXT, z_x86_trampoline_to_user)
/* Otherwise, fall through ... */
SECTION_FUNC(TEXT, z_x86_trampoline_to_user_always)
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user_always)
/* Stash EDI, need a free register */
pushl %edi


@ -16,6 +16,7 @@ extern void x86_64_irq_init(void);
/* Early global initialization functions, C domain. This runs only on the first
* CPU for SMP systems.
*/
__boot_func
FUNC_NORETURN void z_x86_prep_c(void *arg)
{
struct multiboot_info *info = arg;


@ -27,6 +27,7 @@
* code has to manually transition off of it to the appropriate stack after
* switching page tables.
*/
__pinned_func
void z_x86_swap_update_page_tables(struct k_thread *incoming)
{
#ifndef CONFIG_X86_64


@ -56,6 +56,7 @@ LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
/* Protects x86_domain_list and serializes instantiation of intermediate
* paging structures.
*/
__pinned_bss
static struct k_spinlock x86_mmu_lock;
#if defined(CONFIG_USERSPACE) && !defined(CONFIG_X86_COMMON_PAGE_TABLE)
@ -63,6 +64,7 @@ static struct k_spinlock x86_mmu_lock;
* sure all memory mappings are the same across all page tables when invoking
* range_map()
*/
__pinned_bss
static sys_slist_t x86_domain_list;
#endif
@ -112,6 +114,7 @@ struct paging_level {
*
* See Figures 4-4, 4-7, 4-11 in the Intel SDM, vol 3A
*/
__pinned_rodata
static const struct paging_level paging_levels[] = {
#ifdef CONFIG_X86_64
/* Page Map Level 4 */
@ -279,41 +282,48 @@ static __used char dummy_pagetables[INITIAL_PTABLE_SIZE];
/* For a table at a particular level, get the entry index that corresponds to
* the provided virtual address
*/
__pinned_func
static inline int get_index(void *virt, int level)
{
return (((uintptr_t)virt >> paging_levels[level].shift) %
paging_levels[level].entries);
}
__pinned_func
static inline pentry_t *get_entry_ptr(pentry_t *ptables, void *virt, int level)
{
return &ptables[get_index(virt, level)];
}
__pinned_func
static inline pentry_t get_entry(pentry_t *ptables, void *virt, int level)
{
return ptables[get_index(virt, level)];
}
/* Get the physical memory address associated with this table entry */
__pinned_func
static inline uintptr_t get_entry_phys(pentry_t entry, int level)
{
return entry & paging_levels[level].mask;
}
/* Return the virtual address of a linked table stored in the provided entry */
__pinned_func
static inline pentry_t *next_table(pentry_t entry, int level)
{
return z_mem_virt_addr(get_entry_phys(entry, level));
}
/* Number of table entries at this level */
__pinned_func
static inline size_t get_num_entries(int level)
{
return paging_levels[level].entries;
}
/* 4K for everything except PAE PDPTs */
__pinned_func
static inline size_t table_size(int level)
{
return get_num_entries(level) * sizeof(pentry_t);
@ -322,6 +332,7 @@ static inline size_t table_size(int level)
/* For a table at a particular level, size of the amount of virtual memory
* that an entry within the table covers
*/
__pinned_func
static inline size_t get_entry_scope(int level)
{
return (1UL << paging_levels[level].shift);
@ -330,6 +341,7 @@ static inline size_t get_entry_scope(int level)
/* For a table at a particular level, size of the amount of virtual memory
* that this entire table covers
*/
__pinned_func
static inline size_t get_table_scope(int level)
{
return get_entry_scope(level) * get_num_entries(level);
@ -338,6 +350,7 @@ static inline size_t get_table_scope(int level)
/* Must have checked Present bit first! Non-present entries may have OS data
* stored in any other bits
*/
__pinned_func
static inline bool is_leaf(int level, pentry_t entry)
{
if (level == PTE_LEVEL) {
@ -349,6 +362,7 @@ static inline bool is_leaf(int level, pentry_t entry)
}
/* This does NOT (by design) un-flip KPTI PTEs, it's just the raw PTE value */
__pinned_func
static inline void pentry_get(int *paging_level, pentry_t *val,
pentry_t *ptables, void *virt)
{
@ -369,6 +383,7 @@ static inline void pentry_get(int *paging_level, pentry_t *val,
}
}
__pinned_func
static inline void tlb_flush_page(void *addr)
{
/* Invalidate TLB entries corresponding to the page containing the
@ -380,6 +395,7 @@ static inline void tlb_flush_page(void *addr)
}
#ifdef CONFIG_X86_KPTI
__pinned_func
static inline bool is_flipped_pte(pentry_t pte)
{
return (pte & MMU_P) == 0 && (pte & PTE_ZERO) != 0;
@ -387,6 +403,7 @@ static inline bool is_flipped_pte(pentry_t pte)
#endif
#if defined(CONFIG_SMP)
__pinned_func
void z_x86_tlb_ipi(const void *arg)
{
uintptr_t ptables_phys;
@ -419,12 +436,14 @@ void z_x86_tlb_ipi(const void *arg)
/* NOTE: This is not synchronous and the actual flush takes place some short
* time after this exits.
*/
__pinned_func
static inline void tlb_shootdown(void)
{
z_loapic_ipi(0, LOAPIC_ICR_IPI_OTHERS, CONFIG_TLB_IPI_VECTOR);
}
#endif /* CONFIG_SMP */
__pinned_func
static inline void assert_addr_aligned(uintptr_t addr)
{
#if __ASSERT_ON
@ -433,11 +452,13 @@ static inline void assert_addr_aligned(uintptr_t addr)
#endif
}
__pinned_func
static inline void assert_virt_addr_aligned(void *addr)
{
assert_addr_aligned((uintptr_t)addr);
}
__pinned_func
static inline void assert_region_page_aligned(void *addr, size_t size)
{
assert_virt_addr_aligned(addr);
@ -470,6 +491,7 @@ static inline void assert_region_page_aligned(void *addr, size_t size)
#define COLOR(x) do { } while (0)
#endif
__pinned_func
static char get_entry_code(pentry_t value)
{
char ret;
@ -506,6 +528,7 @@ static char get_entry_code(pentry_t value)
return ret;
}
__pinned_func
static void print_entries(pentry_t entries_array[], uint8_t *base, int level,
size_t count)
{
@ -578,6 +601,7 @@ static void print_entries(pentry_t entries_array[], uint8_t *base, int level,
}
}
__pinned_func
static void dump_ptables(pentry_t *table, uint8_t *base, int level)
{
const struct paging_level *info = &paging_levels[level];
@ -622,6 +646,7 @@ static void dump_ptables(pentry_t *table, uint8_t *base, int level)
}
}
__pinned_func
void z_x86_dump_page_tables(pentry_t *ptables)
{
dump_ptables(ptables, NULL, 0);
@ -633,6 +658,7 @@ void z_x86_dump_page_tables(pentry_t *ptables)
#define DUMP_PAGE_TABLES 0
#if DUMP_PAGE_TABLES
__pinned_func
static int dump_kernel_tables(const struct device *unused)
{
z_x86_dump_page_tables(z_x86_kernel_ptables);
@ -643,6 +669,7 @@ static int dump_kernel_tables(const struct device *unused)
SYS_INIT(dump_kernel_tables, APPLICATION, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT);
#endif
__pinned_func
static void str_append(char **buf, size_t *size, const char *str)
{
int ret = snprintk(*buf, *size, "%s", str);
@ -657,6 +684,7 @@ static void str_append(char **buf, size_t *size, const char *str)
}
__pinned_func
static void dump_entry(int level, void *virt, pentry_t entry)
{
const struct paging_level *info = &paging_levels[level];
@ -686,6 +714,7 @@ static void dump_entry(int level, void *virt, pentry_t entry)
#undef DUMP_BIT
}
__pinned_func
void z_x86_pentry_get(int *paging_level, pentry_t *val, pentry_t *ptables,
void *virt)
{
@ -696,6 +725,7 @@ void z_x86_pentry_get(int *paging_level, pentry_t *val, pentry_t *ptables,
* Debug function for dumping out MMU table information to the LOG for a
* specific virtual address, such as when we get an unexpected page fault.
*/
__pinned_func
void z_x86_dump_mmu_flags(pentry_t *ptables, void *virt)
{
pentry_t entry = 0;
@ -712,6 +742,7 @@ void z_x86_dump_mmu_flags(pentry_t *ptables, void *virt)
#endif /* CONFIG_EXCEPTION_DEBUG */
/* Reset permissions on a PTE to original state when the mapping was made */
__pinned_func
static inline pentry_t reset_pte(pentry_t old_val)
{
pentry_t new_val;
@ -742,6 +773,7 @@ static inline pentry_t reset_pte(pentry_t old_val)
* - Flipping the physical address bits cheaply mitigates L1TF
* - State is preserved; to get original PTE, just complement again
*/
__pinned_func
static inline pentry_t pte_finalize_value(pentry_t val, bool user_table,
int level)
{
@ -763,11 +795,13 @@ static inline pentry_t pte_finalize_value(pentry_t val, bool user_table,
*/
#ifndef CONFIG_X86_PAE
/* Non-PAE, pentry_t is same size as void ptr so use atomic_ptr_* APIs */
__pinned_func
static inline pentry_t atomic_pte_get(const pentry_t *target)
{
return (pentry_t)atomic_ptr_get((atomic_ptr_t *)target);
}
__pinned_func
static inline bool atomic_pte_cas(pentry_t *target, pentry_t old_value,
pentry_t new_value)
{
@ -781,11 +815,13 @@ static inline bool atomic_pte_cas(pentry_t *target, pentry_t old_value,
*/
BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
__pinned_func
static inline pentry_t atomic_pte_get(const pentry_t *target)
{
return *target;
}
__pinned_func
static inline bool atomic_pte_cas(pentry_t *target, pentry_t old_value,
pentry_t new_value)
{
@ -840,6 +876,7 @@ static inline bool atomic_pte_cas(pentry_t *target, pentry_t old_value,
* @param options Control flags
* @retval Old PTE value
*/
__pinned_func
static inline pentry_t pte_atomic_update(pentry_t *pte, pentry_t update_val,
pentry_t update_mask,
uint32_t options)
@ -910,6 +947,7 @@ static inline pentry_t pte_atomic_update(pentry_t *pte, pentry_t update_val,
* OPTION_CLEAR)
* @param options Control options, described above
*/
__pinned_func
static void page_map_set(pentry_t *ptables, void *virt, pentry_t entry_val,
pentry_t *old_val_ptr, pentry_t mask, uint32_t options)
{
@ -975,6 +1013,7 @@ static void page_map_set(pentry_t *ptables, void *virt, pentry_t entry_val,
* modified. Ignored if OPTION_RESET or OPTION_CLEAR.
* @param options Control options, described above
*/
__pinned_func
static void range_map_ptables(pentry_t *ptables, void *virt, uintptr_t phys,
size_t size, pentry_t entry_flags, pentry_t mask,
uint32_t options)
@ -1029,6 +1068,7 @@ static void range_map_ptables(pentry_t *ptables, void *virt, uintptr_t phys,
* @param options Control options. Do not set OPTION_USER here. OPTION_FLUSH
* will trigger a TLB shootdown after all tables are updated.
*/
__pinned_func
static void range_map(void *virt, uintptr_t phys, size_t size,
pentry_t entry_flags, pentry_t mask, uint32_t options)
{
@ -1076,6 +1116,7 @@ static void range_map(void *virt, uintptr_t phys, size_t size,
#endif /* CONFIG_SMP */
}
__pinned_func
static inline void range_map_unlocked(void *virt, uintptr_t phys, size_t size,
pentry_t entry_flags, pentry_t mask,
uint32_t options)
@ -1087,6 +1128,7 @@ static inline void range_map_unlocked(void *virt, uintptr_t phys, size_t size,
k_spin_unlock(&x86_mmu_lock, key);
}
__pinned_func
static pentry_t flags_to_entry(uint32_t flags)
{
pentry_t entry_flags = MMU_P;
@ -1126,6 +1168,7 @@ static pentry_t flags_to_entry(uint32_t flags)
}
/* map new region virt..virt+size to phys with provided arch-neutral flags */
__pinned_func
void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags)
{
range_map_unlocked(virt, phys, size, flags_to_entry(flags),
@ -1140,6 +1183,7 @@ void arch_mem_unmap(void *addr, size_t size)
}
#ifdef Z_VM_KERNEL
__boot_func
static void identity_map_remove(uint32_t level)
{
size_t size, scope = get_entry_scope(level);
@ -1174,6 +1218,7 @@ static void identity_map_remove(uint32_t level)
/* Invoked to remove the identity mappings in the page tables,
 * they were only needed to transition the instruction pointer at early boot
*/
__boot_func
void z_x86_mmu_init(void)
{
#ifdef Z_VM_KERNEL
@ -1193,6 +1238,7 @@ void z_x86_mmu_init(void)
}
#if CONFIG_X86_STACK_PROTECTION
__boot_func
void z_x86_set_stack_guard(k_thread_stack_t *stack)
{
/* Applied to all page tables as this affects supervisor mode.
@ -1209,6 +1255,7 @@ void z_x86_set_stack_guard(k_thread_stack_t *stack)
#endif /* CONFIG_X86_STACK_PROTECTION */
#ifdef CONFIG_USERSPACE
__pinned_func
static bool page_validate(pentry_t *ptables, uint8_t *addr, bool write)
{
pentry_t *table = (pentry_t *)ptables;
@ -1247,6 +1294,7 @@ static bool page_validate(pentry_t *ptables, uint8_t *addr, bool write)
return true;
}
__pinned_func
static inline void bcb_fence(void)
{
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
@ -1254,6 +1302,7 @@ static inline void bcb_fence(void)
#endif
}
__pinned_func
int arch_buffer_validate(void *addr, size_t size, int write)
{
pentry_t *ptables = z_x86_thread_page_tables_get(_current);
@ -1304,12 +1353,14 @@ int arch_buffer_validate(void *addr, size_t size, int write)
* update permissions similarly on the thread stack region.
*/
__pinned_func
static inline void reset_region(uintptr_t start, size_t size)
{
range_map_unlocked((void *)start, 0, size, 0, 0,
OPTION_FLUSH | OPTION_RESET);
}
__pinned_func
static inline void apply_region(uintptr_t start, size_t size, pentry_t attr)
{
range_map_unlocked((void *)start, 0, size, attr, MASK_PERM,
@ -1319,10 +1370,11 @@ static inline void apply_region(uintptr_t start, size_t size, pentry_t attr)
/* Cache of the current memory domain applied to the common page tables and
* the stack buffer region that had User access granted.
*/
static struct k_mem_domain *current_domain;
static uintptr_t current_stack_start;
static size_t current_stack_size;
static __pinned_bss struct k_mem_domain *current_domain;
static __pinned_bss uintptr_t current_stack_start;
static __pinned_bss size_t current_stack_size;
__pinned_func
void z_x86_swap_update_common_page_table(struct k_thread *incoming)
{
k_spinlock_key_t key;
@ -1395,6 +1447,7 @@ out_unlock:
/* If a partition was added or removed in the cached domain, update the
* page tables.
*/
__pinned_func
void arch_mem_domain_partition_remove(struct k_mem_domain *domain,
uint32_t partition_id)
{
@ -1408,6 +1461,7 @@ void arch_mem_domain_partition_remove(struct k_mem_domain *domain,
reset_region(ptn->start, ptn->size);
}
__pinned_func
void arch_mem_domain_partition_add(struct k_mem_domain *domain,
uint32_t partition_id)
{
@ -1422,11 +1476,13 @@ void arch_mem_domain_partition_add(struct k_mem_domain *domain,
}
/* Rest of the APIs don't need to do anything */
__pinned_func
void arch_mem_domain_thread_add(struct k_thread *thread)
{
}
__pinned_func
void arch_mem_domain_thread_remove(struct k_thread *thread)
{
@ -1439,15 +1495,17 @@ void arch_mem_domain_thread_remove(struct k_thread *thread)
*/
#define PTABLE_COPY_SIZE (INITIAL_PTABLE_PAGES * CONFIG_MMU_PAGE_SIZE)
static uint8_t __noinit
static uint8_t __pinned_noinit
page_pool[PTABLE_COPY_SIZE * CONFIG_X86_MAX_ADDITIONAL_MEM_DOMAINS]
__aligned(CONFIG_MMU_PAGE_SIZE);
__pinned_data
static uint8_t *page_pos = page_pool + sizeof(page_pool);
/* Return a zeroed and suitably aligned memory page for page table data
* from the global page pool
*/
__pinned_func
static void *page_pool_get(void)
{
void *ret;
@ -1467,6 +1525,7 @@ static void *page_pool_get(void)
}
/* Debugging function to show how many pages are free in the pool */
__pinned_func
static inline unsigned int pages_free(void)
{
return (page_pos - page_pool) / CONFIG_MMU_PAGE_SIZE;
@ -1488,6 +1547,7 @@ static inline unsigned int pages_free(void)
* @retval 0 Success
* @retval -ENOMEM Insufficient page pool memory
*/
__pinned_func
static int copy_page_table(pentry_t *dst, pentry_t *src, int level)
{
if (level == PTE_LEVEL) {
@ -1544,6 +1604,7 @@ static int copy_page_table(pentry_t *dst, pentry_t *src, int level)
return 0;
}
__pinned_func
static void region_map_update(pentry_t *ptables, void *start,
size_t size, pentry_t flags, bool reset)
{
@ -1567,12 +1628,14 @@ static void region_map_update(pentry_t *ptables, void *start,
#endif
}
__pinned_func
static inline void reset_region(pentry_t *ptables, void *start, size_t size)
{
LOG_DBG("%s(%p, %p, %zu)", __func__, ptables, start, size);
region_map_update(ptables, start, size, 0, true);
}
__pinned_func
static inline void apply_region(pentry_t *ptables, void *start,
size_t size, pentry_t attr)
{
@ -1581,6 +1644,7 @@ static inline void apply_region(pentry_t *ptables, void *start,
region_map_update(ptables, start, size, attr, false);
}
__pinned_func
static void set_stack_perms(struct k_thread *thread, pentry_t *ptables)
{
LOG_DBG("update stack for thread %p's ptables at %p: %p (size %zu)",
@ -1595,6 +1659,7 @@ static void set_stack_perms(struct k_thread *thread, pentry_t *ptables)
* Arch interface implementations for memory domains and userspace
*/
__boot_func
int arch_mem_domain_init(struct k_mem_domain *domain)
{
int ret;
@ -1690,6 +1755,7 @@ void arch_mem_domain_thread_remove(struct k_thread *thread)
thread->stack_info.size);
}
__pinned_func
void arch_mem_domain_partition_add(struct k_mem_domain *domain,
uint32_t partition_id)
{
@ -1701,6 +1767,7 @@ void arch_mem_domain_partition_add(struct k_mem_domain *domain,
}
/* Invoked from memory domain API calls, as well as during thread creation */
__pinned_func
void arch_mem_domain_thread_add(struct k_thread *thread)
{
/* New memory domain we are being added to */
@ -1754,12 +1821,14 @@ void arch_mem_domain_thread_add(struct k_thread *thread)
}
#endif /* !CONFIG_X86_COMMON_PAGE_TABLE */
__pinned_func
int arch_mem_domain_max_partitions_get(void)
{
return CONFIG_MAX_DOMAIN_PARTITIONS;
}
/* Invoked from z_x86_userspace_enter */
__pinned_func
void z_x86_current_stack_perms(void)
{
/* Clear any previous context in the stack buffer to prevent
@ -1788,6 +1857,7 @@ void z_x86_current_stack_perms(void)
#endif /* CONFIG_USERSPACE */
#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
__boot_func
static void mark_addr_page_reserved(uintptr_t addr, size_t len)
{
uintptr_t pos = ROUND_DOWN(addr, CONFIG_MMU_PAGE_SIZE);
@ -1804,6 +1874,7 @@ static void mark_addr_page_reserved(uintptr_t addr, size_t len)
}
}
__boot_func
void arch_reserved_pages_update(void)
{
#ifdef CONFIG_X86_PC_COMPATIBLE
@ -1870,6 +1941,7 @@ int arch_page_phys_get(void *virt, uintptr_t *phys)
#ifdef CONFIG_DEMAND_PAGING
#define PTE_MASK (paging_levels[PTE_LEVEL].mask)
__pinned_func
void arch_mem_page_out(void *addr, uintptr_t location)
{
pentry_t mask = PTE_MASK | MMU_P | MMU_A;
@ -1881,6 +1953,7 @@ void arch_mem_page_out(void *addr, uintptr_t location)
OPTION_FLUSH);
}
__pinned_func
void arch_mem_page_in(void *addr, uintptr_t phys)
{
pentry_t mask = PTE_MASK | MMU_P | MMU_D | MMU_A;
@ -1889,6 +1962,7 @@ void arch_mem_page_in(void *addr, uintptr_t phys)
OPTION_FLUSH);
}
__pinned_func
void arch_mem_scratch(uintptr_t phys)
{
page_map_set(z_x86_page_tables_get(), Z_SCRATCH_PAGE,
@ -1896,6 +1970,7 @@ void arch_mem_scratch(uintptr_t phys)
OPTION_FLUSH);
}
__pinned_func
uintptr_t arch_page_info_get(void *addr, uintptr_t *phys, bool clear_accessed)
{
pentry_t all_pte, mask;
@ -1971,6 +2046,7 @@ uintptr_t arch_page_info_get(void *addr, uintptr_t *phys, bool clear_accessed)
return (uintptr_t)all_pte;
}
__pinned_func
enum arch_page_location arch_page_location_get(void *addr, uintptr_t *location)
{
pentry_t pte;
@ -1997,6 +2073,7 @@ enum arch_page_location arch_page_location_get(void *addr, uintptr_t *location)
}
#ifdef CONFIG_X86_KPTI
__pinned_func
bool z_x86_kpti_is_access_ok(void *addr, pentry_t *ptables)
{
pentry_t pte;