x86: 32-bit: restore virtual linking capability

This reverts commit 7d32e9f9a5.

We now allow the kernel to be linked virtually. This patch:

- Properly converts between virtual/physical addresses
- Handles early boot instruction pointer transition
- Double-maps SRAM to both virtual and physical locations
  in boot page tables to facilitate the instruction pointer
  transition, with logic to clean this up once the transition is
  complete (see the address-conversion sketch below).

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
Signed-off-by: Daniel Leung <daniel.leung@intel.com>
Andrew Boie 2021-03-06 18:14:28 -08:00 committed by Anas Nashif
commit 348d1315d2
6 changed files with 110 additions and 55 deletions
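
As context for the Z_MEM_PHYS_ADDR() conversions used throughout the diffs
below: once the kernel is linked virtually, RAM is mapped at a fixed linear
offset from its physical location, so converting between the two address
spaces is plain pointer arithmetic. The following is a minimal sketch of that
idea only; the constants are placeholder values rather than Zephyr's real
configuration, and the helper names are illustrative, not the actual macros.

#include <stdint.h>

/* Placeholder values standing in for CONFIG_KERNEL_VM_BASE and
 * CONFIG_SRAM_BASE_ADDRESS; the real offset is computed by the build.
 */
#define KERNEL_VM_BASE    0x80000000UL
#define SRAM_BASE_ADDRESS 0x00100000UL
#define VM_OFFSET         (KERNEL_VM_BASE - SRAM_BASE_ADDRESS)

/* Link-time (virtual) address -> physical address, the job that
 * Z_MEM_PHYS_ADDR() does for symbols in the assembly below.
 */
static inline uintptr_t virt_to_phys(uintptr_t virt)
{
	return virt - VM_OFFSET;
}

/* Physical address -> permanent virtual mapping. */
static inline uintptr_t phys_to_virt(uintptr_t phys)
{
	return phys + VM_OFFSET;
}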


@@ -21,6 +21,7 @@
#include <arch/cpu.h>
#include <arch/x86/multiboot.h>
#include <x86_mmu.h>
#include <sys/mem_manage.h>
/* exports (private APIs) */
@@ -43,6 +44,51 @@
GDATA(_sse_mxcsr_default_value)
#endif
.macro install_page_tables
#ifdef CONFIG_X86_MMU
/* Enable paging. If virtual memory is enabled, the instruction pointer
* is currently at a physical address. There is an identity mapping
* for all RAM, plus a virtual mapping of RAM starting at
* CONFIG_KERNEL_VM_BASE using the same paging structures.
*
* Until we enable these page tables, only physical memory addresses
* work.
*/
movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
movl %eax, %cr3
#ifdef CONFIG_X86_PAE
/* Enable PAE */
movl %cr4, %eax
orl $CR4_PAE, %eax
movl %eax, %cr4
/* IA32_EFER NXE bit set */
movl $0xC0000080, %ecx
rdmsr
orl $0x800, %eax
wrmsr
#endif /* CONFIG_X86_PAE */
/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
movl %cr0, %eax
orl $(CR0_PG | CR0_WP), %eax
movl %eax, %cr0
#ifdef Z_VM_KERNEL
/* Jump to a virtual address, which works because the identity and
* virtual mappings both point to the same physical address.
*/
ljmp $CODE_SEG, $vm_enter
vm_enter:
/* We are now executing in virtual memory. We'll un-map the identity
* mappings later once we are in the C domain
*/
#endif /* Z_VM_KERNEL */
#endif /* CONFIG_X86_MMU */
.endm
SECTION_FUNC(TEXT_START, __start)
#include "../common.S"
@@ -158,9 +204,6 @@ __csSet:
addl $CONFIG_ISR_STACK_SIZE, %esp
#endif
/* Clear BSS */
call z_bss_zero
#ifdef CONFIG_XIP
/* Copy data from flash to RAM.
*
@@ -179,53 +222,18 @@ __csSet:
mov $MAIN_TSS, %ax
ltr %ax
#endif
/* load 32-bit operand size IDT */
lidt Z_MEM_PHYS_ADDR(z_x86_idt)
#ifdef CONFIG_X86_MMU
/* Install page tables */
movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
movl %eax, %cr3
#ifdef CONFIG_X86_PAE
/* Enable PAE */
movl %cr4, %eax
orl $CR4_PAE, %eax
movl %eax, %cr4
/* IA32_EFER NXE bit set */
movl $0xC0000080, %ecx
rdmsr
orl $0x800, %eax
wrmsr
#endif /* CONFIG_X86_PAE */
/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
movl %cr0, %eax
orl $(CR0_PG | CR0_WP), %eax
movl %eax, %cr0
#endif /* CONFIG_X86_MMU */
#if (Z_MEM_PHYS_ADDR(0x1000) != 0x1000)
/*
 * Page table loaded so we can start executing in
 * virtual address space.
 *
 * Note that __prep_c and z_x86_prep_c() must be
 * mapped in virtual address space in gen_mmu.py.
 *
 * This jump must be done due to relative
 * addressing code emitted by the toolchain
 * (think EIP + offset). If we are not already in
 * virtual address space, everything afterwards
 * would still be referenced via physical addresses
 * and will crash if we have a kernel bigger than
 * physical memory with demand paging (for example).
 */
ljmp $CODE_SEG, $__prep_c
__prep_c:
#endif
/* Note that installing page tables must be done after
 * z_data_copy() as the page tables are being copied into
 * RAM there.
 */
install_page_tables
/* Clear BSS */
call z_bss_zero
/* load 32-bit operand size IDT */
lidt z_x86_idt
pushl %ebx /* pointer to multiboot info, or NULL */
call z_x86_prep_c /* enter kernel; never returns */
@@ -250,8 +258,8 @@ z_x86_idt:
* setup by the BIOS (or GRUB?).
*/
/* physical start address */
.long Z_MEM_PHYS_ADDR(_idt_base_address)
/* IDT table start address */
.long _idt_base_address
#ifdef CONFIG_SET_GDT
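
The install_page_tables macro above relies on the boot page tables mapping RAM
twice: once at its identity (physical) address and once starting at
CONFIG_KERNEL_VM_BASE, both through the same lower-level tables, so the ljmp
can move the instruction pointer into the virtual range while execution
continues on the same physical code. The real tables are generated by
gen_mmu.py; the sketch below only illustrates the aliasing idea for a PAE
layout, and every name in it is local to the sketch.

#include <stdint.h>

typedef uint64_t pentry_t;            /* PAE page table entry width */

#define PDPTE_PRESENT 0x1ULL

/* In PAE mode the top-level table (PDPT) has four entries, each covering
 * 1 GiB; bits 31:30 of a linear address select the entry.
 */
static inline int pdpt_index(uintptr_t addr)
{
	return (int)(addr >> 30) & 0x3;
}

/* Point both the identity slot and the virtual slot at the same page
 * directory, so SRAM is reachable at either address during early boot.
 */
static void double_map_sram(pentry_t pdpt[4], uintptr_t page_dir_phys,
			    uintptr_t sram_phys, uintptr_t sram_virt)
{
	pentry_t entry = (pentry_t)page_dir_phys | PDPTE_PRESENT;

	pdpt[pdpt_index(sram_phys)] = entry;  /* identity alias  */
	pdpt[pdpt_index(sram_virt)] = entry;  /* virtual mapping */
}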


@@ -42,7 +42,7 @@
/* Page tables created at build time by gen_mmu.py
* NOTE: Presumes phys=virt
*/
movl $z_x86_kernel_ptables, %eax
movl $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %eax
movl %eax, %cr3
set_efer
@@ -64,7 +64,7 @@
clts
/* NOTE: Presumes phys=virt */
movq $z_x86_kernel_ptables, %rax
movq $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
movq %rax, %cr3
set_efer
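
In both the 32-bit and 64-bit start-up paths, the page table pointer loaded
into CR3 must be a physical address, which is why the symbols in the hunks
above are now wrapped in Z_MEM_PHYS_ADDR() even though they resolve to virtual
addresses at link time. A minimal illustration (GCC/Clang inline assembly, not
kernel code):

#include <stdint.h>

/* CR3 takes the physical base of the top-level page table; passing a
 * virtual symbol address here would point the MMU at the wrong frame.
 */
static inline void load_cr3_phys(uintptr_t ptables_phys)
{
	__asm__ volatile("mov %0, %%cr3" :: "r"(ptables_phys) : "memory");
}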


@@ -87,7 +87,7 @@ z_x86_syscall_entry_stub:
pushq %rax
/* NOTE: Presumes phys=virt */
movq $z_x86_kernel_ptables, %rax
movq $Z_MEM_PHYS_ADDR(z_x86_kernel_ptables), %rax
movq %rax, %cr3
popq %rax
movq $0, -8(%rsp) /* Delete stashed RAX data */


@@ -40,6 +40,10 @@ FUNC_NORETURN void z_x86_prep_c(void *arg)
ARG_UNUSED(info);
#endif
#ifdef CONFIG_MMU
z_x86_mmu_init();
#endif
#if CONFIG_X86_STACK_PROTECTION
for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
z_x86_set_stack_guard(z_interrupt_stacks[i]);


@@ -517,9 +517,12 @@ static void print_entries(pentry_t entries_array[], uint8_t *base, int level,
if (phys == virt) {
/* Identity mappings */
COLOR(YELLOW);
} else {
/* Other mappings */
} else if (phys + Z_MEM_VM_OFFSET == virt) {
/* Permanent RAM mappings */
COLOR(GREEN);
} else {
/* General mapped pages */
COLOR(CYAN);
}
} else {
/* Intermediate entry */
@@ -580,7 +583,8 @@ static void dump_ptables(pentry_t *table, uint8_t *base, int level)
}
#endif
printk("%s at %p: ", info->name, table);
printk("%s at %p (0x%" PRIxPTR "): ", info->name, table,
z_mem_phys_addr(table));
if (level == 0) {
printk("entire address space\n");
} else {
@@ -1111,6 +1115,42 @@ void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags)
MASK_ALL, 0);
}
static void identity_map_remove(void)
{
#ifdef Z_VM_KERNEL
size_t size, scope = get_entry_scope(0);
uint8_t *pos;
k_mem_region_align((uintptr_t *)&pos, &size,
(uintptr_t)CONFIG_SRAM_BASE_ADDRESS,
(size_t)CONFIG_SRAM_SIZE * 1024U, scope);
/* We booted with RAM mapped both to its identity and virtual
* mapping starting at CONFIG_KERNEL_VM_BASE. This was done by
* double-linking the relevant tables in the top-level table.
* At this point we don't need the identity mapping(s) any more,
* so zero the top-level table entries corresponding to the
* physical mapping.
*/
while (size) {
pentry_t *entry = get_entry_ptr(z_x86_kernel_ptables, pos, 0);
/* set_pte */
*entry = 0;
pos += scope;
size -= scope;
}
#endif
}
/* Invoked to remove the identity mappings in the page tables;
* they were only needed to transition the instruction pointer at early boot
*/
void z_x86_mmu_init(void)
{
identity_map_remove();
}
#if CONFIG_X86_STACK_PROTECTION
void z_x86_set_stack_guard(k_thread_stack_t *stack)
{


@@ -229,5 +229,8 @@ void z_x86_tlb_ipi(const void *arg);
#ifdef CONFIG_X86_COMMON_PAGE_TABLE
void z_x86_swap_update_common_page_table(struct k_thread *incoming);
#endif
/* Early-boot paging setup tasks, called from prep_c */
void z_x86_mmu_init(void);
#endif /* _ASMLANGUAGE */
#endif /* ZEPHYR_ARCH_X86_INCLUDE_X86_MMU_H */
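
Returning to identity_map_remove() in the x86_mmu.c hunk above: it rounds the
SRAM region out to top-level entry boundaries before zeroing the corresponding
entries. The real alignment is done by k_mem_region_align() with the scope
returned by get_entry_scope(0); the helper below is only a sketch of the
assumed round-down/round-up behaviour, with 'align' standing in for that scope
(taken to be a power of two).

#include <stddef.h>
#include <stdint.h>

/* Expand [addr, addr + size) outward so both ends land on 'align'
 * boundaries. With align equal to the amount of address space covered by
 * one top-level entry, every entry that touches the region is included.
 */
static void region_align(uintptr_t *aligned_addr, size_t *aligned_size,
			 uintptr_t addr, size_t size, size_t align)
{
	uintptr_t start = addr & ~((uintptr_t)align - 1);
	uintptr_t end = (addr + size + align - 1) & ~((uintptr_t)align - 1);

	*aligned_addr = start;
	*aligned_size = (size_t)(end - start);
}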