diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4fbedd13ff2..4f363b23ba8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -370,6 +370,7 @@ config X86_MAX_ADDITIONAL_MEM_DOMAINS
 
 config X86_EXTRA_PAGE_TABLE_PAGES
 	int "Reserve extra pages in page table"
+	default 1 if X86_PAE && (KERNEL_VM_BASE != SRAM_BASE_ADDRESS)
 	default 0
 	depends on X86_MMU
 	help
diff --git a/arch/x86/core/ia32/crt0.S b/arch/x86/core/ia32/crt0.S
index 5a173dc07fc..1c9afd3318d 100644
--- a/arch/x86/core/ia32/crt0.S
+++ b/arch/x86/core/ia32/crt0.S
@@ -68,6 +68,14 @@
 	rdmsr
 	orl $0x800, %eax
 	wrmsr
+#else
+	/* Enable Page Size Extensions (allowing 4MB pages).
+	 * This is ignored if PAE is enabled, so there is no need to do
+	 * this above in the PAE code.
+	 */
+	movl %cr4, %eax
+	orl $CR4_PSE, %eax
+	movl %eax, %cr4
 #endif /* CONFIG_X86_PAE */
 
 	/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
diff --git a/arch/x86/core/prep_c.c b/arch/x86/core/prep_c.c
index cdecdec4437..f9175fea0fc 100644
--- a/arch/x86/core/prep_c.c
+++ b/arch/x86/core/prep_c.c
@@ -22,6 +22,10 @@ FUNC_NORETURN void z_x86_prep_c(void *arg)
 
 	_kernel.cpus[0].nested = 0;
 
+#ifdef CONFIG_MMU
+	z_x86_mmu_init();
+#endif
+
 #if defined(CONFIG_LOAPIC)
 	z_loapic_enable(0);
 #endif
@@ -40,10 +44,6 @@ FUNC_NORETURN void z_x86_prep_c(void *arg)
 	ARG_UNUSED(info);
 #endif
 
-#ifdef CONFIG_MMU
-	z_x86_mmu_init();
-#endif
-
 #if CONFIG_X86_STACK_PROTECTION
 	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
 		z_x86_set_stack_guard(z_interrupt_stacks[i]);
diff --git a/arch/x86/core/x86_mmu.c b/arch/x86/core/x86_mmu.c
index fe30b4a1975..14db5d19686 100644
--- a/arch/x86/core/x86_mmu.c
+++ b/arch/x86/core/x86_mmu.c
@@ -1122,40 +1122,57 @@ void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags)
 			MASK_ALL, 0);
 }
 
-static void identity_map_remove(void)
-{
 #ifdef Z_VM_KERNEL
-	size_t size, scope = get_entry_scope(0);
+static void identity_map_remove(uint32_t level)
+{
+	size_t size, scope = get_entry_scope(level);
+	pentry_t *table;
+	uint32_t cur_level;
 	uint8_t *pos;
+	pentry_t entry;
+	pentry_t *entry_ptr;
 
 	k_mem_region_align((uintptr_t *)&pos, &size,
 			   (uintptr_t)CONFIG_SRAM_BASE_ADDRESS,
 			   (size_t)CONFIG_SRAM_SIZE * 1024U, scope);
 
-	/* We booted with RAM mapped both to its identity and virtual
-	 * mapping starting at CONFIG_KERNEL_VM_BASE. This was done by
-	 * double-linking the relevant tables in the top-level table.
-	 * At this point we don't need the identity mapping(s) any more,
-	 * zero the top-level table entries corresponding to the
-	 * physical mapping.
-	 */
 	while (size != 0U) {
-		pentry_t *entry = get_entry_ptr(z_x86_kernel_ptables, pos, 0);
+		/* Need to get to the correct table */
+		table = z_x86_kernel_ptables;
+		for (cur_level = 0; cur_level < level; cur_level++) {
+			entry = get_entry(table, pos, cur_level);
+			table = next_table(entry, level);
+		}
+
+		entry_ptr = get_entry_ptr(table, pos, level);
 
 		/* set_pte */
-		*entry = 0;
+		*entry_ptr = 0;
 		pos += scope;
 		size -= scope;
 	}
-#endif
 }
+#endif
 
 /* Invoked to remove the identity mappings in the page tables,
  * they were only needed to tranisition the instruction pointer at early boot
  */
 void z_x86_mmu_init(void)
 {
-	identity_map_remove();
+#ifdef Z_VM_KERNEL
+	/* We booted with the physical address space identity mapped.
+	 * Now that we are executing in the virtual address space,
+	 * the identity mappings are no longer needed, so remove them.
+	 *
+	 * Without PAE, we only need to remove the entries at the PD level.
+	 * With PAE, we also need to remove the entry at the PDP level.
+	 */
+	identity_map_remove(PDE_LEVEL);
+
+#ifdef CONFIG_X86_PAE
+	identity_map_remove(0);
+#endif
+#endif
 }
 
 #if CONFIG_X86_STACK_PROTECTION
diff --git a/arch/x86/gen_mmu.py b/arch/x86/gen_mmu.py
index d6b59dea375..e30f330a571 100755
--- a/arch/x86/gen_mmu.py
+++ b/arch/x86/gen_mmu.py
@@ -432,7 +432,7 @@ class PtableSet():
 
     def reserve(self, virt_base, size, to_level=PT_LEVEL):
         """Reserve page table space with already aligned virt_base and size"""
-        debug("Reserving paging structures 0x%x (0x%x)" %
+        debug("Reserving paging structures for 0x%x (0x%x)" %
              (virt_base, size))
 
         align_check(virt_base, size)
@@ -458,15 +458,11 @@ class PtableSet():
 
         self.reserve(mem_start, mem_size, to_level)
 
-    def map(self, phys_base, virt_base, size, flags, level=PT_LEVEL, double_map=True):
+    def map(self, phys_base, virt_base, size, flags, level=PT_LEVEL):
         """Map an address range in the page tables provided access flags.
 
-        If virt_base is None, identity mapping using phys_base is done.
-        If virt_base is not the same address as phys_base, the same memory
-        will be double mapped to the virt_base address if double_map == True;
-        or normal mapping to virt_base if double_map == False.
         """
-        skip_vm_map = virt_base is None or virt_base == phys_base
+        is_identity_map = virt_base is None or virt_base == phys_base
 
         if virt_base is None:
             virt_base = phys_base
@@ -479,53 +475,23 @@ class PtableSet():
         align_check(phys_base, size, scope)
         align_check(virt_base, size, scope)
         for paddr in range(phys_base, phys_base + size, scope):
-            if paddr == 0 and skip_vm_map:
-                # Never map the NULL page
-                #
-                # If skip_vm_map, the identify map of physical
-                # memory will be unmapped at boot. So the actual
-                # NULL page will not be mapped after that.
+            if is_identity_map and paddr == 0 and level == PT_LEVEL:
+                # Never map the NULL page at page table level.
                 continue
 
             vaddr = virt_base + (paddr - phys_base)
 
             self.map_page(vaddr, paddr, flags, False, level)
 
-        if skip_vm_map or not double_map:
-            return
+    def identity_map_unaligned(self, phys_base, size, flags, level=PT_LEVEL):
+        """Identity map a region of memory"""
+        scope = 1 << self.levels[level].addr_shift
 
-        # Find how much VM a top-level entry covers
-        scope = 1 << self.toplevel.addr_shift
-        debug("Double map %s entries with scope 0x%x" %
-              (self.toplevel.__class__.__name__, scope))
+        phys_aligned_base = round_down(phys_base, scope)
+        phys_aligned_end = round_up(phys_base + size, scope)
+        phys_aligned_size = phys_aligned_end - phys_aligned_base
 
-        # Round bases down to the entry granularity
-        pd_virt_base = round_down(virt_base, scope)
-        pd_phys_base = round_down(phys_base, scope)
-        size = size + (phys_base - pd_phys_base)
-
-        # The base addresses have to line up such that they can be mapped
-        # by the same second-level table
-        if phys_base - pd_phys_base != virt_base - pd_virt_base:
-            error("mis-aligned virtual 0x%x and physical base addresses 0x%x" %
-                  (virt_base, phys_base))
-
-        # Round size up to entry granularity
-        size = round_up(size, scope)
-
-        for offset in range(0, size, scope):
-            cur_virt = pd_virt_base + offset
-            cur_phys = pd_phys_base + offset
-
-            # Get the physical address of the second-level table that
-            # maps the current chunk of virtual memory
-            table_link_phys = self.toplevel.lookup(cur_virt)
-
-            debug("copy mappings 0x%x - 0x%x to 0x%x, using table 0x%x" %
-                  (cur_phys, cur_phys + scope - 1, cur_virt, table_link_phys))
-
-            # Link to the entry for the physical mapping (i.e. mirroring).
-            self.toplevel.map(cur_phys, table_link_phys, INT_FLAGS)
+        self.map(phys_aligned_base, None, phys_aligned_size, flags, level)
 
     def set_region_perms(self, name, flags, level=PT_LEVEL):
         """Set access permissions for a named region that is already mapped
@@ -723,7 +689,7 @@ def map_extra_regions(pt):
 
         # Reserve space in page table, and map the region
         pt.reserve_unaligned(virt, size, level)
-        pt.map(phys, virt, size, flags, level, double_map=False)
+        pt.map(phys, virt, size, flags, level)
 
 
 def main():
@@ -786,6 +752,10 @@ def main():
     debug("Zephyr image: 0x%x - 0x%x size 0x%x" %
           (image_base, image_base + image_size - 1, image_size))
 
+    if virt_to_phys_offset != 0:
+        debug("Physical address space: 0x%x - 0x%x size 0x%x" %
+              (sram_base, sram_base + sram_size - 1, sram_size))
+
     is_perm_regions = isdef("CONFIG_SRAM_REGION_PERMISSIONS")
 
     if image_size >= vm_size:
@@ -804,6 +774,17 @@ def main():
     # Map the zephyr image
     pt.map(image_base_phys, image_base, image_size, map_flags | ENTRY_RW)
 
+    if virt_to_phys_offset != 0:
+        # Need to identity map the physical address space
+        # as it is needed during the early boot process.
+        # This will be unmapped once z_x86_mmu_init()
+        # is called.
+        # Note that this only does the identity mapping
+        # at the page directory level to minimize wasted space.
+        pt.reserve_unaligned(image_base_phys, image_size, to_level=PD_LEVEL)
+        pt.identity_map_unaligned(image_base_phys, image_size,
+                                  FLAG_P | FLAG_RW | FLAG_SZ, level=PD_LEVEL)
+
     if isdef("CONFIG_X86_64"):
         # 64-bit has a special region in the first 64K to bootstrap other CPUs
         # from real mode
diff --git a/arch/x86/include/kernel_arch_data.h b/arch/x86/include/kernel_arch_data.h
index 07b172c85e1..75334f1a3c3 100644
--- a/arch/x86/include/kernel_arch_data.h
+++ b/arch/x86/include/kernel_arch_data.h
@@ -53,6 +53,7 @@
 #define CR0_PG		BIT(31)       /* enable paging */
 #define CR0_WP		BIT(16)       /* honor W bit even when supervisor */
 
+#define CR4_PSE		BIT(4)	      /* Page size extension (4MB pages) */
 #define CR4_PAE		BIT(5)	      /* enable PAE */
 #define CR4_OSFXSR	BIT(9)	      /* enable SSE (OS FXSAVE/RSTOR) */
 
diff --git a/doc/guides/arch/x86.rst b/doc/guides/arch/x86.rst
index d1c1885eacf..adc661a7e59 100644
--- a/doc/guides/arch/x86.rst
+++ b/doc/guides/arch/x86.rst
@@ -34,22 +34,22 @@ space before ``vm_enter`` inside :file:`arch/x86/core/ia32/crt0.S`.
 After ``vm_enter``, code execution is done via virtual addresses
 and data can be referred via their virtual addresses. This is
 possible as the page table generation script
-(:file:`arch/x86/gen_mmu.py`) copies the mappings at the top level
-page table such that the same second level tables are used for both
-identity and virutal memory mappings. Later in the boot process,
-the entries for identity mapping at the top level page table is
+(:file:`arch/x86/gen_mmu.py`) identity maps the physical addresses
+at the page directory level, in addition to mapping virtual addresses
+to physical memory. Later in the boot process,
+the entries for identity mapping at the page directory level are
 cleared in :c:func:`z_x86_mmu_init()`, effectively removing the
 identity mapping of physical memory. This unmapping must be done
 for userspace isolation or else they would be able to access
 restricted memory via physical addresses. Since the identity mapping
-is done at the top level, there is no need to allocate additional
-space for lower level tables in the whole page table structure,
-or else the extra tables become wasted space once unmapped and
-no longer referred. Because of this, there are restrictions on
-where virtual address space can be:
+is done at the page directory level, there is no need to allocate
+additional space for page tables. However, additional space may
+still be required for an additional page directory table.
+
+There are restrictions on where virtual address space can be:
 
 - Physical and virtual address spaces must be disjoint. This is
-  required as the entries in top level will be cleared.
+  required as the entries in the page directory table will be cleared.
   If they are not disjoint, it would clear the entries needed for
   virtual addresses.
 
@@ -73,10 +73,6 @@ where virtual address space can be:
 - Both ``CONFIG_SRAM_BASE_ADDRESS`` and ``CONFIG_KERNEL_VM_BASE``
   must also align with the starting addresses of targeted regions.
 
-- Due to re-using of second level entries, both
-  ``CONFIG_SRAM_OFFSET`` and ``CONFIG_KERNEL_VM_OFFSET`` must be of
-  same value.
-
 Specifying Additional Memory Mappings at Build Time
 ***************************************************
 
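Illustrative note (not part of the patch): the sketch below shows the address arithmetic behind the PD-level identity unmapping performed by z_x86_mmu_init(), reduced to the simplest case of a 32-bit, non-PAE page directory where CR4.PSE makes each of the 1024 page-directory entries map 4 MiB. It is a hedged stand-in, not the Zephyr implementation: the real identity_map_remove() walks the generated tables via get_entry()/get_entry_ptr() and, with PAE, is invoked a second time to clear the PDPT entry (each PDPT entry covers 1 GiB). The page_directory[] array and the constants here are hypothetical.

    /* Simplified sketch, assuming a flat 32-bit non-PAE page directory
     * named page_directory[] using PSE (4 MiB) pages; not Zephyr code.
     */
    #include <stdint.h>

    #define PDE_COUNT 1024U
    #define PDE_SCOPE (4U * 1024U * 1024U)  /* each PDE maps 4 MiB */

    extern uint32_t page_directory[PDE_COUNT];  /* hypothetical top-level table */

    /* Zero every PDE that identity-maps [phys_base, phys_base + size). */
    static void clear_identity_pdes(uint32_t phys_base, uint32_t size)
    {
        if (size == 0U) {
            return;
        }

        /* Bits 31:22 of an address select its page-directory entry. */
        uint32_t first = phys_base / PDE_SCOPE;
        uint32_t last = (phys_base + size - 1U) / PDE_SCOPE;

        for (uint32_t idx = first; idx <= last && idx < PDE_COUNT; idx++) {
            page_directory[idx] = 0U;  /* mark the 4 MiB chunk not present */
        }

        /* A real implementation must also flush the stale TLB entries
         * afterwards, for example by reloading CR3.
         */
    }

For example, with a hypothetical CONFIG_SRAM_BASE_ADDRESS of 0x00100000 and 8 MiB of SRAM, this clears page_directory[0] through page_directory[2] only, which hints at why the Kconfig change reserves an extra page-table page solely for the PAE case: the non-PAE identity map never needs tables below the page directory.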