x86: generate page tables at runtime

Replaces the very complex boot-time generation of page tables
with much simpler runtime generation at boot.

For those x86 boards that enable the MMU in the defconfig,
set the number of page pool pages appropriately.

The MMU_RUNTIME_* flags have been removed. They were an
artifact of the old page table generation and did not
correspond to any hardware state.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
Authored by Andrew Boie on 2019-07-31 14:21:14 -07:00, committed by Andrew Boie
commit c3b3aafaec
15 changed files with 250 additions and 701 deletions


@ -315,7 +315,6 @@
/arch/x86/gen_gdt.py @andrewboie
/arch/x86/gen_idt.py @andrewboie
/scripts/gen_kobject_list.py @andrewboie
/arch/x86/gen_mmu_x86.py @andrewboie
/scripts/gen_priv_stacks.py @andrewboie @agross-oss @ioannisg
/scripts/gen_syscall_header.py @andrewboie
/scripts/gen_syscalls.py @andrewboie


@ -129,9 +129,18 @@ config X86_MMU
select MEMORY_PROTECTION
help
This options enables the memory management unit present in x86
and creates a set of page tables at build time. Requires an MMU
and creates a set of page tables at boot time. Requires an MMU
which supports PAE page tables.
config X86_MMU_PAGE_POOL_PAGES
int "Number of pages to reserve for building page tables"
default 16
depends on X86_MMU
help
Building page tables at boot requires a pool of free memory pages
to construct them. The number needed can't be derived at build time;
tune this to your SoC's specific memory map.
config X86_NO_MELTDOWN
bool
help

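To make the "tune this to your SoC's specific memory map" guidance concrete, here is a rough sizing sketch (an assumption based on the PAE layout this port uses, not a rule stated in the tree): the pool must supply one 4 KiB page for each distinct 1 GiB span touched by the boot regions (its page directory) plus one for each distinct 2 MiB span (its page table), and the totals roughly double when CONFIG_X86_KPTI=y builds a second, user-mode set of tables.

#include <stdint.h>
#include <stddef.h>

#define MB (1024UL * 1024UL)
#define GB (1024UL * MB)

/* Hypothetical helper, not part of this patch: estimate the pool pages one
 * contiguous region needs under the rule of thumb above. Page directories
 * are shared between regions falling in the same 1 GiB span, so summing
 * per-region results slightly overestimates.
 */
static unsigned long pool_pages_for_region(uintptr_t start, size_t size)
{
	uintptr_t last = start + size - 1;
	unsigned long pds = (last / GB) - (start / GB) + 1;
	unsigned long pts = (last / (2 * MB)) - (start / (2 * MB)) + 1;

	return pds + pts;
}

For example, a single 16 MiB region aligned to 2 MiB needs 1 + 8 = 9 pool pages for the kernel tables alone; z_x86_paging_init() prints the optimal value at boot if the configured pool turns out to be larger than necessary.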

@ -20,6 +20,7 @@
/* exports (private APIs) */
GTEXT(__start)
GTEXT(z_x86_enable_paging)
/* externs */
GTEXT(z_cstart)
@ -273,30 +274,6 @@ __csSet:
lgdt %ds:_gdt
#endif
#ifdef CONFIG_X86_MMU
/* load the page directory address into the registers*/
movl $z_x86_kernel_pdpt, %eax
movl %eax, %cr3
/* Enable PAE */
movl %cr4, %eax
orl $CR4_PAE_ENABLE, %eax
movl %eax, %cr4
/* IA32_EFER NXE bit set */
movl $0xC0000080, %ecx
rdmsr
orl $0x800, %eax
wrmsr
/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
movl %cr0, %eax
orl $CR0_PG_WP_ENABLE, %eax
movl %eax, %cr0
#endif /* CONFIG_X86_MMU */
#if defined(CONFIG_X86_ENABLE_TSS)
mov $MAIN_TSS, %ax
ltr %ax
@ -399,6 +376,31 @@ dataWords:
ret
#endif /* CONFIG_XIP */
#ifdef CONFIG_X86_MMU
z_x86_enable_paging:
/* load the page directory address into the registers*/
movl $z_x86_kernel_pdpt, %eax
movl %eax, %cr3
/* Enable PAE */
movl %cr4, %eax
orl $CR4_PAE_ENABLE, %eax
movl %eax, %cr4
/* IA32_EFER NXE bit set */
movl $0xC0000080, %ecx
rdmsr
orl $0x800, %eax
wrmsr
/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
movl %cr0, %eax
orl $CR0_PG_WP_ENABLE, %eax
movl %eax, %cr0
ret
#endif /* CONFIG_X86_MMU */
#if defined(CONFIG_SSE)
/* SSE control & status register initial value */

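For readers decoding the register writes in z_x86_enable_paging above, these are the architectural constants involved; the values are standard IA-32 facts rather than something defined in this diff, and the names below are illustrative (the assembly uses CR4_PAE_ENABLE and CR0_PG_WP_ENABLE, defined elsewhere in the port):

/* Illustrative definitions; names are not from this patch */
#define IA32_EFER_MSR   0xC0000080u   /* extended feature enable register   */
#define IA32_EFER_NXE   (1u << 11)    /* 0x800: honor execute-disable bits  */
#define CR4_PAE         (1u << 5)     /* select the 64-bit PAE entry format */
#define CR0_WP          (1u << 16)    /* supervisor writes honor read-only  */
#define CR0_PG          (1u << 31)    /* enable paging                      */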

@ -24,13 +24,11 @@ MMU_BOOT_REGION((u32_t)&_image_text_start, (u32_t)&_image_text_size,
MMU_ENTRY_READ | MMU_ENTRY_USER);
MMU_BOOT_REGION((u32_t)&_image_rodata_start, (u32_t)&_image_rodata_size,
MMU_ENTRY_READ | MMU_ENTRY_USER |
MMU_ENTRY_EXECUTE_DISABLE);
MMU_ENTRY_READ | MMU_ENTRY_USER | MMU_ENTRY_EXECUTE_DISABLE);
#ifdef CONFIG_USERSPACE
MMU_BOOT_REGION((u32_t)&_app_smem_start, (u32_t)&_app_smem_size,
MMU_ENTRY_WRITE | MMU_ENTRY_RUNTIME_USER |
MMU_ENTRY_EXECUTE_DISABLE);
MMU_ENTRY_WRITE | MMU_ENTRY_EXECUTE_DISABLE);
#endif
#ifdef CONFIG_COVERAGE_GCOV
@ -43,9 +41,7 @@ MMU_BOOT_REGION((u32_t)&__gcov_bss_start, (u32_t)&__gcov_bss_size,
* automatically for stacks.
*/
MMU_BOOT_REGION((u32_t)&__kernel_ram_start, (u32_t)&__kernel_ram_size,
MMU_ENTRY_WRITE |
MMU_ENTRY_RUNTIME_USER |
MMU_ENTRY_EXECUTE_DISABLE);
MMU_ENTRY_WRITE | MMU_ENTRY_EXECUTE_DISABLE);
/* Works for PDPT, PD, PT entries, the bits we check here are all the same.
*
@ -298,13 +294,19 @@ static inline void tlb_flush_page(void *addr)
__asm__ ("invlpg %0" :: "m" (*page));
}
#define PDPTE_FLAGS_MASK MMU_ENTRY_PRESENT
#define PDE_FLAGS_MASK (MMU_ENTRY_WRITE | MMU_ENTRY_USER | \
PDPTE_FLAGS_MASK)
#define PTE_FLAGS_MASK (PDE_FLAGS_MASK | MMU_ENTRY_EXECUTE_DISABLE | \
MMU_ENTRY_WRITE_THROUGH | \
MMU_ENTRY_CACHING_DISABLE)
void z_x86_mmu_set_flags(struct x86_mmu_pdpt *pdpt, void *ptr, size_t size,
x86_page_entry_data_t flags,
x86_page_entry_data_t mask, bool flush)
{
union x86_mmu_pte *pte;
u32_t addr = (u32_t)ptr;
__ASSERT((addr & MMU_PAGE_MASK) == 0U, "unaligned address provided");
@ -319,12 +321,26 @@ void z_x86_mmu_set_flags(struct x86_mmu_pdpt *pdpt, void *ptr, size_t size,
}
while (size != 0) {
union x86_mmu_pte *pte;
union x86_mmu_pde_pt *pde;
union x86_mmu_pdpte *pdpte;
x86_page_entry_data_t cur_flags = flags;
/* TODO we're not generating 2MB entries at the moment */
__ASSERT(X86_MMU_GET_PDE(pdpt, addr)->ps != 1, "2MB PDE found");
pte = X86_MMU_GET_PTE(pdpt, addr);
pdpte = X86_MMU_GET_PDPTE(pdpt, addr);
__ASSERT(pdpte->p == 1, "set flags on non-present PDPTE");
pdpte->value |= (flags & PDPTE_FLAGS_MASK);
pde = X86_MMU_GET_PDE(pdpt, addr);
__ASSERT(pde->p == 1, "set flags on non-present PDE");
pde->value |= (flags & PDE_FLAGS_MASK);
/* If any flags enable execution, clear execute disable at the
* page directory level
*/
if ((flags & MMU_ENTRY_EXECUTE_DISABLE) == 0) {
pde->value &= ~MMU_ENTRY_EXECUTE_DISABLE;
}
pte = X86_MMU_GET_PTE(pdpt, addr);
/* If we're setting the present bit, restore the address
* field. If we're clearing it, then the address field
* will be zeroed instead, mapping the PTE to the NULL page.
@ -344,6 +360,169 @@ void z_x86_mmu_set_flags(struct x86_mmu_pdpt *pdpt, void *ptr, size_t size,
}
}
static char __aligned(MMU_PAGE_SIZE)
page_pool[MMU_PAGE_SIZE * CONFIG_X86_MMU_PAGE_POOL_PAGES];
static char *page_pos = page_pool + sizeof(page_pool);
static void *get_page(void)
{
page_pos -= MMU_PAGE_SIZE;
__ASSERT(page_pos >= page_pool, "out of MMU pages\n");
return page_pos;
}
__aligned(0x20) struct x86_mmu_pdpt z_x86_kernel_pdpt;
#ifdef CONFIG_X86_KPTI
__aligned(0x20) struct x86_mmu_pdpt z_x86_user_pdpt;
#endif
extern char z_shared_kernel_page_start[];
static inline bool is_within_system_ram(uintptr_t addr)
{
return (addr >= DT_PHYS_RAM_ADDR) &&
(addr < (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U)));
}
static void add_mmu_region_page(struct x86_mmu_pdpt *pdpt, uintptr_t addr,
u64_t flags, bool user_table)
{
union x86_mmu_pdpte *pdpte;
struct x86_mmu_pd *pd;
union x86_mmu_pde_pt *pde;
struct x86_mmu_pt *pt;
union x86_mmu_pte *pte;
#ifdef CONFIG_X86_KPTI
/* If we are generating a page table for user mode, and this address
* does not have the user flag set, and this address falls outside
* of system RAM, then don't bother generating any tables for it,
* we will never need them later as memory domains are limited to
* regions within system RAM.
*/
if (user_table && (flags & MMU_ENTRY_USER) == 0 &&
!is_within_system_ram(addr)) {
return;
}
#endif
/* Setup the PDPTE entry for the address, creating a page directory
* if one didn't exist
*/
pdpte = &pdpt->entry[MMU_PDPTE_NUM(addr)];
if (pdpte->p == 0) {
pd = get_page();
pdpte->pd = ((uintptr_t)pd) >> MMU_PAGE_SHIFT;
} else {
pd = (struct x86_mmu_pd *)(pdpte->pd << MMU_PAGE_SHIFT);
}
pdpte->value |= (flags & PDPTE_FLAGS_MASK);
/* Setup the PDE entry for the address, creating a page table
* if necessary
*/
pde = &pd->entry[MMU_PDE_NUM(addr)].pt;
if (pde->p == 0) {
pt = get_page();
pde->pt = ((uintptr_t)pt) >> MMU_PAGE_SHIFT;
} else {
pt = (struct x86_mmu_pt *)(pde->pt << MMU_PAGE_SHIFT);
}
pde->value |= (flags & PDE_FLAGS_MASK);
/* Execute disable bit needs special handling, we should only set it
* at the page directory level if ALL pages have XD set (instead of
* just one).
*
* Use the 'ignored2' field to store a marker on whether any
* configured region allows execution, the CPU never looks at
* or modifies it.
*/
if ((flags & MMU_ENTRY_EXECUTE_DISABLE) == 0) {
pde->ignored2 = 1;
pde->value &= ~MMU_ENTRY_EXECUTE_DISABLE;
} else if (pde->ignored2 == 0) {
pde->value |= MMU_ENTRY_EXECUTE_DISABLE;
}
#ifdef CONFIG_X86_KPTI
if (user_table && (flags & MMU_ENTRY_USER) == 0 &&
addr != (uintptr_t)(&z_shared_kernel_page_start)) {
/* All non-user accessible pages except the shared page
* are marked non-present in the page table.
*/
return;
}
#else
ARG_UNUSED(user_table);
#endif
/* Finally set up the page table entry */
pte = &pt->entry[MMU_PAGE_NUM(addr)];
pte->page = addr >> MMU_PAGE_SHIFT;
pte->value |= (flags & PTE_FLAGS_MASK);
}
static void add_mmu_region(struct x86_mmu_pdpt *pdpt, struct mmu_region *rgn,
bool user_table)
{
size_t size;
u64_t flags;
uintptr_t addr;
__ASSERT((rgn->address & MMU_PAGE_MASK) == 0U,
"unaligned address provided");
__ASSERT((rgn->size & MMU_PAGE_MASK) == 0U,
"unaligned size provided");
addr = rgn->address;
/* Add the present flag, and filter out 'runtime user' since this
* has no meaning to the actual MMU
*/
flags = rgn->flags | MMU_ENTRY_PRESENT;
/* Iterate through the region a page at a time, creating entries as
* necessary.
*/
size = rgn->size;
while (size > 0) {
add_mmu_region_page(pdpt, addr, flags, user_table);
size -= MMU_PAGE_SIZE;
addr += MMU_PAGE_SIZE;
}
}
extern struct mmu_region z_x86_mmulist_start[];
extern struct mmu_region z_x86_mmulist_end[];
/* Called from x86's kernel_arch_init() */
void z_x86_paging_init(void)
{
size_t pages_free;
for (struct mmu_region *rgn = z_x86_mmulist_start;
rgn < z_x86_mmulist_end; rgn++) {
add_mmu_region(&z_x86_kernel_pdpt, rgn, false);
#ifdef CONFIG_X86_KPTI
add_mmu_region(&z_x86_user_pdpt, rgn, true);
#endif
}
pages_free = (page_pos - page_pool) / MMU_PAGE_SIZE;
if (pages_free != 0) {
printk("Optimal CONFIG_X86_MMU_PAGE_POOL_PAGES %zu\n",
CONFIG_X86_MMU_PAGE_POOL_PAGES - pages_free);
}
z_x86_enable_paging();
}
#ifdef CONFIG_X86_USERSPACE
int z_arch_buffer_validate(void *addr, size_t size, int write)
{

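The table walk in add_mmu_region_page() above follows the standard 32-bit PAE split of a virtual address; the MMU_PDPTE_NUM/MMU_PDE_NUM/MMU_PAGE_NUM macros it uses are assumed to reduce to the shifts below, which match the helpers in the deleted gen_mmu_x86.py later in this diff. A minimal, self-contained illustration:

#include <stdint.h>
#include <stdio.h>

/* Standalone example, not part of the patch */
int main(void)
{
	uintptr_t addr = 0x0012f000;                /* arbitrary address */
	unsigned int pdpte = (addr >> 30) & 0x3;    /* which 1 GiB slot  */
	unsigned int pde   = (addr >> 21) & 0x1ff;  /* which 2 MiB slot  */
	unsigned int pte   = (addr >> 12) & 0x1ff;  /* which 4 KiB page  */

	printf("PDPTE %u, PDE %u, PTE %u\n", pdpte, pde, pte);
	return 0;
}

Each page handed out by get_page() becomes either a page directory (one per PDPTE slot in use) or a page table (one per PDE slot in use), which is what the page pool ultimately pays for.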

@ -1,558 +0,0 @@
#!/usr/bin/env python3
#
# Copyright (c) 2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Generate MMU page tables for x86 CPUs.
This script generates 64-bit PAE style MMU page tables for x86.
Even though x86 is a 32-bit target, we use this type of page table
to support the No-Execute (NX) bit. Please consult the IA
Architecture SW Developer Manual, volume 3, chapter 4 for more
details on this data structure.
The script takes as input the zephyr_prebuilt.elf kernel binary,
which is a link of the Zephyr kernel without various build-time
generated data structures (such as the MMU tables) inserted into it.
The build cannot easily predict how large these tables will be,
so it is important that these MMU tables be inserted at the very
end of memory.
Of particular interest is the "mmulist" section, which is a
table of memory region access policies set in code by instances
of MMU_BOOT_REGION() macros. The set of regions defined here
specifies the boot-time configuration of the page tables.
The output of this script is a linked set of page tables, page
directories, and a page directory pointer table, which gets linked
into the final Zephyr binary, reflecting the access policies
read in the "mmulist" section. Any memory ranges not specified
in "mmulist" are marked non-present.
If Kernel Page Table Isolation (CONFIG_X86_KPTI) is enabled, this
script additionally outputs a second set of page tables intended
for use by user threads running in Ring 3. These tables have the
same policy as the kernel's set of page tables with one crucial
difference: any pages not accessible to user mode threads are not
marked 'present', preventing Meltdown-style side channel attacks
from reading their contents.
"""
import os
import sys
import struct
from collections import namedtuple
import ctypes
import argparse
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
mmu_region_details = namedtuple("mmu_region_details",
"pde_index page_entries_info")
valid_pages_inside_pde = namedtuple("valid_pages_inside_pde", "start_addr size \
pte_valid_addr_start \
pte_valid_addr_end \
permissions")
mmu_region_details_pdpt = namedtuple("mmu_region_details_pdpt",
"pdpte_index pd_entries")
# Constants
PAGE_ENTRY_PRESENT = 1
PAGE_ENTRY_READ_WRITE = 1 << 1
PAGE_ENTRY_USER_SUPERVISOR = 1 << 2
PAGE_ENTRY_XD = 1 << 63
# Struct formatters
struct_mmu_regions_format = "<IIQ"
header_values_format = "<II"
page_entry_format = "<Q"
entry_counter = 0
def print_code(val):
global entry_counter
if not val & PAGE_ENTRY_PRESENT:
ret = '.'
else:
if val & PAGE_ENTRY_READ_WRITE:
# Writable page
if val & PAGE_ENTRY_XD:
# Readable, writeable, not executable
ret = 'w'
else:
# Readable, writable, executable
ret = 'a'
else:
# Read-only
if val & PAGE_ENTRY_XD:
# Read-only
ret = 'r'
else:
# Readable, executable
ret = 'x'
if val & PAGE_ENTRY_USER_SUPERVISOR:
# User accessible pages are capital letters
ret = ret.upper()
sys.stdout.write(ret)
entry_counter = entry_counter + 1
if entry_counter == 128:
sys.stdout.write("\n")
entry_counter = 0
class PageMode_PAE:
total_pages = 511
size_addressed_per_pde = (512 * 4096) # 2MB In Bytes
size_addressed_per_pdpte = (512 * size_addressed_per_pde) # In Bytes
list_of_pdpte = {}
def __init__(self, pd_start_addr, mem_regions, syms, kpti):
self.pd_start_addr = pd_start_addr
self.mem_regions = mem_regions
self.pd_tables_list = []
self.output_offset = 0
self.kpti = kpti
self.syms = syms
for i in range(4):
self.list_of_pdpte[i] = mmu_region_details_pdpt(pdpte_index=i,
pd_entries={})
self.populate_required_structs()
self.pdpte_create_binary_file()
self.page_directory_create_binary_file()
self.page_table_create_binary_file()
# return the pdpte number for the give address
def get_pdpte_number(self, value):
return (value >> 30) & 0x3
# return the page directory number for the give address
def get_pde_number(self, value):
return (value >> 21) & 0x1FF
# return the page table number for the given address
def get_pte_number(self, value):
return (value >> 12) & 0x1FF
def get_number_of_pd(self):
return len(self.get_pdpte_list())
def get_pdpte_list(self):
return list({temp[0] for temp in self.pd_tables_list})
# the return value will have the page address and it is assumed to be a 4096
# boundary.hence the output of this API will be a 20bit address of the page
# table
def address_of_page_table(self, pdpte, page_table_number):
# first page given to page directory pointer
# and 2nd page till 5th page are used for storing the page directories.
# set the max pdpte used. this tells how many pd are needed after
# that we start keeping the pt
PT_start_addr = self.get_number_of_pd() * 4096 +\
self.pd_start_addr + 4096
return (PT_start_addr +
(self.pd_tables_list.index([pdpte, page_table_number]) *
4096) >> 12)
def get_binary_pde_value(self, pdpte, value):
perms = value.page_entries_info[0].permissions
present = PAGE_ENTRY_PRESENT
read_write = check_bits(perms, [1, 29]) << 1
user_mode = check_bits(perms, [2, 28]) << 2
page_table = self.address_of_page_table(pdpte, value.pde_index) << 12
return present | read_write | user_mode | page_table
def get_binary_pte_value(self, value, pde, pte, perm_for_pte):
read_write = perm_for_pte & PAGE_ENTRY_READ_WRITE
user_mode = perm_for_pte & PAGE_ENTRY_USER_SUPERVISOR
xd = perm_for_pte & PAGE_ENTRY_XD
# This points to the actual memory in the HW
# totally 20 bits to rep the phy address
# first 2bits is from pdpte then 9bits is the number got from pde and
# next 9bits is pte
page_table = ((value.pdpte_index << 18) | (pde << 9) | pte) << 12
if self.kpti:
if user_mode:
present = PAGE_ENTRY_PRESENT
else:
if page_table == self.syms['z_shared_kernel_page_start']:
present = PAGE_ENTRY_PRESENT
else:
present = 0
else:
present = PAGE_ENTRY_PRESENT
binary_value = (present | read_write | user_mode | xd)
# L1TF mitigation: map non-present pages to the NULL page
if present:
binary_value |= page_table
return binary_value
def clean_up_unused_pdpte(self):
self.list_of_pdpte = {key: value for key, value in
self.list_of_pdpte.items()
if value.pd_entries != {}}
# update the tuple values for the memory regions needed
def set_pde_pte_values(self, pdpte, pde_index, address, mem_size,
pte_valid_addr_start, pte_valid_addr_end, perm):
pages_tuple = valid_pages_inside_pde(
start_addr=address,
size=mem_size,
pte_valid_addr_start=pte_valid_addr_start,
pte_valid_addr_end=pte_valid_addr_end,
permissions=perm)
mem_region_values = mmu_region_details(pde_index=pde_index,
page_entries_info=[])
mem_region_values.page_entries_info.append(pages_tuple)
if pde_index in self.list_of_pdpte[pdpte].pd_entries.keys():
# this step adds the new page info to the exsisting pages info
self.list_of_pdpte[pdpte].pd_entries[pde_index].\
page_entries_info.append(pages_tuple)
else:
self.list_of_pdpte[pdpte].pd_entries[pde_index] = mem_region_values
def populate_required_structs(self):
for start, size, flags in self.mem_regions:
pdpte_index = self.get_pdpte_number(start)
pde_index = self.get_pde_number(start)
pte_valid_addr_start = self.get_pte_number(start)
# Get the end of the page table entries
# Since a memory region can take up only a few entries in the Page
# table, this helps us get the last valid PTE.
pte_valid_addr_end = self.get_pte_number(start +
size - 1)
mem_size = size
# In-case the start address aligns with a page table entry other
# than zero and the mem_size is greater than (1024*4096) i.e 4MB
# in case where it overflows the current PDE's range then limit the
# PTE to 1024 and so make the mem_size reflect the actual size
# taken up in the current PDE
if (size + (pte_valid_addr_start * 4096)) >= \
(self.size_addressed_per_pde):
pte_valid_addr_end = self.total_pages
mem_size = (((self.total_pages + 1) -
pte_valid_addr_start) * 4096)
self.set_pde_pte_values(pdpte_index,
pde_index,
start,
mem_size,
pte_valid_addr_start,
pte_valid_addr_end,
flags)
if [pdpte_index, pde_index] not in self.pd_tables_list:
self.pd_tables_list.append([pdpte_index, pde_index])
# IF the current pde couldn't fit the entire requested region
# size then there is a need to create new PDEs to match the size.
# Here the overflow_size represents the size that couldn't be fit
# inside the current PDE, this is will now to used to create a new
# PDE/PDEs so the size remaining will be
# requested size - allocated size(in the current PDE)
overflow_size = size - mem_size
# create all the extra PDEs needed to fit the requested size
# this loop starts from the current pde till the last pde that is
# needed the last pde is calculated as the (start_addr + size) >>
# 22
if overflow_size != 0:
for extra_pdpte in range(pdpte_index,
self.get_pdpte_number(start +
size) + 1):
for extra_pde in range(pde_index + 1, self.get_pde_number(
start + size) + 1):
# new pde's start address
# each page directory entry has a addr range of
# (1024 *4096) thus the new PDE start address is a
# multiple of that number
extra_pde_start_address = (
extra_pde * (self.size_addressed_per_pde))
# the start address of and extra pde will always be 0
# and the end address is calculated with the new
# pde's start address and the overflow_size
extra_pte_valid_addr_end = (
self.get_pte_number(extra_pde_start_address +
overflow_size - 1))
# if the overflow_size couldn't be fit inside this new
# pde then need another pde and so we now need to limit
# the end of the PTE to 1024 and set the size of this
# new region to the max possible
extra_region_size = overflow_size
if overflow_size >= (self.size_addressed_per_pde):
extra_region_size = self.size_addressed_per_pde
extra_pte_valid_addr_end = self.total_pages
# load the new PDE's details
self.set_pde_pte_values(extra_pdpte,
extra_pde,
extra_pde_start_address,
extra_region_size,
0,
extra_pte_valid_addr_end,
flags)
# for the next iteration of the loop the size needs
# to decreased
overflow_size -= extra_region_size
if [extra_pdpte, extra_pde] not in self.pd_tables_list:
self.pd_tables_list.append([extra_pdpte, extra_pde])
if overflow_size == 0:
break
self.pd_tables_list.sort()
self.clean_up_unused_pdpte()
pages_for_pdpte = 1
pages_for_pd = self.get_number_of_pd()
pages_for_pt = len(self.pd_tables_list)
self.output_buffer = ctypes.create_string_buffer((pages_for_pdpte +
pages_for_pd +
pages_for_pt) * 4096)
def pdpte_create_binary_file(self):
# pae needs a pdpte at 32byte aligned address
# Even though we have only 4 entries in the pdpte we need to move
# the self.output_offset variable to the next page to start pushing
# the pd contents
#
# FIXME: This wastes a ton of RAM!!
if args.verbose:
print("PDPTE at 0x%x" % self.pd_start_addr)
for pdpte in range(self.total_pages + 1):
if pdpte in self.get_pdpte_list():
present = 1 << 0
addr_of_pd = (((self.pd_start_addr + 4096) +
self.get_pdpte_list().index(pdpte) *
4096) >> 12) << 12
binary_value = (present | addr_of_pd)
else:
binary_value = 0
struct.pack_into(page_entry_format,
self.output_buffer,
self.output_offset,
binary_value)
self.output_offset += struct.calcsize(page_entry_format)
def page_directory_create_binary_file(self):
for pdpte, pde_info in self.list_of_pdpte.items():
if args.verbose:
print("Page directory %d at 0x%x" % (pde_info.pdpte_index,
self.pd_start_addr + self.output_offset))
for pde in range(self.total_pages + 1):
binary_value = 0 # the page directory entry is not valid
# if i have a valid entry to populate
if pde in pde_info.pd_entries.keys():
value = pde_info.pd_entries[pde]
binary_value = self.get_binary_pde_value(pdpte, value)
struct.pack_into(page_entry_format,
self.output_buffer,
self.output_offset,
binary_value)
if args.verbose:
print_code(binary_value)
self.output_offset += struct.calcsize(page_entry_format)
def page_table_create_binary_file(self):
for _, pde_info in sorted(self.list_of_pdpte.items()):
for pde, pte_info in sorted(pde_info.pd_entries.items()):
pe_info = pte_info.page_entries_info[0]
start_addr = pe_info.start_addr & ~0x1FFFFF
end_addr = start_addr + 0x1FFFFF
if args.verbose:
print("Page table for 0x%08x - 0x%08x at 0x%08x" %
(start_addr, end_addr,
self.pd_start_addr + self.output_offset))
for pte in range(self.total_pages + 1):
binary_value = 0 # the page directory entry is not valid
valid_pte = 0
# go through all the valid pages inside the pde to
# figure out if we need to populate this pte
for i in pte_info.page_entries_info:
temp_value = ((pte >= i.pte_valid_addr_start) and
(pte <= i.pte_valid_addr_end))
if temp_value:
perm_for_pte = i.permissions
valid_pte |= temp_value
# if i have a valid entry to populate
if valid_pte:
binary_value = self.get_binary_pte_value(pde_info,
pde,
pte,
perm_for_pte)
if args.verbose:
print_code(binary_value)
struct.pack_into(page_entry_format,
self.output_buffer,
self.output_offset,
binary_value)
self.output_offset += struct.calcsize(page_entry_format)
#*****************************************************************************#
def read_mmu_list(mmu_list_data):
regions = []
# Read mmu_list header data
num_of_regions, pd_start_addr = struct.unpack_from(
header_values_format, mmu_list_data, 0)
# a offset used to remember next location to read in the binary
size_read_from_binary = struct.calcsize(header_values_format)
if args.verbose:
print("Start address of page tables: 0x%08x" % pd_start_addr)
print("Build-time memory regions:")
# Read all the region definitions
for region_index in range(num_of_regions):
addr, size, flags = struct.unpack_from(struct_mmu_regions_format,
mmu_list_data,
size_read_from_binary)
size_read_from_binary += struct.calcsize(struct_mmu_regions_format)
if args.verbose:
print(" Region %03d: 0x%08x - 0x%08x (0x%016x)" %
(region_index, addr, addr + size - 1, flags))
# ignore zero sized memory regions
if size == 0:
continue
if (addr & 0xFFF) != 0:
print("Memory region %d start address %x is not page-aligned" %
(region_index, addr))
sys.exit(2)
if (size & 0xFFF) != 0:
print("Memory region %d size %d is not page-aligned" %
(region_index, size))
sys.exit(2)
# validate for memory overlap here
for other_region_index in range(len(regions)):
other_addr, other_size, _ = regions[other_region_index]
end_addr = addr + size
other_end_addr = other_addr + other_size
overlap = ((addr <= other_addr and end_addr > other_addr) or
(other_addr <= addr and other_end_addr > addr))
if overlap:
print("Memory region %d (%x:%x) overlaps memory region %d (%x:%x)" %
(region_index, addr, end_addr, other_region_index,
other_addr, other_end_addr))
sys.exit(2)
# add the retrieved info another list
regions.append((addr, size, flags))
return (pd_start_addr, regions)
def check_bits(val, bits):
for b in bits:
if val & (1 << b):
return 1
return 0
def get_symbols(obj):
for section in obj.iter_sections():
if isinstance(section, SymbolTableSection):
return {sym.name: sym.entry.st_value
for sym in section.iter_symbols()}
raise LookupError("Could not find symbol table")
# Read the parameters passed to the file
def parse_args():
global args
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("-k", "--kernel",
help="Zephyr kernel image")
parser.add_argument("-o", "--output",
help="Output file into which the page tables are "
"written.")
parser.add_argument("-u", "--user-output",
help="User mode page tables for KPTI")
parser.add_argument("-v", "--verbose", action="count", default=0,
help="Print debugging information. Multiple "
"invocations increase verbosity")
args = parser.parse_args()
if "VERBOSE" in os.environ:
args.verbose = 1
def main():
parse_args()
with open(args.kernel, "rb") as fp:
kernel = ELFFile(fp)
syms = get_symbols(kernel)
irq_data = kernel.get_section_by_name("mmulist").data()
pd_start_addr, regions = read_mmu_list(irq_data)
# select the page table needed
page_table = PageMode_PAE(pd_start_addr, regions, syms, False)
# write the binary data into the file
with open(args.output, 'wb') as fp:
fp.write(page_table.output_buffer)
if "CONFIG_X86_KPTI" in syms:
pd_start_addr += page_table.output_offset
user_page_table = PageMode_PAE(pd_start_addr, regions, syms, True)
with open(args.user_output, 'wb') as fp:
fp.write(user_page_table.output_buffer)
if __name__ == "__main__":
main()


@ -105,38 +105,6 @@ add_bin_file_to_the_next_link(gen_idt_output staticIdt)
add_bin_file_to_the_next_link(gen_idt_output irq_int_vector_map)
add_bin_file_to_the_next_link(gen_idt_output irq_vectors_alloc)
if(CONFIG_X86_MMU)
if(CONFIG_X86_KPTI)
set(user_mmu_tables_bin user_mmu_tables.bin)
endif()
add_custom_target(
mmu_tables_bin_target
DEPENDS
mmu_tables.bin
${user_mmu_tables_bin}
)
add_custom_command(
OUTPUT
mmu_tables.bin
${user_mmu_tables_bin}
COMMAND
${PYTHON_EXECUTABLE}
${ZEPHYR_BASE}/arch/x86/gen_mmu_x86.py
-k $<TARGET_FILE:${ZEPHYR_PREBUILT_EXECUTABLE}>
-o mmu_tables.bin
-u user_mmu_tables.bin
$<$<BOOL:${CMAKE_VERBOSE_MAKEFILE}>:-v>
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${ZEPHYR_PREBUILT_EXECUTABLE}
)
add_bin_file_to_the_next_link( mmu_tables_bin_target mmu_tables)
if(CONFIG_X86_KPTI)
add_bin_file_to_the_next_link(mmu_tables_bin_target user_mmu_tables)
endif()
endif()
if(CONFIG_GDT_DYNAMIC)
# Use gen_gdt.py and objcopy to generate gdt.o from from the elf
# file ${ZEPHYR_PREBUILT_EXECUTABLE}, creating the temp file gdt.bin along the


@ -28,6 +28,9 @@ extern K_THREAD_STACK_DEFINE(_interrupt_stack, CONFIG_ISR_STACK_SIZE);
void z_x86_early_serial_init(void);
#endif
/* Create all page tables with boot configuration and enable paging */
void z_x86_paging_init(void);
/**
*
* @brief Performs architecture-specific initialization
@ -47,6 +50,9 @@ static inline void kernel_arch_init(void)
#ifdef CONFIG_X86_VERY_EARLY_CONSOLE
z_x86_early_serial_init();
#endif
#ifdef CONFIG_X86_MMU
z_x86_paging_init();
#endif
#if CONFIG_X86_STACK_PROTECTION
z_x86_mmu_set_flags(&z_x86_kernel_pdpt, _interrupt_stack, MMU_PAGE_SIZE,
MMU_ENTRY_READ, MMU_PTE_RW_MASK, true);
@ -95,6 +101,10 @@ static inline struct x86_mmu_pdpt *z_x86_pdpt_get(struct k_thread *thread)
return &header->kernel_data.pdpt;
}
#endif /* CONFIG_USERSPACE */
/* ASM code to fiddle with registers to enable the MMU with PAE paging */
void z_x86_enable_paging(void);
#include <stddef.h> /* For size_t */
#ifdef __cplusplus

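As a usage sketch mirroring the CONFIG_X86_STACK_PROTECTION call above (the buffer and function are hypothetical; the flags and masks are the ones already used in this diff), runtime permission changes go through z_x86_mmu_set_flags(), with the mask argument selecting which permission bits may be rewritten:

/* Hypothetical example, not part of this patch */
static char __aligned(MMU_PAGE_SIZE) scratch_page[MMU_PAGE_SIZE];

void example_write_protect(void)
{
	/* Make the page read-only in the kernel tables, flushing its TLB entry */
	z_x86_mmu_set_flags(&z_x86_kernel_pdpt, scratch_page, MMU_PAGE_SIZE,
			    MMU_ENTRY_READ, MMU_PTE_RW_MASK, true);

	/* ... later, restore write access */
	z_x86_mmu_set_flags(&z_x86_kernel_pdpt, scratch_page, MMU_PAGE_SIZE,
			    MMU_ENTRY_WRITE, MMU_PTE_RW_MASK, true);
}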

@ -116,29 +116,6 @@
#define MMU_ENTRY_EXECUTE_DISABLE 0x8000000000000000ULL
/* Special flag argument for MMU_BOOT region invocations */
/* Indicates that pages within this region may have their user/supervisor
* permissions adjusted at runtime. Unnecessary if MMU_ENTRY_USER is already
* set.
*
* The result of this is a guarantee that the 'user' bit for all PDEs referring
* to the region will be set, even if the boot configuration has no user pages
* in it.
*/
#define MMU_ENTRY_RUNTIME_USER 0x10000000ULL
/* Indicates that pages within this region may have their read/write
* permissions adjusted at runtime. Unnecessary if MMU_ENTRY_WRITE is already
* set.
*
* The result of this is a guarantee that the 'write' bit for all PDEs
* referring to the region will be set, even if the boot configuration has no
* writable pages in it.
*/
#define MMU_ENTRY_RUNTIME_WRITE 0x20000000ULL
/* Helper macros to ease the usage of the MMU page table structures.
*/
@ -228,8 +205,8 @@
* In order to populate this structure use macro MMU_BOOT_REGION.
*/
struct mmu_region {
u32_t address; /*Start address of the memory region */
u32_t size; /* Size of the memory region*/
uintptr_t address; /*Start address of the memory region */
size_t size; /* Size of the memory region*/
u64_t flags; /* Permissions needed for this region*/
};
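
The struct above is what ties MMU_BOOT_REGION() (used in the mmu.c hunk earlier) to z_x86_paging_init(): each invocation is expected to emit one struct mmu_region into the .mmulist section, which the linker script later in this diff brackets with z_x86_mmulist_start/z_x86_mmulist_end. A sketch of that mechanism, with a hypothetical name parameter added for uniqueness (the real macro definition is not shown in this diff):

/* Illustrative sketch of the mechanism; not the macro's real definition */
#define MMU_BOOT_REGION_SKETCH(name, addr, region_size, permission_flags)  \
	static struct mmu_region                                           \
	__attribute__((section(".mmulist"), used)) mmu_region_##name = {   \
		.address = (uintptr_t)(addr),                              \
		.size = (size_t)(region_size),                             \
		.flags = (permission_flags),                               \
	}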


@ -16,7 +16,9 @@ CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC=25000000
CONFIG_TEST_RANDOM_GENERATOR=y
CONFIG_XIP=y
CONFIG_X86_MMU=y
CONFIG_X86_MMU_PAGE_POOL_PAGES=15
CONFIG_DEBUG_INFO=y
CONFIG_SCHED_SCALABLE=y
CONFIG_WAITQ_SCALABLE=y
CONFIG_COVERAGE=y
CONFIG_X86_VERY_EARLY_CONSOLE=y


@ -15,6 +15,7 @@ CONFIG_UART_CONSOLE=y
CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC=25000000
CONFIG_TEST_RANDOM_GENERATOR=y
CONFIG_X86_MMU=y
CONFIG_X86_MMU_PAGE_POOL_PAGES=15
CONFIG_DEBUG_INFO=y
CONFIG_SCHED_SCALABLE=y
CONFIG_WAITQ_SCALABLE=y


@ -15,4 +15,5 @@ CONFIG_UART_CONSOLE=y
CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC=25000000
CONFIG_X86_IAMCU=y
CONFIG_X86_MMU=y
CONFIG_X86_MMU_PAGE_POOL_PAGES=15
CONFIG_DEBUG_INFO=y


@ -119,6 +119,13 @@ SECTIONS
*(".rodata.*")
*(.gnu.linkonce.r.*)
#ifdef CONFIG_X86_MMU
. = ALIGN(4);
z_x86_mmulist_start = .;
KEEP(*(.mmulist))
z_x86_mmulist_end = .;
#endif /* CONFIG_X86_MMU */
#ifndef CONFIG_DYNAMIC_INTERRUPTS
. = ALIGN(8);
_idt_base_address = .;
@ -349,29 +356,6 @@ SECTIONS
#include <linker/common-ram.ld>
#ifdef CONFIG_X86_MMU
/* Can't really predict the size of this section. Anything after this
* should not be affected if addresses change between builds (currently
* just the gperf tables which is fine).
*
* However, __mmu_tables_start *must* remain stable between builds,
* we can't have anything shifting the memory map beforehand.
*/
SECTION_DATA_PROLOGUE(mmu_tables,,)
{
/* Page Tables are located here if MMU is enabled.*/
MMU_PAGE_ALIGN
__mmu_tables_start = .;
z_x86_kernel_pdpt = .;
KEEP(*(mmu_tables));
#ifdef CONFIG_X86_KPTI
z_x86_user_pdpt = .;
KEEP(*(user_mmu_tables));
#endif /* CONFIG_X86_KPTI */
__mmu_tables_end = .;
} GROUP_DATA_LINK_IN(RAMABLE_REGION, ROMABLE_REGION)
#endif
#include <linker/kobject.ld>
MMU_PAGE_ALIGN
@ -397,23 +381,6 @@ SECTIONS
KEEP(*(.intList))
KEEP(*(.gnu.linkonce.intList.*))
} > IDT_LIST
#ifdef CONFIG_X86_MMU
/* Memory management unit*/
SECTION_PROLOGUE(mmulist,,)
{
/* get size of the mmu lists needed for gen_mmu_x86.py*/
LONG((__MMU_LIST_END__ - __MMU_LIST_START__) / __MMU_REGION_SIZEOF)
/* Get the start of mmu tables in data section so that the address
* of the page tables can be calculated.
*/
LONG(__mmu_tables_start)
__MMU_LIST_START__ = .;
KEEP(*(.mmulist))
__MMU_LIST_END__ = .;
} > MMU_LIST
#endif /* CONFIG_X86_MMU */
#else
/DISCARD/ :
{
@ -421,7 +388,6 @@ SECTIONS
KEEP(*(.spurNoErrIsr))
KEEP(*(.intList))
KEEP(*(.gnu.linkonce.intList.*))
KEEP(*(.mmulist))
}
#endif


@ -32,9 +32,6 @@ MEMORY
*/
IDT_LIST : ORIGIN = 2K, LENGTH = 2K
#ifdef CONFIG_X86_MMU
MMU_LIST : ORIGIN = 4k, LENGTH = 1K
#endif
}
#include <arch/x86/ia32/linker.ld>


@ -39,9 +39,6 @@ MEMORY
*/
IDT_LIST : ORIGIN = 2K, LENGTH = 2K
#ifdef CONFIG_X86_MMU
MMU_LIST : ORIGIN = 4k, LENGTH = 1K
#endif
}
#include <arch/x86/ia32/linker.ld>


@ -40,7 +40,6 @@ MEMORY
*/
IDT_LIST : ORIGIN = 0xFFFF1000, LENGTH = 2K
MMU_LIST : ORIGIN = 0xFFFF2000, LENGTH = 1K
}
#include <arch/x86/ia32/linker.ld>