x86: add new page table generation script

This produces a set of page tables with system RAM
mapped for read/write/execute access by supervisor
mode, such that they may be installed in the CPU
in the earliest boot stages and remain mutable at runtime.

These tables optionally support a dual physical/virtual
mapping of RAM to help boot virtual memory systems.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
This commit is contained in:
Andrew Boie 2020-06-12 17:10:01 -07:00 committed by Anas Nashif
commit e9d15451b1

556
arch/x86/gen_mmu.py Executable file
View file

@ -0,0 +1,556 @@
#!/usr/bin/env python3
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""Create the kernel's page tables for x86 CPUs.
For additional detail on paging and x86 memory management, please
consult the IA Architecture SW Developer Manual, volume 3a, chapter 4.
This script produces the initial page tables installed into the CPU
at early boot. These pages will have an identity mapping at
CONFIG_SRAM_BASE_ADDRESS of size CONFIG_SRAM_SIZE. The script takes
the 'zephyr_prebuilt.elf' as input to obtain region sizes, certain
memory addresses, and configuration values.
If CONFIG_SRAM_REGION_PERMISSIONS is not enabled, all RAM will be
mapped with the Present and Write bits set. The linker scripts shouldn't
add page alignment padding between sections.
If CONFIG_SRAM_REGION_PERMISSIONS is enabled, the access permissions
vary:
- By default, the Present, Write, and Execute Disable bits are
set.
- The _image_text region will have Present and User bits set
- The _image_rodata region will have Present, User, and Execute
Disable bits set
- On x86_64, the _locore region will have Present set and
the _lorodata region will have Present and Execute Disable set.
This script will establish a dual mapping at the address defined by
CONFIG_KERNEL_VM_BASE if it is not the same as CONFIG_SRAM_BASE_ADDRESS.
- The double-mapping is used to transition the
instruction pointer from a physical address at early boot to the
virtual address where the kernel is actually linked.
- The mapping is always double-mapped at the top-level paging structure
and the physical/virtual base addresses must have the same alignment
with respect to the scope of top-level paging structure entries.
This allows the same second-level paging structure(s) to be used for
both memory bases.
- The double-mapping is needed so that we can still fetch instructions
from identity-mapped physical addresses after we program this table
into the MMU, then jump to the equivalent virtual address.
The kernel then unlinks the identity mapping before continuing,
the address space is purely virtual after that.
Because the set of page tables are linked together by physical address,
we must know a priori the physical address of each table. The linker
script must define a z_x86_pagetables_start symbol where the page
tables will be placed, and this memory address must not shift between
prebuilt and final ELF builds. This script will not work on systems
where the physical load address of the kernel is unknown at build time.
64-bit systems will always build IA-32e page tables. 32-bit systems
build PAE page tables if CONFIG_X86_PAE is set, otherwise standard
32-bit page tables are built.
The kernel will expect to find the top-level structure of the produced
page tables at the physical address corresponding to the symbol
z_x86_kernel_ptables. The linker script will need to set that symbol
to the end of the binary produced by this script, minus the size of the
top-level paging structure as it is written out last.
"""
import sys
import array
import argparse
import os
import struct
import elftools
import math
from distutils.version import LooseVersion
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
# Refuse to run against a pyelftools release older than 0.24.
# NOTE(review): LooseVersion comes from distutils, which is deprecated by
# PEP 632 and removed from the standard library in Python 3.12; this check
# (and its import) will need a replacement before moving to that version.
if LooseVersion(elftools.__version__) < LooseVersion('0.24'):
    sys.exit("pyelftools is out of date, need version 0.24 or later")
def bit(pos):
    """Return an integer with only bit number `pos` set (bit 0 is the
    least significant bit)"""
    return 1 << pos
# Page table entry flags. Each constant is a single-bit mask that can be
# OR-ed into a table entry alongside the physical address.
FLAG_P = bit(0)    # Present
FLAG_RW = bit(1)   # Write (read/write access)
FLAG_US = bit(2)   # User (accessible from user mode)
FLAG_G = bit(8)    # Global, per the x86 paging entry format
FLAG_XD = bit(63)  # Execute Disable; only representable in 64-bit entries
def debug(text):
    """Write a diagnostic line to stdout, prefixed with the script name.

    Nothing is printed unless the global `args.verbose` option is set.
    """
    if args.verbose:
        prog_name = os.path.basename(sys.argv[0])
        sys.stdout.write("".join((prog_name, ": ", text, "\n")))
def error(text):
    """Abort the script via sys.exit(), using the script name followed by
    `text` as the exit message"""
    prog_name = os.path.basename(sys.argv[0])
    sys.exit(": ".join((prog_name, text)))
def align_check(base, size):
    """Verify that a region starts on a 4096-byte boundary and that its
    size is a multiple of 4096 bytes.

    Any violation is reported through error(), which terminates the script.
    """
    page_size = 4096
    if base % page_size:
        error("unaligned base address %x" % base)
    if size % page_size:
        error("Unaligned region size %d for base %x" % (size, base))
def dump_flags(flags):
    """Return the names of the flag bits set in `flags` as one
    space-separated string, in the fixed order P, RW, US, G, XD.

    An empty string is returned when none of the known bits are set.
    """
    known_flags = (
        (FLAG_P, "P"),
        (FLAG_RW, "RW"),
        (FLAG_US, "US"),
        (FLAG_G, "G"),
        (FLAG_XD, "XD"),
    )
    return " ".join(label for mask, label in known_flags if flags & mask)
# Hard-coded flags for intermediate (non-leaf) paging levels. These are
# deliberately permissive (present, writable, user-accessible): access
# control and caching properties are only applied at the leaf level.
INT_FLAGS = FLAG_P | FLAG_RW | FLAG_US
class MMUTable(object):
    """A single paging structure (one table) at any level of the hierarchy.

    Concrete table types override the class-level settings declared below
    (supported_flags, addr_shift, addr_mask, type_code and num_entries);
    the versions in this base class only raise NotImplementedError.
    """

    def __init__(self):
        # All entries start out as zero, so none has the Present bit set
        self.entries = array.array(self.type_code, [0] * self.num_entries)

    def get_binary(self):
        """Serialize the table: returns a bytearray holding every entry,
        packed back to back in index order"""
        # Entries are always emitted little-endian
        packer = struct.Struct("<" + self.type_code)
        blob = bytearray(packer.size * self.num_entries)
        for slot in range(self.num_entries):
            packer.pack_into(blob, slot * packer.size, self.entries[slot])
        return blob

    @property
    def supported_flags(self):
        """Mask of the flag bits this table type accepts; any other flag
        passed to map() or set_perms() is dropped"""
        raise NotImplementedError()

    @property
    def addr_shift(self):
        """Right-shift applied to a virtual address before masking, to
        obtain the index of its entry in this table"""
        raise NotImplementedError()

    @property
    def addr_mask(self):
        """Mask that isolates the physical address portion of an entry"""
        raise NotImplementedError()

    @property
    def type_code(self):
        """Struct/array letter code for one entry: I for 32-bit entries,
        Q for PAE/IA-32e entries"""
        raise NotImplementedError()

    @property
    def num_entries(self):
        """How many entries the table holds; depends on the table type and
        the paging mode"""
        raise NotImplementedError()

    def entry_index(self, virt_addr):
        """Return the index of the entry that covers the given virtual
        address"""
        index_mask = self.num_entries - 1
        return (virt_addr >> self.addr_shift) & index_mask

    def has_entry(self, virt_addr):
        """Return True if the entry covering the given virtual address has
        its Present bit set"""
        entry = self.entries[self.entry_index(virt_addr)]
        return (entry & FLAG_P) != 0

    def lookup(self, virt_addr):
        """Return the physical address stored in the entry covering the
        given virtual address.

        In a leaf table this is the mapped physical address; in any other
        table it is the physical address of the next-level table.
        """
        entry = self.entries[self.entry_index(virt_addr)]
        return entry & self.addr_mask

    def map(self, virt_addr, phys_addr, entry_flags):
        """Overwrite the entry covering the given virtual address with the
        provided physical address and flags. Flags this table type does not
        support are silently dropped.

        In a leaf table the physical address is the mapping itself; in any
        other table it is the physical address of the next-level table.
        """
        address_bits = phys_addr & self.addr_mask
        flag_bits = entry_flags & self.supported_flags
        self.entries[self.entry_index(virt_addr)] = address_bits | flag_bits

    def set_perms(self, virt_addr, entry_flags):
        """Replace only the flags of the entry covering the given virtual
        address; its physical address is preserved. Flags this table type
        does not support are silently dropped."""
        slot = self.entry_index(virt_addr)
        address_bits = self.entries[slot] & self.addr_mask
        flag_bits = entry_flags & self.supported_flags
        self.entries[slot] = address_bits | flag_bits
# Specific supported table types
class Pml4(MMUTable):
    """Page mapping level 4 for IA-32e"""
    # Entry index is (virt_addr >> 39) & 511, so each entry covers
    # 1 << 39 bytes of virtual address space
    addr_shift = 39
    # Keeps bits 12..62 of an entry as the next-level table address
    addr_mask = 0x7FFFFFFFFFFFF000
    # 64-bit entries
    type_code = 'Q'
    num_entries = 512
    # Non-leaf level: only the permissive intermediate flags are accepted
    supported_flags = INT_FLAGS
class Pdpt(MMUTable):
    """Page directory pointer table for IA-32e"""
    # Entry index is (virt_addr >> 30) & 511, so each entry covers
    # 1 << 30 bytes of virtual address space
    addr_shift = 30
    # Keeps bits 12..62 of an entry as the next-level table address
    addr_mask = 0x7FFFFFFFFFFFF000
    # 64-bit entries
    type_code = 'Q'
    num_entries = 512
    # Non-leaf level: only the permissive intermediate flags are accepted
    supported_flags = INT_FLAGS
class PdptPAE(Pdpt):
    """Page directory pointer table for PAE"""
    # Same entry layout as the IA-32e PDPT, but with only four entries the
    # serialized table is smaller than a 4096-byte page
    num_entries = 4
class Pd(MMUTable):
    """Page directory for 32-bit"""
    # Entry index is (virt_addr >> 22) & 1023, so each entry covers
    # 1 << 22 bytes of virtual address space
    addr_shift = 22
    # Keeps bits 12..31 of an entry as the page table address
    addr_mask = 0xFFFFF000
    # 32-bit entries
    type_code = 'I'
    num_entries = 1024
    # Non-leaf level: only the permissive intermediate flags are accepted
    supported_flags = INT_FLAGS
class PdXd(Pd):
    """Page directory for either PAE or IA-32e"""
    # Entry index is (virt_addr >> 21) & 511, so each entry covers
    # 1 << 21 bytes of virtual address space
    addr_shift = 21
    # Keeps bits 12..62 of an entry as the page table address
    addr_mask = 0x7FFFFFFFFFFFF000
    num_entries = 512
    # 64-bit entries; supported_flags is inherited from Pd (INT_FLAGS)
    type_code = 'Q'
class Pt(MMUTable):
    """Page table for 32-bit"""
    # Entry index is (virt_addr >> 12) & 1023: one entry per 4096-byte page
    addr_shift = 12
    # Keeps bits 12..31 of an entry as the mapped physical page address
    addr_mask = 0xFFFFF000
    # 32-bit entries
    type_code = 'I'
    num_entries = 1024
    # Leaf level. FLAG_XD is bit 63 and is therefore not offered for
    # 32-bit entries.
    supported_flags = FLAG_P | FLAG_RW | FLAG_US | FLAG_G
class PtXd(Pt):
    """Page table for either PAE or IA-32e"""
    # addr_shift (12) is inherited from Pt: one entry per 4096-byte page.
    # Keeps bits 12..58 of an entry as the mapped physical page address.
    # NOTE(review): the other 64-bit table types in this file use
    # 0x7FFFFFFFFFFFF000 (bits 12..62); confirm whether this narrower mask
    # is intentional.
    addr_mask = 0x07FFFFFFFFFFF000
    # 64-bit entries
    type_code = 'Q'
    num_entries = 512
    # Leaf level with 64-bit entries, so Execute Disable is available too
    supported_flags = FLAG_P | FLAG_RW | FLAG_US | FLAG_G | FLAG_XD
class PtableSet(object):
    """Represents a complete set of page tables for any paging mode.

    Subclasses provide `levels`, the list of table classes ordered from the
    top-level paging structure down to the leaf page table. Tables are
    assigned consecutive 4096-byte physical locations, starting at the
    address given to the constructor, in the order they are created.
    """

    def __init__(self, pages_start):
        """Instantiate a set of page tables which will be located in the
        image starting at the provided physical memory location"""
        # Physical address that the next newly created table will receive
        self.page_pos = pages_start
        self.toplevel = self.levels[0]()

        debug("%s starting at physical address 0x%x" %
              (self.__class__.__name__, pages_start))

        # Database of page table pages. Maps physical memory address to
        # MMUTable objects, excluding the top-level table which is tracked
        # separately in self.toplevel. Starts out empty as nothing has been
        # mapped yet.
        self.tables = {}

    def get_new_mmutable_addr(self):
        """If we need to instantiate a new MMUTable, return a physical
        address location for it, and reserve one 4096-byte page there"""
        ret = self.page_pos
        self.page_pos += 4096
        return ret

    @property
    def levels(self):
        """Class hierarchy of paging levels, with the first entry being
        the toplevel table class, and the last entry always being
        some kind of leaf page table class (Pt or PtXd)"""
        raise NotImplementedError()

    def map_page(self, virt_addr, phys_addr, flags):
        """Map one page: point a virtual address at a physical address in
        the page tables, with the provided access flags. Intermediate
        tables are created on demand."""
        table = self.toplevel

        # Create and link up intermediate tables if necessary
        for depth in range(1, len(self.levels)):
            # Create child table if needed
            if not table.has_entry(virt_addr):
                new_table_addr = self.get_new_mmutable_addr()
                new_table = self.levels[depth]()
                debug("new %s at physical addr 0x%x"
                      % (self.levels[depth].__name__, new_table_addr))
                self.tables[new_table_addr] = new_table
                table.map(virt_addr, new_table_addr, INT_FLAGS)
                table = new_table
            else:
                table = self.tables[table.lookup(virt_addr)]

        # Set up entry in leaf page table
        table.map(virt_addr, phys_addr, flags)

    def map(self, phys_base, virt_base, size, flags):
        """Identity map an address range in the page tables, with provided
        access flags.

        If the virt_base argument is not the same address as phys_base,
        the same memory will be double mapped to the virt_base address.
        Pass None as virt_base to request the identity mapping only.

        phys_base and size must be multiples of 4096; otherwise the script
        exits with an error. It also exits if the physical and virtual
        bases do not share the same offset within a top-level entry.
        """
        debug("Identity-mapping 0x%x (%d): %s" %
              (phys_base, size, dump_flags(flags)))

        skip_vm_map = virt_base is None or virt_base == phys_base

        align_check(phys_base, size)
        for addr in range(phys_base, phys_base + size, 4096):
            if addr == 0 and skip_vm_map:
                # Never map the NULL page
                continue
            self.map_page(addr, addr, flags)

        if skip_vm_map:
            return

        # Find how much VM a top-level entry covers
        scope = 1 << self.toplevel.addr_shift
        debug("Double map %s entries with scope 0x%x" %
              (self.toplevel.__class__.__name__, scope))

        # Round bases down to the entry granularity. Integer arithmetic
        # keeps the addresses exact regardless of their magnitude.
        pd_virt_base = (virt_base // scope) * scope
        pd_phys_base = (phys_base // scope) * scope
        size = size + (phys_base - pd_phys_base)

        # The base addresses have to line up such that they can be mapped
        # by the same second-level table
        if phys_base - pd_phys_base != virt_base - pd_virt_base:
            error("mis-aligned virtual 0x%x and physical base addresses 0x%x" %
                  (virt_base, phys_base))

        # Round size up to entry granularity
        size = ((size + scope - 1) // scope) * scope

        for offset in range(0, size, scope):
            cur_virt = pd_virt_base + offset
            cur_phys = pd_phys_base + offset

            # Get the physical address of the second-level table that identity
            # maps the current chunk of physical memory
            table_link_phys = self.toplevel.lookup(cur_phys)

            debug("copy mappings 0x%x - 0x%x to 0x%x, using table 0x%x" %
                  (cur_phys, cur_phys + scope - 1, cur_virt, table_link_phys))

            # Link that to the entry for the virtual mapping.
            # NOTE(review): whatever the top-level entry for cur_virt held
            # before is overwritten without a check; confirm the physical
            # and virtual ranges can never share a top-level entry.
            self.toplevel.map(cur_virt, table_link_phys, INT_FLAGS)

    def _leaf_table(self, virt_addr):
        """Return the leaf table whose entry maps virt_addr, or None if the
        address is not mapped at some level of the hierarchy"""
        table = self.toplevel
        for _ in range(1, len(self.levels)):
            if not table.has_entry(virt_addr):
                return None
            table = self.tables.get(table.lookup(virt_addr))
            if table is None:
                return None
        # The leaf entry itself must be present too, otherwise updating its
        # flags would fabricate a mapping to physical address 0
        if not table.has_entry(virt_addr):
            return None
        return table

    def set_region_perms(self, name, flags):
        """Set access permissions for a named region that is already mapped

        The bounds of the region will be looked up in the symbol table
        with _start and _size suffixes. The physical address mapping
        is unchanged and this will not disturb any double-mapping.

        The script exits with an error if the region is not page-aligned
        or if any of its pages (other than the NULL page, which is
        skipped) has no existing mapping."""

        # Doesn't matter if this is a virtual address, we have
        # either a dual mapping or it's the same as physical
        base = syms[name + "_start"]
        size = syms[name + "_size"]

        debug("change flags for %s at 0x%x (%d): %s" %
              (name, base, size, dump_flags(flags)))
        align_check(base, size)

        for addr in range(base, base + size, 4096):
            # Never map the NULL page
            if addr == 0:
                continue

            table = self._leaf_table(addr)
            if table is None:
                error("no mapping for %s region 0x%x (size 0x%x)" %
                      (name, base, size))
            table.set_perms(addr, flags)

    def write_output(self, filename):
        """Write the page tables to the output file in binary format.

        Tables are written in ascending order of physical address,
        followed by the top-level table."""
        with open(filename, "wb") as fp:
            for addr in sorted(self.tables):
                mmu_table = self.tables[addr]
                fp.write(mmu_table.get_binary())

            # We always have the top-level table be last. This is because
            # in PAE, the top-level PDPT has only 4 entries and is not a
            # full page in size. We do not put it in the tables dictionary
            # and treat it as a special case.
            #
            # It lands at the next unallocated address; read page_pos
            # directly so that producing this message has no side effects.
            debug("top-level %s at physical addr 0x%x" %
                  (self.toplevel.__class__.__name__, self.page_pos))
            fp.write(self.toplevel.get_binary())
# Paging mode classes, we'll use one depending on configuration
class Ptables32bit(PtableSet):
    """Standard 32-bit page tables: a page directory (the top level)
    pointing at page tables"""
    levels = [Pd, Pt]
class PtablesPAE(PtableSet):
    """PAE page tables: a four-entry page directory pointer table (the top
    level), then page directories, then page tables"""
    levels = [PdptPAE, PdXd, PtXd]
class PtablesIA32e(PtableSet):
    """IA-32e page tables: a PML4 (the top level), then page directory
    pointer tables, then page directories, then page tables"""
    levels = [Pml4, Pdpt, PdXd, PtXd]
def parse_args():
    """Parse the command line and store the result in the global `args`.

    The kernel ELF path (-k) and the output file (-o) are mandatory.
    Verbose output is turned on either by -v or by the mere presence of a
    VERBOSE variable in the environment.
    """
    global args

    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)

    cli.add_argument("-k", "--kernel", required=True,
                     help="path to prebuilt kernel ELF binary")
    cli.add_argument("-o", "--output", required=True,
                     help="output file")
    cli.add_argument("-v", "--verbose", action="store_true",
                     help="Print extra debugging information")

    parsed = cli.parse_args()
    # The environment can force verbose mode but never turns it off
    parsed.verbose = parsed.verbose or "VERBOSE" in os.environ
    args = parsed
def get_symbols(obj):
    """Build a dictionary mapping symbol names to symbol values from the
    first symbol table section found in the ELF object.

    Raises LookupError when the object contains no symbol table section.
    """
    symtab = next((candidate for candidate in obj.iter_sections()
                   if isinstance(candidate, SymbolTableSection)), None)
    if symtab is None:
        raise LookupError("Could not find symbol table")

    return {symbol.name: symbol.entry.st_value
            for symbol in symtab.iter_symbols()}
def isdef(sym_name):
    """Return True if `sym_name` is present in the global symbol
    dictionary `syms` that main() loads from the kernel ELF"""
    return sym_name in syms
def main():
    """Script entry point: read configuration and region symbols from the
    kernel ELF, build the page tables for the configured paging mode, and
    write them to the output file."""
    global syms
    parse_args()

    # Only the symbol table is needed; it is copied into a plain dict
    # before the ELF file is closed
    with open(args.kernel, "rb") as fp:
        kernel = ELFFile(fp)
        syms = get_symbols(kernel)

    # Choose the paging mode from the configuration symbols in the ELF
    if isdef("CONFIG_X86_64"):
        pclass = PtablesIA32e
    elif isdef("CONFIG_X86_PAE"):
        pclass = PtablesPAE
    else:
        pclass = Ptables32bit

    debug("building %s" % pclass.__name__)

    ram_base = syms["CONFIG_SRAM_BASE_ADDRESS"]
    virt_base = syms["CONFIG_KERNEL_VM_BASE"]
    # Scaled by 1024 to get bytes; presumably the option is expressed in
    # kilobytes -- confirm against the Kconfig definition
    ram_size = syms["CONFIG_SRAM_SIZE"] * 1024
    ptables_virt = syms["z_x86_pagetables_start"]

    debug("Base addresses: physical 0x%x virtual 0x%x size %d" %
          (ram_base, virt_base, ram_size))

    is_perm_regions = isdef("CONFIG_SRAM_REGION_PERMISSIONS")

    # The tables link to each other by physical address, so translate the
    # symbol's address using the offset between the virtual and physical
    # bases
    ptables_phys = ptables_virt - (virt_base - ram_base)

    if is_perm_regions:
        # Don't allow execution by default for any pages. We'll adjust this
        # in later calls to pt.set_region_perms()
        map_flags = FLAG_P | FLAG_XD
    else:
        map_flags = FLAG_P

    pt = pclass(ptables_phys)
    # All of RAM is writable by default
    pt.map(ram_base, virt_base, ram_size, map_flags | FLAG_RW)

    if isdef("CONFIG_XIP"):
        if virt_base != ram_base:
            error("XIP and virtual memory are currently incompatible")

        # Additionally identity-map all ROM as read-only
        pt.map(syms["CONFIG_FLASH_BASE_ADDRESS"], None,
               syms["CONFIG_FLASH_SIZE"] * 1024, map_flags)

    # Adjust mapped region permissions if configured
    if is_perm_regions:
        # Need to accomplish the following things:
        # - Text regions need the XD flag cleared and RW flag removed
        # - Rodata regions need the RW flag cleared
        # - User mode needs access as we currently do not separate application
        #   text/rodata from kernel text/rodata
        pt.set_region_perms("_image_text", FLAG_P | FLAG_US)
        pt.set_region_perms("_image_rodata", FLAG_P | FLAG_US | FLAG_XD)

        if isdef("CONFIG_COVERAGE_GCOV") and isdef("CONFIG_USERSPACE"):
            # If GCOV is enabled, user mode must be able to write to its
            # common data area
            pt.set_region_perms("__gcov_bss",
                                FLAG_P | FLAG_RW | FLAG_US | FLAG_XD)

        if isdef("CONFIG_X86_64"):
            # Set appropriate permissions for locore areas much like we did
            # with the main text/rodata regions

            if isdef("CONFIG_X86_KPTI"):
                # Set the User bit for the read-only locore/lorodata areas.
                # This ensures they get mapped into the User page tables if
                # KPTI is turned on. There is no sensitive data in them, and
                # they contain text/data needed to take an exception or
                # interrupt.
                flag_user = FLAG_US
            else:
                flag_user = 0

            pt.set_region_perms("_locore", FLAG_P | flag_user)
            pt.set_region_perms("_lorodata", FLAG_P | FLAG_XD | flag_user)

    pt.write_output(args.output)
# Run the generator only when executed as a script, not when imported
if __name__ == "__main__":
    main()