x86: add new page table generation script

This produces a set of page tables with system RAM mapped for read/write/execute access by supervisor mode, so that the tables can be installed in the CPU at the earliest boot stages and remain mutable at runtime. These tables optionally support a dual physical/virtual mapping of RAM to help boot virtual memory systems.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
parent bcc69f944d
commit e9d15451b1
1 changed file with 556 additions and 0 deletions

arch/x86/gen_mmu.py (new executable file)
@@ -0,0 +1,556 @@
#!/usr/bin/env python3
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""Create the kernel's page tables for x86 CPUs.

For additional detail on paging and x86 memory management, please
consult the IA Architecture SW Developer Manual, volume 3a, chapter 4.

This script produces the initial page tables installed into the CPU
at early boot. These pages will have an identity mapping at
CONFIG_SRAM_BASE_ADDRESS of size CONFIG_SRAM_SIZE. The script takes
'zephyr_prebuilt.elf' as input to obtain region sizes, certain
memory addresses, and configuration values.

If CONFIG_SRAM_REGION_PERMISSIONS is not enabled, all RAM will be
mapped with the Present and Write bits set; the linker scripts
shouldn't add page alignment padding between sections.

If CONFIG_SRAM_REGION_PERMISSIONS is enabled, the access permissions
vary:
  - By default, the Present, Write, and Execute Disable bits are set.
  - The _image_text region will have the Present and User bits set.
  - The _image_rodata region will have the Present, User, and Execute
    Disable bits set.
  - On x86_64, the _locore region will have Present set and the
    _lorodata region will have Present and Execute Disable set.

This script will establish a dual mapping at the address defined by
CONFIG_KERNEL_VM_BASE if it is not the same as CONFIG_SRAM_BASE_ADDRESS:

  - The double-mapping is used to transition the instruction pointer
    from a physical address at early boot to the virtual address where
    the kernel is actually linked.

  - The mapping is always double-mapped at the top-level paging
    structure, and the physical/virtual base addresses must have the
    same alignment with respect to the scope of top-level paging
    structure entries. This allows the same second-level paging
    structure(s) to be used for both memory bases.

  - The double-mapping is needed so that we can still fetch
    instructions from identity-mapped physical addresses after we
    program this table into the MMU, then jump to the equivalent
    virtual address. The kernel then unlinks the identity mapping
    before continuing; the address space is purely virtual after that.

Because the set of page tables is linked together by physical address,
we must know a priori the physical address of each table. The linker
script must define a z_x86_pagetables_start symbol where the page
tables will be placed, and this memory address must not shift between
prebuilt and final ELF builds. This script will not work on systems
where the physical load address of the kernel is unknown at build time.

64-bit systems will always build IA-32e page tables. 32-bit systems
build PAE page tables if CONFIG_X86_PAE is set, otherwise standard
32-bit page tables are built.

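As a quick reference, derived from the table classes defined below,
each paging mode uses the following chain of structures from top level
to leaf (one top-level entry spans 1 << addr_shift bytes):

  - 32-bit: Pd (4MB scope) -> Pt (4KB pages)
  - PAE:    PdptPAE (1GB scope) -> PdXd (2MB scope) -> PtXd (4KB pages)
  - IA-32e: Pml4 (512GB scope) -> Pdpt (1GB) -> PdXd (2MB) -> PtXd (4KB)
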
The kernel will expect to find the top-level structure of the produced
page tables at the physical address corresponding to the symbol
z_x86_kernel_ptables. The linker script will need to set that symbol
to the end of the binary produced by this script, minus the size of
the top-level paging structure, as it is written out last.
"""

import sys
import array
import argparse
import os
import struct
from distutils.version import LooseVersion

import elftools
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection

if LooseVersion(elftools.__version__) < LooseVersion('0.24'):
    sys.exit("pyelftools is out of date, need version 0.24 or later")


def bit(pos):
    """Return an integer with bit 'pos' set"""
    return 1 << pos


# Page table entry flags
FLAG_P = bit(0)    # Present
FLAG_RW = bit(1)   # Read/Write
FLAG_US = bit(2)   # User/Supervisor
FLAG_G = bit(8)    # Global
FLAG_XD = bit(63)  # Execute Disable
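
# Example flag compositions (illustrative):
#   FLAG_P | FLAG_RW           == 0x3 (present, writable, supervisor-only)
#   FLAG_P | FLAG_US | FLAG_XD == 0x8000000000000005
#                                 (present, user-readable, no-execute)
# FLAG_XD occupies bit 63 and so only fits in the 64-bit ('Q') entries
# used by PAE and IA-32e tables; the 32-bit table types below filter it
# out via their supported_flags masks.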


def debug(text):
    """Print a message to stdout if --verbose was given"""
    if not args.verbose:
        return
    sys.stdout.write(os.path.basename(sys.argv[0]) + ": " + text + "\n")


def error(text):
    sys.exit(os.path.basename(sys.argv[0]) + ": " + text)


def align_check(base, size):
    if (base % 4096) != 0:
        error("unaligned base address 0x%x" % base)
    if (size % 4096) != 0:
        error("unaligned region size %d for base 0x%x" % (size, base))


def dump_flags(flags):
    ret = ""

    if flags & FLAG_P:
        ret += "P "

    if flags & FLAG_RW:
        ret += "RW "

    if flags & FLAG_US:
        ret += "US "

    if flags & FLAG_G:
        ret += "G "

    if flags & FLAG_XD:
        ret += "XD "

    return ret.strip()


# Hard-coded flags for intermediate paging levels. Permissive; we only
# control access or set caching properties at leaf levels.
INT_FLAGS = FLAG_P | FLAG_RW | FLAG_US


class MMUTable(object):
    """Represents a particular table in a set of page tables, at any level"""

    def __init__(self):
        self.entries = array.array(self.type_code,
                                   [0 for i in range(self.num_entries)])

    def get_binary(self):
        """Return a bytearray representation of this table"""
        # Always little-endian
        ctype = "<" + self.type_code
        entry_size = struct.calcsize(ctype)
        ret = bytearray(entry_size * self.num_entries)

        for i in range(self.num_entries):
            struct.pack_into(ctype, ret, entry_size * i, self.entries[i])
        return ret

    @property
    def supported_flags(self):
        """Class property indicating what flag bits are supported"""
        raise NotImplementedError()

    @property
    def addr_shift(self):
        """Class property for how much to shift virtual addresses to obtain
        the appropriate index in the table for them"""
        raise NotImplementedError()

    @property
    def addr_mask(self):
        """Mask to apply to an individual entry to get the physical address
        mapping"""
        raise NotImplementedError()

    @property
    def type_code(self):
        """Struct packing letter code for table entries. Either 'I' for
        32-bit entries, or 'Q' for PAE/IA-32e"""
        raise NotImplementedError()

    @property
    def num_entries(self):
        """Number of entries in the table. Varies by table type and paging
        mode"""
        raise NotImplementedError()

    def entry_index(self, virt_addr):
        """Get the index of the entry in this table that corresponds to the
        provided virtual address"""
        return (virt_addr >> self.addr_shift) & (self.num_entries - 1)

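    # Worked example (illustrative): for a 32-bit page directory (Pd,
    # addr_shift=22, num_entries=1024), virt_addr 0x00c01000 yields
    # (0xc01000 >> 22) & 1023 == 3, i.e. the entry covering the 4MB
    # region starting at 0xc00000.
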
    def has_entry(self, virt_addr):
        """Indicate whether an entry is present in this table for the
        provided virtual address"""
        index = self.entry_index(virt_addr)

        return (self.entries[index] & FLAG_P) != 0

    def lookup(self, virt_addr):
        """Look up the physical mapping for a virtual address.

        If this is a leaf table, this is the physical address mapping. If
        not, this is the physical address of the next-level table"""
        index = self.entry_index(virt_addr)

        return self.entries[index] & self.addr_mask

    def map(self, virt_addr, phys_addr, entry_flags):
        """For the table entry corresponding to the provided virtual
        address, set the physical address it maps to. Unsupported flags
        will be filtered out.

        If this is a leaf table, the physical address is the final
        mapping. If not, it is the physical address of the next-level
        table"""
        index = self.entry_index(virt_addr)

        self.entries[index] = ((phys_addr & self.addr_mask) |
                               (entry_flags & self.supported_flags))

    def set_perms(self, virt_addr, entry_flags):
        """For the table entry corresponding to the provided virtual
        address, update just the flags, leaving the physical mapping
        alone. Unsupported flags will be filtered out."""
        index = self.entry_index(virt_addr)

        self.entries[index] = ((self.entries[index] & self.addr_mask) |
                               (entry_flags & self.supported_flags))


# Specific supported table types
class Pml4(MMUTable):
    """Page map level 4 for IA-32e"""
    addr_shift = 39
    addr_mask = 0x7FFFFFFFFFFFF000
    type_code = 'Q'
    num_entries = 512
    supported_flags = INT_FLAGS


class Pdpt(MMUTable):
    """Page directory pointer table for IA-32e"""
    addr_shift = 30
    addr_mask = 0x7FFFFFFFFFFFF000
    type_code = 'Q'
    num_entries = 512
    supported_flags = INT_FLAGS


class PdptPAE(Pdpt):
    """Page directory pointer table for PAE"""
    num_entries = 4


class Pd(MMUTable):
    """Page directory for 32-bit"""
    addr_shift = 22
    addr_mask = 0xFFFFF000
    type_code = 'I'
    num_entries = 1024
    supported_flags = INT_FLAGS


class PdXd(Pd):
    """Page directory for either PAE or IA-32e"""
    addr_shift = 21
    addr_mask = 0x7FFFFFFFFFFFF000
    type_code = 'Q'
    num_entries = 512


class Pt(MMUTable):
    """Page table for 32-bit"""
    addr_shift = 12
    addr_mask = 0xFFFFF000
    type_code = 'I'
    num_entries = 1024
    supported_flags = FLAG_P | FLAG_RW | FLAG_US | FLAG_G


class PtXd(Pt):
    """Page table for either PAE or IA-32e"""
    addr_mask = 0x07FFFFFFFFFFF000
    type_code = 'Q'
    num_entries = 512
    supported_flags = FLAG_P | FLAG_RW | FLAG_US | FLAG_G | FLAG_XD

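# How a virtual address decomposes under IA-32e paging, as implied by
# the addr_shift values above (illustrative):
#
#   bits 47:39 -> Pml4 index   (entry scope 1 << 39 = 512GB)
#   bits 38:30 -> Pdpt index   (entry scope 1 << 30 = 1GB)
#   bits 29:21 -> PdXd index   (entry scope 1 << 21 = 2MB)
#   bits 20:12 -> PtXd index   (entry scope 1 << 12 = 4KB page)
#   bits 11:0  -> offset within the page
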

class PtableSet(object):
    """Represents a complete set of page tables for any paging mode"""

    def __init__(self, pages_start):
        """Instantiate a set of page tables which will be located in the
        image starting at the provided physical memory location"""
        self.page_pos = pages_start
        self.toplevel = self.levels[0]()

        debug("%s starting at physical address 0x%x" %
              (self.__class__.__name__, pages_start))

        # Database of page table pages, mapping physical memory addresses
        # to MMUTable objects. The top-level table is tracked separately
        # in self.toplevel; this starts out empty as nothing has been
        # mapped yet.
        self.tables = {}

    def get_new_mmutable_addr(self):
        """If we need to instantiate a new MMUTable, return a physical
        address location for it"""
        ret = self.page_pos
        self.page_pos += 4096
        return ret

    @property
    def levels(self):
        """Class hierarchy of paging levels, with the first entry being
        the top-level table class, and the last entry always being
        some kind of leaf page table class (Pt or PtXd)"""
        raise NotImplementedError()

    def map_page(self, virt_addr, phys_addr, flags):
        """Map a virtual address to a physical address in the page tables,
        with provided access flags"""
        table = self.toplevel

        # Create and link up intermediate tables if necessary
        for depth in range(1, len(self.levels)):
            # Create child table if needed
            if not table.has_entry(virt_addr):
                new_table_addr = self.get_new_mmutable_addr()
                new_table = self.levels[depth]()
                debug("new %s at physical addr 0x%x"
                      % (self.levels[depth].__name__, new_table_addr))
                self.tables[new_table_addr] = new_table
                table.map(virt_addr, new_table_addr, INT_FLAGS)
                table = new_table
            else:
                table = self.tables[table.lookup(virt_addr)]

        # Set up entry in leaf page table
        table.map(virt_addr, phys_addr, flags)
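
    # Illustrative walk (hypothetical values): in 32-bit mode, calling
    # map_page(0x00400000, 0x00400000, FLAG_P | FLAG_RW) for the first
    # time computes Pd index (0x400000 >> 22) & 1023 == 1, finds no
    # entry present, so it allocates a new Pt at the next free 4KB slot
    # and links it with INT_FLAGS; the leaf Pt entry at index
    # (0x400000 >> 12) & 1023 == 0 then receives the physical mapping.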

    def map(self, phys_base, virt_base, size, flags):
        """Identity-map an address range in the page tables, with provided
        access flags.

        If the virt_base argument is not the same address as phys_base,
        the same memory will additionally be double-mapped at virt_base.
        """
        debug("Identity-mapping 0x%x (%d): %s" %
              (phys_base, size, dump_flags(flags)))

        skip_vm_map = virt_base is None or virt_base == phys_base

        align_check(phys_base, size)
        for addr in range(phys_base, phys_base + size, 4096):
            if addr == 0 and skip_vm_map:
                # Never map the NULL page
                continue
            self.map_page(addr, addr, flags)

        if skip_vm_map:
            return

        # Find how much VM a top-level entry covers
        scope = 1 << self.toplevel.addr_shift
        debug("Double map %s entries with scope 0x%x" %
              (self.toplevel.__class__.__name__, scope))

        # Round bases down to the entry granularity, using integer
        # arithmetic so large 64-bit addresses don't lose precision
        # to float division
        pd_virt_base = (virt_base // scope) * scope
        pd_phys_base = (phys_base // scope) * scope
        size = size + (phys_base - pd_phys_base)

        # The base addresses have to line up such that they can be mapped
        # by the same second-level table
        if phys_base - pd_phys_base != virt_base - pd_virt_base:
            error("mis-aligned virtual base 0x%x and physical base 0x%x" %
                  (virt_base, phys_base))

        # Round size up to entry granularity
        size = ((size + scope - 1) // scope) * scope
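
        # Worked example (hypothetical values): with 32-bit paging,
        # scope = 1 << 22 = 0x400000. For phys_base 0x00100000 and
        # virt_base 0x80100000, the bases round down to 0x0 and
        # 0x80000000; both offsets are 0x100000, so the alignment check
        # passes and each virtual top-level entry below can reuse the
        # page table already linked for its physical twin.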

        for offset in range(0, size, scope):
            cur_virt = pd_virt_base + offset
            cur_phys = pd_phys_base + offset

            # Get the physical address of the second-level table that
            # identity-maps the current chunk of physical memory
            table_link_phys = self.toplevel.lookup(cur_phys)

            debug("copy mappings 0x%x - 0x%x to 0x%x, using table 0x%x" %
                  (cur_phys, cur_phys + scope - 1, cur_virt,
                   table_link_phys))

            # Link that table to the entry for the virtual mapping
            self.toplevel.map(cur_virt, table_link_phys, INT_FLAGS)

    def set_region_perms(self, name, flags):
        """Set access permissions for a named region that is already mapped

        The bounds of the region will be looked up in the symbol table
        with _start and _size suffixes. The physical address mapping
        is unchanged and this will not disturb any double-mapping."""

        # It doesn't matter if these symbol values are virtual addresses;
        # we either have a dual mapping or virtual is the same as physical
        base = syms[name + "_start"]
        size = syms[name + "_size"]

        debug("change flags for %s at 0x%x (%d): %s" %
              (name, base, size, dump_flags(flags)))
        align_check(base, size)

        try:
            for addr in range(base, base + size, 4096):
                # Never map the NULL page
                if addr == 0:
                    continue

                # Walk down to the leaf table for this address and
                # update the flags there
                table = self.toplevel
                for _ in range(1, len(self.levels)):
                    table = self.tables[table.lookup(addr)]
                table.set_perms(addr, flags)
        except KeyError:
            error("no mapping for %s region 0x%x (size 0x%x)" %
                  (name, base, size))

    def write_output(self, filename):
        """Write the page tables to the output file in binary format"""
        with open(filename, "wb") as fp:
            for addr in sorted(self.tables):
                mmu_table = self.tables[addr]
                fp.write(mmu_table.get_binary())

            # We always write the top-level table last. This is because
            # in PAE, the top-level PDPT has only 4 entries and is not a
            # full page in size; we do not put it in the tables dictionary
            # and treat it as a special case.
            debug("top-level %s at physical addr 0x%x" %
                  (self.toplevel.__class__.__name__,
                   self.get_new_mmutable_addr()))
            fp.write(self.toplevel.get_binary())

# Paging mode classes; we'll use one depending on configuration
class Ptables32bit(PtableSet):
    levels = [Pd, Pt]


class PtablesPAE(PtableSet):
    levels = [PdptPAE, PdXd, PtXd]


class PtablesIA32e(PtableSet):
    levels = [Pml4, Pdpt, PdXd, PtXd]

def parse_args():
    global args
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("-k", "--kernel", required=True,
                        help="path to prebuilt kernel ELF binary")
    parser.add_argument("-o", "--output", required=True,
                        help="output file")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="print extra debugging information")
    args = parser.parse_args()
    if "VERBOSE" in os.environ:
        args.verbose = True


def get_symbols(obj):
    for section in obj.iter_sections():
        if isinstance(section, SymbolTableSection):
            return {sym.name: sym.entry.st_value
                    for sym in section.iter_symbols()}

    raise LookupError("Could not find symbol table")


def isdef(sym_name):
    return sym_name in syms

def main():
    global syms
    parse_args()

    with open(args.kernel, "rb") as fp:
        kernel = ELFFile(fp)
        syms = get_symbols(kernel)

    if isdef("CONFIG_X86_64"):
        pclass = PtablesIA32e
    elif isdef("CONFIG_X86_PAE"):
        pclass = PtablesPAE
    else:
        pclass = Ptables32bit

    debug("building %s" % pclass.__name__)

    ram_base = syms["CONFIG_SRAM_BASE_ADDRESS"]
    virt_base = syms["CONFIG_KERNEL_VM_BASE"]
    ram_size = syms["CONFIG_SRAM_SIZE"] * 1024
    ptables_virt = syms["z_x86_pagetables_start"]

    debug("Base addresses: physical 0x%x virtual 0x%x size %d" %
          (ram_base, virt_base, ram_size))

    is_perm_regions = isdef("CONFIG_SRAM_REGION_PERMISSIONS")

    # The page tables are linked at a virtual address; convert back to
    # the physical location where they will actually be loaded
    ptables_phys = ptables_virt - (virt_base - ram_base)
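    # For example (hypothetical values): with ram_base 0x00100000,
    # virt_base 0x80100000, and z_x86_pagetables_start linked at
    # 0x80105000, the tables land at physical
    # 0x80105000 - (0x80100000 - 0x00100000) = 0x00105000.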

    if is_perm_regions:
        # Don't allow execution by default for any pages. We'll adjust
        # this in later calls to pt.set_region_perms()
        map_flags = FLAG_P | FLAG_XD
    else:
        map_flags = FLAG_P

    pt = pclass(ptables_phys)
    pt.map(ram_base, virt_base, ram_size, map_flags | FLAG_RW)

    if isdef("CONFIG_XIP"):
        if virt_base != ram_base:
            error("XIP and virtual memory are currently incompatible")

        # Additionally identity-map all ROM as read-only
        pt.map(syms["CONFIG_FLASH_BASE_ADDRESS"], None,
               syms["CONFIG_FLASH_SIZE"] * 1024, map_flags)

    # Adjust mapped region permissions if configured
    if is_perm_regions:
        # Need to accomplish the following things:
        # - Text regions need the XD flag cleared and the RW flag removed
        # - Rodata regions need the RW flag cleared
        # - User mode needs access, as we currently do not separate
        #   application text/rodata from kernel text/rodata
        pt.set_region_perms("_image_text", FLAG_P | FLAG_US)
        pt.set_region_perms("_image_rodata", FLAG_P | FLAG_US | FLAG_XD)

        if isdef("CONFIG_COVERAGE_GCOV") and isdef("CONFIG_USERSPACE"):
            # If GCOV is enabled, user mode must be able to write to its
            # common data area
            pt.set_region_perms("__gcov_bss",
                                FLAG_P | FLAG_RW | FLAG_US | FLAG_XD)

        if isdef("CONFIG_X86_64"):
            # Set appropriate permissions for the locore areas, much like
            # we did with the main text/rodata regions

            if isdef("CONFIG_X86_KPTI"):
                # Set the User bit for the read-only locore/lorodata
                # areas. This ensures they get mapped into the User page
                # tables if KPTI is turned on. There is no sensitive data
                # in them, and they contain text/data needed to take an
                # exception or interrupt.
                flag_user = FLAG_US
            else:
                flag_user = 0

            pt.set_region_perms("_locore", FLAG_P | flag_user)
            pt.set_region_perms("_lorodata", FLAG_P | FLAG_XD | flag_user)

    pt.write_output(args.output)


if __name__ == "__main__":
    main()