x86: reserve room for per-thread page tables
Currently page tables have to be re-computed in an expensive operation on context switch. Here we reserve some room in the page tables such that we can have per-thread page table data, which will be much simpler to update on context switch at the expense of memory. Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
This commit is contained in:
parent
76310f6896
commit
26dccaabcb
3 changed files with 115 additions and 25 deletions
|
@ -79,8 +79,8 @@ void z_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
|
|||
#endif /* CONFIG_X86_USERSPACE */
|
||||
|
||||
#if CONFIG_X86_STACK_PROTECTION
|
||||
z_x86_mmu_set_flags(&z_x86_kernel_pdpt, stack, MMU_PAGE_SIZE,
|
||||
MMU_ENTRY_READ, MMU_PTE_RW_MASK);
|
||||
z_x86_mmu_set_flags(&z_x86_kernel_pdpt, stack + Z_X86_THREAD_PT_AREA,
|
||||
MMU_PAGE_SIZE, MMU_ENTRY_READ, MMU_PTE_RW_MASK);
|
||||
#endif
|
||||
|
||||
stack_high = (char *)STACK_ROUND_DOWN(stack_buf + stack_size);
|
||||
|
|
|
@ -476,13 +476,21 @@ union x86_mmu_pte {
|
|||
};
|
||||
};
|
||||
|
||||
#define Z_X86_NUM_PDPT_ENTRIES 4
|
||||
#define Z_X86_NUM_PD_ENTRIES 512
|
||||
#define Z_X86_NUM_PT_ENTRIES 512
|
||||
|
||||
/* Memory range covered by an instance of various table types */
|
||||
#define Z_X86_PT_AREA (MMU_PAGE_SIZE * Z_X86_NUM_PT_ENTRIES)
|
||||
#define Z_X86_PD_AREA (Z_X86_PT_AREA * Z_X86_NUM_PD_ENTRIES)
|
||||
#define Z_X86_PDPT_AREA (Z_X86_PD_AREA * Z_X86_NUM_PDPT_ENTRIES)
|
||||
|
||||
typedef u64_t x86_page_entry_data_t;
|
||||
|
||||
typedef x86_page_entry_data_t k_mem_partition_attr_t;
|
||||
|
||||
struct x86_mmu_pdpt {
|
||||
union x86_mmu_pdpte entry[4];
|
||||
union x86_mmu_pdpte entry[Z_X86_NUM_PDPT_ENTRIES];
|
||||
};
|
||||
|
||||
union x86_mmu_pde {
|
||||
|
@ -491,11 +499,11 @@ union x86_mmu_pde {
|
|||
};
|
||||
|
||||
struct x86_mmu_pd {
|
||||
union x86_mmu_pde entry[512];
|
||||
union x86_mmu_pde entry[Z_X86_NUM_PD_ENTRIES];
|
||||
};
|
||||
|
||||
struct x86_mmu_pt {
|
||||
union x86_mmu_pte entry[512];
|
||||
union x86_mmu_pte entry[Z_X86_NUM_PT_ENTRIES];
|
||||
};
|
||||
|
||||
#endif /* _ASMLANGUAGE */
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <ia32/mmustructs.h>
|
||||
#include <stdbool.h>
|
||||
#include <arch/common/ffs.h>
|
||||
#include <misc/util.h>
|
||||
|
||||
#ifndef _ASMLANGUAGE
|
||||
#include <arch/common/addr_types.h>
|
||||
|
@ -571,37 +572,118 @@ extern u32_t z_timer_cycle_get_32(void);
|
|||
extern struct task_state_segment _main_tss;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_USERSPACE
|
||||
/* We need a set of page tables for each thread in the system which runs in
|
||||
* user mode. For each thread, we have:
|
||||
*
|
||||
* - a toplevel PDPT
|
||||
* - a set of page directories for the memory range covered by system RAM
|
||||
* - a set of page tables for the memory range covered by system RAM
|
||||
*
|
||||
* Directories and tables for memory ranges outside of system RAM will be
|
||||
* shared and not thread-specific.
|
||||
*
|
||||
* NOTE: We are operating under the assumption that memory domain partitions
|
||||
* will not be configured which grant permission to address ranges outside
|
||||
* of system RAM.
|
||||
*
|
||||
* Each of these page tables will be programmed to reflect the memory
|
||||
* permission policy for that thread, which will be the union of:
|
||||
*
|
||||
* - The boot time memory regions (text, rodata, and so forth)
|
||||
* - The thread's stack buffer
|
||||
* - Partitions in the memory domain configuration (if a member of a
|
||||
* memory domain)
|
||||
*
|
||||
* The PDPT is a fairly small singleton on x86 PAE (32 bytes) and also must
|
||||
* be aligned to 32 bytes, so we place it at the highest addresses of the
|
||||
* page reserved for the privilege elevation stack.
|
||||
*
|
||||
* The page directories and tables require page alignment so we put them as
|
||||
* additional fields in the stack object, using the below macros to compute how
|
||||
* many pages we need.
|
||||
*/
|
||||
|
||||
/* Define a range [Z_X86_PT_START, Z_X86_PT_END) which is the memory range
|
||||
* covered by all the page tables needed for system RAM
|
||||
*/
|
||||
#define Z_X86_PT_START ((u32_t)ROUND_DOWN(DT_PHYS_RAM_ADDR, Z_X86_PT_AREA))
|
||||
#define Z_X86_PT_END ((u32_t)ROUND_UP(DT_PHYS_RAM_ADDR + \
|
||||
(DT_RAM_SIZE * 1024U), \
|
||||
Z_X86_PT_AREA))
|
||||
|
||||
/* Number of page tables needed to cover system RAM. Depends on the specific
|
||||
* bounds of system RAM, but roughly 1 page table per 2MB of RAM */
|
||||
#define Z_X86_NUM_PT ((Z_X86_PT_END - Z_X86_PT_START) / Z_X86_PT_AREA)
|
||||
|
||||
/* Same semantics as above, but for the page directories needed to cover
|
||||
* system RAM.
|
||||
*/
|
||||
#define Z_X86_PD_START ((u32_t)ROUND_DOWN(DT_PHYS_RAM_ADDR, Z_X86_PD_AREA))
|
||||
#define Z_X86_PD_END ((u32_t)ROUND_UP(DT_PHYS_RAM_ADDR + \
|
||||
(DT_RAM_SIZE * 1024U), \
|
||||
Z_X86_PD_AREA))
|
||||
/* Number of page directories needed to cover system RAM. Depends on the
|
||||
* specific bounds of system RAM, but roughly 1 page directory per 1GB of RAM */
|
||||
#define Z_X86_NUM_PD ((Z_X86_PD_END - Z_X86_PD_START) / Z_X86_PD_AREA)
|
||||
|
||||
/* Number of pages we need to reserve in the stack for per-thread page tables */
|
||||
#define Z_X86_NUM_TABLE_PAGES (Z_X86_NUM_PT + Z_X86_NUM_PD)
|
||||
#else
|
||||
/* If we're not implementing user mode, then the MMU tables don't get changed
|
||||
* on context switch and we don't need any per-thread page tables
|
||||
*/
|
||||
#define Z_X86_NUM_TABLE_PAGES 0U
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
|
||||
#define Z_X86_THREAD_PT_AREA (Z_X86_NUM_TABLE_PAGES * MMU_PAGE_SIZE)
|
||||
|
||||
#if defined(CONFIG_HW_STACK_PROTECTION) && defined(CONFIG_USERSPACE)
|
||||
/* With both hardware stack protection and userspace enabled, stacks are
|
||||
* arranged as follows:
|
||||
*
|
||||
* High memory addresses
|
||||
* +---------------+
|
||||
* | Thread stack |
|
||||
* +---------------+
|
||||
* | Kernel stack |
|
||||
* +---------------+
|
||||
* | Guard page |
|
||||
* +---------------+
|
||||
* +-----------------------------------------+
|
||||
* | Thread stack (varies) |
|
||||
* +-----------------------------------------+
|
||||
* | PDPT (32 bytes) |
|
||||
* | Privilege elevation stack (4064 bytes) |
|
||||
* +-----------------------------------------+
|
||||
* | Guard page (4096 bytes) |
|
||||
* +-----------------------------------------+
|
||||
* | User page tables (Z_X86_THREAD_PT_AREA) |
|
||||
* +-----------------------------------------+
|
||||
* Low Memory addresses
|
||||
*
|
||||
* Kernel stacks are fixed at 4K. All the pages containing the thread stack
|
||||
* are marked as user-accessible.
|
||||
* All threads start in supervisor mode, and the kernel stack/guard page
|
||||
* are both marked non-present in the MMU.
|
||||
* If a thread drops down to user mode, the kernel stack page will be marked
|
||||
* as present, supervisor-only, and the _main_tss.esp0 field updated to point
|
||||
* to the top of it.
|
||||
* All context switches will save/restore the esp0 field in the TSS.
|
||||
* Privilege elevation stacks are fixed-size. All the pages containing the
|
||||
* thread stack are marked as user-accessible. The guard page is marked
|
||||
* read-only to catch stack overflows in supervisor mode.
|
||||
*
|
||||
* If a thread starts in supervisor mode, the page containing the PDPT and
|
||||
* privilege elevation stack is also marked read-only.
|
||||
*
|
||||
* If a thread starts in, or drops down to user mode, the privilege stack page
|
||||
* will be marked as present, supervisor-only. The PDPT will be initialized and
|
||||
* used as the active page tables when that thread is active.
|
||||
*
|
||||
* If KPTI is not enabled, the _main_tss.esp0 field will always be updated
|
||||
* to point to the top of the privilege elevation stack. Otherwise
|
||||
* _main_tss.esp0 always points to the trampoline stack, which handles the
|
||||
* page table switch to the kernel PDPT and transplants context to the
|
||||
* privileged mode stack.
|
||||
*
|
||||
* TODO: The stack object layout is getting rather complex. We should define
|
||||
* its layout in a struct definition, rather than doing math in the kernel
|
||||
* code to find the parts we want or to obtain sizes.
|
||||
*/
|
||||
#define Z_ARCH_THREAD_STACK_RESERVED (MMU_PAGE_SIZE * 2)
|
||||
#define Z_ARCH_THREAD_STACK_RESERVED (MMU_PAGE_SIZE * (2 + Z_X86_NUM_TABLE_PAGES))
|
||||
#define _STACK_BASE_ALIGN MMU_PAGE_SIZE
|
||||
#elif defined(CONFIG_HW_STACK_PROTECTION) || defined(CONFIG_USERSPACE)
|
||||
/* If only one of HW stack protection or userspace is enabled, then the
|
||||
* stack will be preceded by one page which is a guard page or a kernel mode
|
||||
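* stack, respectively, plus Z_X86_NUM_TABLE_PAGES pages reserved for
* per-thread page tables (zero when userspace is not enabled).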
|
||||
*/
|
||||
#define Z_ARCH_THREAD_STACK_RESERVED MMU_PAGE_SIZE
|
||||
#define Z_ARCH_THREAD_STACK_RESERVED (MMU_PAGE_SIZE * (1 + Z_X86_NUM_TABLE_PAGES))
|
||||
#define _STACK_BASE_ALIGN MMU_PAGE_SIZE
|
||||
#else /* Neither feature */
|
||||
#define Z_ARCH_THREAD_STACK_RESERVED 0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue