x86: reserve room for per-thread page tables

Currently page tables have to be re-computed in
an expensive operation on every context switch. Here we
reserve room in each thread's stack object for
per-thread page table data, which will be much
cheaper to update on context switch, at the
expense of memory.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
commit 26dccaabcb
Authored by Andrew Boie on 2019-07-24 17:25:56 -07:00; committed by Carles Cufí

3 changed files with 115 additions and 25 deletions
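Before the diffs, a rough host-runnable model of the idea (illustrative C, not Zephyr code; the struct and function names here are invented for the sketch): once each thread carries a pre-built set of page tables, activating a thread's memory permissions collapses to installing a single pointer, where previously the shared tables had to be rewritten on every swap.

#include <stdio.h>

/* Toplevel PAE page table: 4 entries */
struct pdpt { unsigned long long entry[4]; };

struct thread {
	const char *name;
	struct pdpt *ptables; /* built once, when the thread is created */
};

/* Stand-in for the MMU's CR3 register */
static struct pdpt *active_ptables;

/* New scheme: context switch just installs the incoming thread's
 * pre-computed toplevel table (a mov to CR3 on real hardware),
 * instead of re-deriving permissions into a shared table set.
 */
static void swap_in(struct thread *t)
{
	active_ptables = t->ptables;
}

int main(void)
{
	struct pdpt pt_a = {{0}}, pt_b = {{0}};
	struct thread a = { "a", &pt_a }, b = { "b", &pt_b };

	swap_in(&a);
	printf("tables active for: %s\n", active_ptables == &pt_a ? "a" : "b");
	swap_in(&b);
	printf("tables active for: %s\n", active_ptables == &pt_a ? "a" : "b");
	return 0;
}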

File 1 of 3:

@@ -79,8 +79,8 @@ void z_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 #endif /* CONFIG_X86_USERSPACE */
 #if CONFIG_X86_STACK_PROTECTION
-	z_x86_mmu_set_flags(&z_x86_kernel_pdpt, stack, MMU_PAGE_SIZE,
-			    MMU_ENTRY_READ, MMU_PTE_RW_MASK);
+	z_x86_mmu_set_flags(&z_x86_kernel_pdpt, stack + Z_X86_THREAD_PT_AREA,
+			    MMU_PAGE_SIZE, MMU_ENTRY_READ, MMU_PTE_RW_MASK);
 #endif
 	stack_high = (char *)STACK_ROUND_DOWN(stack_buf + stack_size);
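The only behavioral change here is where the stack-protection guard page lives: the per-thread table pages now occupy the bottom of the stack object, so the guard page moves up by Z_X86_THREAD_PT_AREA. A minimal sketch of that offset arithmetic (the base address and the 18-page table area are made-up illustration values, not from this commit):

#include <stdint.h>
#include <stdio.h>

#define MMU_PAGE_SIZE        4096U
/* Hypothetical: 17 page tables + 1 page directory for a small board */
#define Z_X86_THREAD_PT_AREA (18U * MMU_PAGE_SIZE)

int main(void)
{
	uintptr_t stack = 0x00400000; /* made-up stack object base */

	/* Before: the guard page was the first page of the stack object.
	 * After: the table pages sit below it, so the guard begins
	 * Z_X86_THREAD_PT_AREA bytes into the object.
	 */
	printf("guard page before: 0x%08lx\n", (unsigned long)stack);
	printf("guard page after:  0x%08lx\n",
	       (unsigned long)(stack + Z_X86_THREAD_PT_AREA));
	return 0;
}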

File 2 of 3:

@@ -476,13 +476,21 @@ union x86_mmu_pte {
 	};
 };
 
+#define Z_X86_NUM_PDPT_ENTRIES	4
+#define Z_X86_NUM_PD_ENTRIES	512
+#define Z_X86_NUM_PT_ENTRIES	512
+
+/* Memory range covered by an instance of various table types */
+#define Z_X86_PT_AREA	(MMU_PAGE_SIZE * Z_X86_NUM_PT_ENTRIES)
+#define Z_X86_PD_AREA	(Z_X86_PT_AREA * Z_X86_NUM_PD_ENTRIES)
+#define Z_X86_PDPT_AREA	(Z_X86_PD_AREA * Z_X86_NUM_PDPT_ENTRIES)
+
 typedef u64_t x86_page_entry_data_t;
 typedef x86_page_entry_data_t k_mem_partition_attr_t;
 
 struct x86_mmu_pdpt {
-	union x86_mmu_pdpte entry[4];
+	union x86_mmu_pdpte entry[Z_X86_NUM_PDPT_ENTRIES];
 };
 
@@ -491,11 +499,11 @@ union x86_mmu_pde {
 };
 
 struct x86_mmu_pd {
-	union x86_mmu_pde entry[512];
+	union x86_mmu_pde entry[Z_X86_NUM_PD_ENTRIES];
 };
 
 struct x86_mmu_pt {
-	union x86_mmu_pte entry[512];
+	union x86_mmu_pte entry[Z_X86_NUM_PT_ENTRIES];
 };
 
 #endif /* _ASMLANGUAGE */
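A quick sanity check of the coverage arithmetic these macros encode, as a self-contained C11 snippet (MMU_PAGE_SIZE is restated locally rather than pulled from mmustructs.h, and the ULL suffixes are added so the 4GB product does not overflow a 32-bit int):

#include <assert.h>

#define MMU_PAGE_SIZE          4096ULL /* normally from mmustructs.h */
#define Z_X86_NUM_PDPT_ENTRIES 4ULL
#define Z_X86_NUM_PD_ENTRIES   512ULL
#define Z_X86_NUM_PT_ENTRIES   512ULL

#define Z_X86_PT_AREA   (MMU_PAGE_SIZE * Z_X86_NUM_PT_ENTRIES)
#define Z_X86_PD_AREA   (Z_X86_PT_AREA * Z_X86_NUM_PD_ENTRIES)
#define Z_X86_PDPT_AREA (Z_X86_PD_AREA * Z_X86_NUM_PDPT_ENTRIES)

/* One page table maps 512 4KB pages: 2MB */
static_assert(Z_X86_PT_AREA == 2ULL << 20, "PT covers 2MB");
/* One page directory maps 512 page tables: 1GB */
static_assert(Z_X86_PD_AREA == 1ULL << 30, "PD covers 1GB");
/* The 4-entry PAE PDPT maps the full 32-bit address space: 4GB */
static_assert(Z_X86_PDPT_AREA == 4ULL << 30, "PDPT covers 4GB");

int main(void) { return 0; }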

File 3 of 3:

@@ -22,6 +22,7 @@
 #include <ia32/mmustructs.h>
 #include <stdbool.h>
 #include <arch/common/ffs.h>
+#include <misc/util.h>
 
 #ifndef _ASMLANGUAGE
 #include <arch/common/addr_types.h>
 
@@ -571,41 +572,122 @@ extern u32_t z_timer_cycle_get_32(void);
 extern struct task_state_segment _main_tss;
 #endif
 
+#ifdef CONFIG_USERSPACE
+/* We need a set of page tables for each thread in the system which runs in
+ * user mode. For each thread, we have:
+ *
+ *  - a toplevel PDPT
+ *  - a set of page directories for the memory range covered by system RAM
+ *  - a set of page tables for the memory range covered by system RAM
+ *
+ * Directories and tables for memory ranges outside of system RAM will be
+ * shared and not thread-specific.
+ *
+ * NOTE: We are operating under the assumption that memory domain partitions
+ * will not be configured which grant permission to address ranges outside
+ * of system RAM.
+ *
+ * Each of these page tables will be programmed to reflect the memory
+ * permission policy for that thread, which will be the union of:
+ *
+ *  - The boot-time memory regions (text, rodata, and so forth)
+ *  - The thread's stack buffer
+ *  - Partitions in the memory domain configuration (if a member of a
+ *    memory domain)
+ *
+ * The PDPT is a fairly small singleton on x86 PAE (32 bytes) and must also
+ * be aligned to 32 bytes, so we place it at the highest addresses of the
+ * page reserved for the privilege elevation stack.
+ *
+ * The page directories and tables require page alignment, so we put them in
+ * additional fields of the stack object, using the macros below to compute
+ * how many pages we need.
+ */
+
+/* Define a range [Z_X86_PT_START, Z_X86_PT_END) which is the memory range
+ * covered by all the page tables needed for system RAM
+ */
+#define Z_X86_PT_START	((u32_t)ROUND_DOWN(DT_PHYS_RAM_ADDR, Z_X86_PT_AREA))
+#define Z_X86_PT_END	((u32_t)ROUND_UP(DT_PHYS_RAM_ADDR + \
+					 (DT_RAM_SIZE * 1024U), \
+					 Z_X86_PT_AREA))
+
+/* Number of page tables needed to cover system RAM. Depends on the specific
+ * bounds of system RAM, but roughly 1 page table per 2MB of RAM
+ */
+#define Z_X86_NUM_PT	((Z_X86_PT_END - Z_X86_PT_START) / Z_X86_PT_AREA)
+
+/* Same semantics as above, but for the page directories needed to cover
+ * system RAM.
+ */
+#define Z_X86_PD_START	((u32_t)ROUND_DOWN(DT_PHYS_RAM_ADDR, Z_X86_PD_AREA))
+#define Z_X86_PD_END	((u32_t)ROUND_UP(DT_PHYS_RAM_ADDR + \
+					 (DT_RAM_SIZE * 1024U), \
+					 Z_X86_PD_AREA))
+
+/* Number of page directories needed to cover system RAM. Depends on the
+ * specific bounds of system RAM, but roughly 1 page directory per 1GB of RAM
+ */
+#define Z_X86_NUM_PD	((Z_X86_PD_END - Z_X86_PD_START) / Z_X86_PD_AREA)
+
+/* Number of pages we need to reserve in the stack for per-thread page
+ * tables
+ */
+#define Z_X86_NUM_TABLE_PAGES	(Z_X86_NUM_PT + Z_X86_NUM_PD)
+#else
+/* If we're not implementing user mode, then the MMU tables don't get changed
+ * on context switch and we don't need any per-thread page tables
+ */
+#define Z_X86_NUM_TABLE_PAGES	0U
+#endif /* CONFIG_USERSPACE */
+
+#define Z_X86_THREAD_PT_AREA	(Z_X86_NUM_TABLE_PAGES * MMU_PAGE_SIZE)
+
 #if defined(CONFIG_HW_STACK_PROTECTION) && defined(CONFIG_USERSPACE)
 /* With both hardware stack protection and userspace enabled, stacks are
  * arranged as follows:
  *
  * High memory addresses
- * +---------------+
- * | Thread stack  |
- * +---------------+
- * | Kernel stack  |
- * +---------------+
- * | Guard page    |
- * +---------------+
+ * +-----------------------------------------+
+ * | Thread stack (varies)                   |
+ * +-----------------------------------------+
+ * | PDPT (32 bytes)                         |
+ * | Privilege elevation stack (4064 bytes)  |
+ * +-----------------------------------------+
+ * | Guard page (4096 bytes)                 |
+ * +-----------------------------------------+
+ * | User page tables (Z_X86_THREAD_PT_AREA) |
+ * +-----------------------------------------+
  * Low Memory addresses
  *
- * Kernel stacks are fixed at 4K. All the pages containing the thread stack
- * are marked as user-accessible.
- * All threads start in supervisor mode, and the kernel stack/guard page
- * are both marked non-present in the MMU.
- * If a thread drops down to user mode, the kernel stack page will be marked
- * as present, supervisor-only, and the _main_tss.esp0 field updated to point
- * to the top of it.
- * All context switches will save/restore the esp0 field in the TSS.
+ * Privilege elevation stacks are fixed-size. All the pages containing the
+ * thread stack are marked as user-accessible. The guard page is marked
+ * read-only to catch stack overflows in supervisor mode.
+ *
+ * If a thread starts in supervisor mode, the page containing the PDPT and
+ * privilege elevation stack is also marked read-only.
+ *
+ * If a thread starts in, or drops down to, user mode, the privilege stack
+ * page will be marked as present, supervisor-only. The PDPT will be
+ * initialized and used as the active page tables when that thread is active.
+ *
+ * If KPTI is not enabled, the _main_tss.esp0 field will always be updated
+ * to point to the top of the privilege elevation stack. Otherwise
+ * _main_tss.esp0 always points to the trampoline stack, which handles the
+ * page table switch to the kernel PDPT and transplants context to the
+ * privileged mode stack.
+ *
+ * TODO: The stack object layout is getting rather complex. We should define
+ * its layout in a struct definition, rather than doing math in the kernel
+ * code to find the parts we want or to obtain sizes.
  */
-#define Z_ARCH_THREAD_STACK_RESERVED	(MMU_PAGE_SIZE * 2)
+#define Z_ARCH_THREAD_STACK_RESERVED	(MMU_PAGE_SIZE * \
+					 (2 + Z_X86_NUM_TABLE_PAGES))
 #define _STACK_BASE_ALIGN	MMU_PAGE_SIZE
 #elif defined(CONFIG_HW_STACK_PROTECTION) || defined(CONFIG_USERSPACE)
 /* If only one of HW stack protection or userspace is enabled, then the
  * stack will be preceded by one page which is a guard page or a kernel mode
  * stack, respectively.
  */
-#define Z_ARCH_THREAD_STACK_RESERVED	MMU_PAGE_SIZE
+#define Z_ARCH_THREAD_STACK_RESERVED	(MMU_PAGE_SIZE * \
+					 (1 + Z_X86_NUM_TABLE_PAGES))
 #define _STACK_BASE_ALIGN	MMU_PAGE_SIZE
 #else /* Neither feature */
 #define Z_ARCH_THREAD_STACK_RESERVED	0
 #define _STACK_BASE_ALIGN	STACK_ALIGN
 #endif
 
 #ifdef CONFIG_USERSPACE
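To make the new per-thread cost concrete, here is a hedged, host-runnable rendition of the sizing math above for a hypothetical board (the RAM base/size and the local ROUND_DOWN/ROUND_UP stand-ins for Zephyr's misc/util.h helpers are assumptions for illustration, not values from this commit):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for Zephyr's misc/util.h helpers (power-of-two align) */
#define ROUND_DOWN(x, align) ((x) & ~((align) - 1))
#define ROUND_UP(x, align)   ROUND_DOWN((x) + (align) - 1, (align))

#define MMU_PAGE_SIZE 4096U
#define Z_X86_PT_AREA (MMU_PAGE_SIZE * 512U) /* 2MB per page table */
#define Z_X86_PD_AREA (Z_X86_PT_AREA * 512U) /* 1GB per page directory */

/* Hypothetical board: 32MB of RAM starting at the 1MB boundary */
#define DT_PHYS_RAM_ADDR 0x00100000U
#define DT_RAM_SIZE      32768U /* devicetree reports this in KB */

int main(void)
{
	uint32_t ram_end = DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U);

	uint32_t num_pt = (ROUND_UP(ram_end, Z_X86_PT_AREA) -
			   ROUND_DOWN(DT_PHYS_RAM_ADDR, Z_X86_PT_AREA)) /
			  Z_X86_PT_AREA;
	uint32_t num_pd = (ROUND_UP(ram_end, Z_X86_PD_AREA) -
			   ROUND_DOWN(DT_PHYS_RAM_ADDR, Z_X86_PD_AREA)) /
			  Z_X86_PD_AREA;

	/* The other 2 pages: the guard page, plus the page holding the
	 * privilege elevation stack and PDPT
	 */
	uint32_t reserved = MMU_PAGE_SIZE * (2U + num_pt + num_pd);

	printf("page tables: %" PRIu32 ", page directories: %" PRIu32 "\n",
	       num_pt, num_pd);
	printf("Z_ARCH_THREAD_STACK_RESERVED: %" PRIu32 " bytes\n", reserved);
	return 0;
}

For this 32MB configuration the math yields 17 page tables and 1 page directory, so Z_ARCH_THREAD_STACK_RESERVED grows from 2 pages (8KB) to 20 pages (80KB) per user-capable thread, which is the memory-for-switch-speed trade the commit message describes.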