/*
 * Copyright 2023 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
#include
#include
#include
#include
#include
#include

#ifdef CONFIG_USERSPACE
BUILD_ASSERT((CONFIG_PRIVILEGED_STACK_SIZE > 0) &&
	     (CONFIG_PRIVILEGED_STACK_SIZE % CONFIG_MMU_PAGE_SIZE) == 0);
#endif

#define ASID_INVALID 0

struct tlb_regs {
	uint32_t rasid;
	uint32_t ptevaddr;
	uint32_t ptepin_as;
	uint32_t ptepin_at;
	uint32_t vecpin_as;
	uint32_t vecpin_at;
};

static void compute_regs(uint32_t user_asid, uint32_t *l1_page, struct tlb_regs *regs)
{
	uint32_t vecbase = XTENSA_RSR("VECBASE");

	__ASSERT_NO_MSG((((uint32_t)l1_page) & 0xfff) == 0);
	__ASSERT_NO_MSG((user_asid == 0) ||
			((user_asid > 2) && (user_asid < XTENSA_MMU_SHARED_ASID)));

	/* We don't use ring 1, ring 0 ASID must be 1 */
	regs->rasid = (XTENSA_MMU_SHARED_ASID << 24) |
		      (user_asid << 16) | 0x000201;

	/* Derive PTEVADDR from ASID so each domain gets its own PTE area */
	regs->ptevaddr = CONFIG_XTENSA_MMU_PTEVADDR + user_asid * 0x400000;

	/* The ptables code doesn't add the mapping for the l1 page itself */
	l1_page[XTENSA_MMU_L1_POS(regs->ptevaddr)] =
		(uint32_t)l1_page | XTENSA_MMU_PAGE_TABLE_ATTR;

	regs->ptepin_at = (uint32_t)l1_page;
	regs->ptepin_as = XTENSA_MMU_PTE_ENTRY_VADDR(regs->ptevaddr, regs->ptevaddr)
			  | XTENSA_MMU_PTE_WAY;

	/* Pin mapping for refilling the vector address into the ITLB
	 * (for handling TLB miss exceptions). Note: this is NOT an
	 * instruction TLB entry for the vector code itself, it's a
	 * DATA TLB entry for the page containing the vector mapping
	 * so the refill on instruction fetch can find it. The
	 * hardware doesn't have a 4k pinnable instruction TLB way,
	 * frustratingly.
	 */
	uint32_t vb_pte = l1_page[XTENSA_MMU_L1_POS(vecbase)];

	regs->vecpin_at = vb_pte;
	regs->vecpin_as = XTENSA_MMU_PTE_ENTRY_VADDR(regs->ptevaddr, vecbase)
			  | XTENSA_MMU_VECBASE_WAY;
}

/* Switch to a new page table. There are four items we have to set in
 * the hardware: the PTE virtual address, the ring/ASID mapping
 * register, and two pinned entries in the data TLB handling refills
 * for the page tables and the vector handlers.
 *
 * These can be done in any order, provided that we ensure that no
 * memory access which causes a TLB miss can happen during the
 * process. This means that we must work entirely within registers in
 * a single asm block. Also note that instruction fetches are memory
 * accesses too, which means we cannot cross a page boundary which
 * might reach a new page not in the TLB (a single jump to an aligned
 * address that holds our five instructions is sufficient to
 * guarantee that: I couldn't think of a way to do the alignment
 * statically that also interoperated well with inline assembly).
 */
void xtensa_set_paging(uint32_t user_asid, uint32_t *l1_page)
{
	/* Optimization note: the registers computed here are pure
	 * functions of the two arguments. With a minor API tweak,
	 * they could be cached in e.g. a thread struct instead of
	 * being recomputed. This is called on context switch paths
	 * and is performance-sensitive.
	 */
	struct tlb_regs regs;

	compute_regs(user_asid, l1_page, &regs);

	__asm__ volatile("j 1f\n"
			 ".align 16\n" /* enough for 5 insns */
			 "1:\n"
			 "wsr %0, PTEVADDR\n"
			 "wsr %1, RASID\n"
			 "wdtlb %2, %3\n"
			 "wdtlb %4, %5\n"
			 "isync"
			 :: "r"(regs.ptevaddr), "r"(regs.rasid),
			    "r"(regs.ptepin_at), "r"(regs.ptepin_as),
			    "r"(regs.vecpin_at), "r"(regs.vecpin_as));
}
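/* Illustrative sketch only, not used by the code in this file: one
 * way to realize the optimization note in xtensa_set_paging(),
 * assuming the caller owns a cache slot (e.g. stashed in a thread
 * struct). The names "paging_cache" and "xtensa_set_paging_cached"
 * are hypothetical. The TLB register values are pure functions of
 * (user_asid, l1_page), and the one side effect of compute_regs()
 * (writing the L1 self-mapping entry) persists in the page table, so
 * memoizing the result and replaying the same register-only switch
 * sequence is safe after the first call for a given pair.
 */
struct paging_cache {
	uint32_t user_asid;
	uint32_t *l1_page;	/* NULL means "nothing cached yet" */
	struct tlb_regs regs;
};

static inline void xtensa_set_paging_cached(struct paging_cache *pc,
					    uint32_t user_asid, uint32_t *l1_page)
{
	if (pc->l1_page != l1_page || pc->user_asid != user_asid) {
		compute_regs(user_asid, l1_page, &pc->regs);
		pc->user_asid = user_asid;
		pc->l1_page = l1_page;
	}

	/* Same single-asm-block switch sequence as xtensa_set_paging() */
	__asm__ volatile("j 1f\n"
			 ".align 16\n" /* enough for 5 insns */
			 "1:\n"
			 "wsr %0, PTEVADDR\n"
			 "wsr %1, RASID\n"
			 "wdtlb %2, %3\n"
			 "wdtlb %4, %5\n"
			 "isync"
			 :: "r"(pc->regs.ptevaddr), "r"(pc->regs.rasid),
			    "r"(pc->regs.ptepin_at), "r"(pc->regs.ptepin_as),
			    "r"(pc->regs.vecpin_at), "r"(pc->regs.vecpin_as));
}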
/* This is effectively the same algorithm as xtensa_set_paging(), but
 * it also disables the hardware-initialized 512M TLB entries in way
 * 6 (because the hardware disallows duplicate TLB mappings). For
 * instruction fetches this produces a critical ordering constraint:
 * the instruction following the invalidation of the ITLB entry
 * mapping the current PC will by definition create a refill
 * condition, which will (because the data TLB was invalidated) cause
 * a refill exception. Therefore this step must be the very last one,
 * once everything else is set up and working, which includes the
 * invalidation of the virtual PTEVADDR area so that the resulting
 * refill can complete.
 *
 * Note that we can't guarantee that the compiler won't insert a data
 * fetch from our stack memory after exit from the asm block (while
 * it might be double-mapped), so we invalidate that data TLB entry
 * inside the asm for correctness. The other 13 entries get
 * invalidated in a C loop at the end.
 */
void xtensa_init_paging(uint32_t *l1_page)
{
	extern char z_xt_init_pc; /* defined in asm below */
	struct tlb_regs regs;
	unsigned int initial_rasid;

	/* The initial rasid after hardware initialization is
	 * 0x04030201. ASID 1 is hardwired to ring 0; the other slots
	 * must be different from each other and must not be 0.
	 *
	 * For our initial implementation we just set the 4th slot
	 * (ring 3) to use the ASID value used for memory that is
	 * shared with all threads.
	 */
	initial_rasid = 0xff030201;

#if CONFIG_MP_MAX_NUM_CPUS > 1
	/* The incoherent cache can get into terrible trouble if it's
	 * allowed to cache PTEs differently across CPUs. We require
	 * that all page tables supplied by the OS have exclusively
	 * uncached mappings for page data, but can't do anything
	 * about earlier code/firmware. Dump the cache to be safe.
	 */
	sys_cache_data_flush_and_invd_all();
#endif

	compute_regs(ASID_INVALID, l1_page, &regs);

	uint32_t idtlb_pte = (regs.ptevaddr & 0xe0000000) | XCHAL_SPANNING_WAY;
	uint32_t idtlb_stk = (((uint32_t)&regs) & ~0xfff) | XCHAL_SPANNING_WAY;
	uint32_t iitlb_pc = (((uint32_t)&z_xt_init_pc) & ~0xfff) | XCHAL_SPANNING_WAY;

	/* Note: the jump is mostly pedantry, as it's almost
	 * inconceivable that a hardware memory region at boot is
	 * going to cross a 512M page boundary. But we need the entry
	 * symbol to get the address above, so the jump is here for
	 * symmetry with the set_paging() code.
	 */
	__asm__ volatile("j z_xt_init_pc\n"
			 ".align 32\n" /* room for 10 insns */
			 ".globl z_xt_init_pc\n"
			 "z_xt_init_pc:\n"
			 "wsr %0, PTEVADDR\n"
			 "wsr %1, RASID\n"
			 "wdtlb %2, %3\n"
			 "wdtlb %4, %5\n"
			 "idtlb %6\n" /* invalidate pte */
			 "idtlb %7\n" /* invalidate stk */
			 "isync\n"
			 "iitlb %8\n" /* invalidate pc */
			 "isync\n" /* <--- traps an ITLB miss */
			 :: "r"(regs.ptevaddr), "r"(initial_rasid),
			    "r"(regs.ptepin_at), "r"(regs.ptepin_as),
			    "r"(regs.vecpin_at), "r"(regs.vecpin_as),
			    "r"(idtlb_pte), "r"(idtlb_stk), "r"(iitlb_pc));

	/* Invalidate the remaining (unused by this function)
	 * initialization entries. Now we're flying free with our own
	 * page table.
	 */
	for (uint32_t i = 0; i < 8; i++) {
		uint32_t ixtlb = (i * 0x20000000) | XCHAL_SPANNING_WAY;

		if (ixtlb != iitlb_pc) {
			__asm__ volatile("iitlb %0" :: "r"(ixtlb));
		}
		if (ixtlb != idtlb_stk && ixtlb != idtlb_pte) {
			__asm__ volatile("idtlb %0" :: "r"(ixtlb));
		}
	}

	__asm__ volatile("isync");
}
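/* Illustrative sketch only, not used above: the RASID register packs
 * one 8-bit ASID per ring, ring 0 in the low byte through ring 3 in
 * the high byte. The hypothetical helper below just makes the
 * packing used by compute_regs() and initial_rasid explicit
 * (assuming XTENSA_MMU_SHARED_ASID is the 0xff value seen in
 * initial_rasid): the value built in compute_regs() is
 * make_rasid(1, 2, user_asid, XTENSA_MMU_SHARED_ASID), the
 * hardware's boot value 0x04030201 is make_rasid(1, 2, 3, 4), and
 * initial_rasid 0xff030201 is make_rasid(1, 2, 3, 0xff).
 */
static inline uint32_t make_rasid(uint32_t ring0_asid, uint32_t ring1_asid,
				  uint32_t ring2_asid, uint32_t ring3_asid)
{
	return (ring3_asid << 24) | (ring2_asid << 16) |
	       (ring1_asid << 8) | ring0_asid;
}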