/*
 * Copyright (c) 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <xtensa_asm2_s.h>
#include <zephyr/offsets.h>
#include <offsets_short.h>

#if defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
#include <xtensa/simcall.h>
#endif

/*
 * xtensa_spill_reg_windows
 *
 * Spill all register windows.  Not a C function, enter this via CALL0
 * (so you have to save off A0, but no other registers need to be
 * spilled).  On return, all registers not part of the current
 * function will be spilled to memory.  The WINDOWSTART SR will have a
 * single 1 bit corresponding to the current frame at WINDOWBASE.
 */
.global xtensa_spill_reg_windows
.align 4
xtensa_spill_reg_windows:
	SPILL_ALL_WINDOWS
	ret

/*
 * xtensa_save_high_regs
 *
 * Call with CALL0, with A2/A3 available as scratch.  Pushes the high
 * A4-A15 GPRs to the stack if needed (i.e. if those registers are not
 * part of wrapped-around frames higher up the call stack), returning
 * to the caller with the stack pointer HAVING BEEN MODIFIED to
 * contain them.
 */
.global xtensa_save_high_regs
.align 4
xtensa_save_high_regs:
	/* Generate a rotated (modulo NREGS/4 bits!) WINDOWSTART in A2
	 * by placing two copies of the bits side by side and shifting
	 * down by WINDOWBASE bits.  Now the LSB is the register quad
	 * at WINDOWBASE.
	 */
	rsr a2, WINDOWSTART
	slli a3, a2, (XCHAL_NUM_AREGS / 4)
	or a2, a2, a3
	rsr a3, WINDOWBASE
	ssr a3
	srl a2, a2

	mov a3, a1 /* Stash our original stack pointer */

	/* For the next three bits in WINDOWSTART (which correspond to
	 * the A4-A7, A8-A11 and A12-A15 quads): if we find a one, that
	 * quad is owned by a wrapped-around call still live in the
	 * registers, so we don't need to spill it or any further
	 * registers from the GPRs and can skip to the end.
	 */
	bbsi a2, 1, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a4, a1, 0
	s32i a5, a1, 4
	s32i a6, a1, 8
	s32i a7, a1, 12

	bbsi a2, 2, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a8, a1, 0
	s32i a9, a1, 4
	s32i a10, a1, 8
	s32i a11, a1, 12

	bbsi a2, 3, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a12, a1, 0
	s32i a13, a1, 4
	s32i a14, a1, 8
	s32i a15, a1, 12

_high_gpr_spill_done:
	/* Push the original stack pointer so we know at restore time
	 * how many registers were spilled, then return, leaving the
	 * modified SP in A1.
	 */
	addi a1, a1, -4
	s32i a3, a1, 0

	ret

/*
 * xtensa_restore_high_regs
 *
 * Does the inverse of xtensa_save_high_regs, taking in A1 the stack
 * pointer it produced and restoring the A4-A15 state (and the stack
 * pointer) to what they were at the earlier call.  Call with CALL0,
 * leaving A2/A3 available as scratch.
 */
.global xtensa_restore_high_regs
.align 4
xtensa_restore_high_regs:
	/* pop our "original" stack pointer into a2, stash in a3 also */
	l32i a2, a1, 0
	addi a1, a1, 4
	mov a3, a2

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a4, a2, 0
	l32i a5, a2, 4
	l32i a6, a2, 8
	l32i a7, a2, 12

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a8, a2, 0
	l32i a9, a2, 4
	l32i a10, a2, 8
	l32i a11, a2, 12

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a12, a2, 0
	l32i a13, a2, 4
	l32i a14, a2, 8
	l32i a15, a2, 12

_high_restore_done:
	mov a1, a3 /* Original stack */
	ret

/*
 * _restore_context
 *
 * Arrive here via a jump.  Enters the restored context and does not
 * return.  A1 should have a context pointer in it as received from
 * switch or an interrupt exit.  Interrupts must be disabled, and
 * register windows should have been spilled.
 *
 * Note that exit from the restore is done with the RFI instruction,
 * using the EPCn/EPSn registers.  Those will have been saved already
 * by any interrupt entry so they are safe to use.
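 *
 * (Sketch of the handle layout, as built by interrupt entry or by
 * xtensa_switch below: A1 is the stack pointer left by
 * xtensa_save_high_regs, so just above it sit the stashed base save
 * area pointer and any spilled high-register quads, and above those
 * the ___xtensa_irq_bsa_t area holding PC, PS, SAR, the loop
 * registers and A0/A2/A3.  The code below unwinds them in that
 * order.)
 *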
 * Note that EPC1 and RFE are NOT usable (they can't preserve PS).
 * Per the ISA spec, all RFI levels do the same thing and differ only
 * in the special registers used to hold PC/PS, but QEMU has been
 * observed to behave strangely when RFI doesn't "return" to an
 * INTLEVEL strictly lower than it started from.  So we leverage the
 * zsr.h framework to pick the highest level available for our
 * specific platform.
 */
.global _restore_context
_restore_context:
	call0 xtensa_restore_high_regs

	l32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET
	wsr a0, ZSR_EPC
	l32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
	wsr a0, ZSR_EPS

#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
	FPU_REG_RESTORE
#endif

#if defined(CONFIG_XTENSA_HIFI_SHARING)
	.extern _xtensa_hifi_load
	call0 _xtensa_hifi_load
#endif

	l32i a0, a1, ___xtensa_irq_bsa_t_sar_OFFSET
	wsr a0, SAR
#if XCHAL_HAVE_LOOPS
	l32i a0, a1, ___xtensa_irq_bsa_t_lbeg_OFFSET
	wsr a0, LBEG
	l32i a0, a1, ___xtensa_irq_bsa_t_lend_OFFSET
	wsr a0, LEND
	l32i a0, a1, ___xtensa_irq_bsa_t_lcount_OFFSET
	wsr a0, LCOUNT
#endif
#if XCHAL_HAVE_S32C1I
	l32i a0, a1, ___xtensa_irq_bsa_t_scompare1_OFFSET
	wsr a0, SCOMPARE1
#endif
#if XCHAL_HAVE_THREADPTR && \
	(defined(CONFIG_USERSPACE) || defined(CONFIG_THREAD_LOCAL_STORAGE))
	l32i a0, a1, ___xtensa_irq_bsa_t_threadptr_OFFSET
	wur a0, THREADPTR
#endif
	rsync

	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	l32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	l32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF

	rfi ZSR_RFI_LEVEL

/*
 * void xtensa_arch_except(int reason_p);
 *
 * Raises a hardware exception for Xtensa ARCH_EXCEPT so that the
 * interrupted stack frame and reason_p are saved for use by the
 * exception handler and coredump.
 */
.global xtensa_arch_except
.global xtensa_arch_except_epc
.align 4
xtensa_arch_except:
	entry a1, 16
xtensa_arch_except_epc:
	ill
	retw

/*
 * void xtensa_arch_kernel_oops(int reason_p, void *ssf);
 *
 * Simply raises a hardware exception for a kernel oops.
 */
.global xtensa_arch_kernel_oops
.global xtensa_arch_kernel_oops_epc
.align 4
xtensa_arch_kernel_oops:
	entry a1, 16
xtensa_arch_kernel_oops_epc:
	ill
	retw

/*
 * void xtensa_switch(void *new, void **old_return);
 *
 * Context switches into the previously-saved "new" handle, placing
 * the saved "old" handle into the address provided by old_return.
 */
.global xtensa_switch
.align 4
xtensa_switch:
	entry a1, 16
	SPILL_ALL_WINDOWS
	addi a1, a1, -___xtensa_irq_bsa_t_SIZEOF

	/* Stash our A0/2/3 and the shift/loop registers into the base
	 * save area so they get restored as they are now.  A2/A3
	 * don't actually get used post-restore, but they need to be
	 * stashed across the xtensa_save_high_regs call and this is a
	 * convenient place.
	 */
	s32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	s32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	s32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET

	ODD_REG_SAVE

	/* Stash our PS register contents and a "restore" PC. */
	rsr a0, PS
	s32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
	movi a0, _switch_restore_pc
	s32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET

#if defined(CONFIG_XTENSA_HIFI_SHARING)
	call0 _xtensa_hifi_save
#endif

	/* Now the high registers */
	call0 xtensa_save_high_regs

#ifdef CONFIG_KERNEL_COHERENCE
	/* Flush the stack.  The top of stack was stored for us by
	 * arch_cohere_stacks().  It can be NULL for a dummy thread.
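	 *
	 * (What follows is a plain line-by-line writeback: starting
	 * at the current SP, DHWB is issued for each
	 * XCHAL_DCACHE_LINESIZE step until the stored top of stack in
	 * ZSR_FLUSH is reached, so the saved frames are visible in
	 * memory before another CPU picks this thread up.)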
	 */
	rsr a0, ZSR_FLUSH
	beqz a0, noflush
	mov a3, a1
flushloop:
	dhwb a3, 0
	addi a3, a3, XCHAL_DCACHE_LINESIZE
	blt a3, a0, flushloop
noflush:
#endif

	/* Restore the A3 argument we spilled earlier (via the base
	 * save pointer pushed at the bottom of the stack) and set the
	 * stack to the "new" context out of the A2 spill slot.
	 */
	l32i a2, a1, 0
	l32i a3, a2, ___xtensa_irq_bsa_t_a3_OFFSET
	s32i a1, a3, 0

#ifdef CONFIG_USERSPACE
	/* Switch page tables */
	rsr a6, ZSR_CPU
	l32i a6, a6, ___cpu_t_current_OFFSET
#ifdef CONFIG_XTENSA_MMU
	call4 xtensa_swap_update_page_tables
#endif
#ifdef CONFIG_XTENSA_MPU
	call4 xtensa_mpu_map_write
#endif
	l32i a2, a3, 0
	l32i a2, a2, 0
#endif

	/* Switch stack pointer and restore.  The jump to
	 * _restore_context does not return as such, but we arrange
	 * for the restored "next" address to be immediately after it
	 * for sanity.
	 */
	l32i a1, a2, ___xtensa_irq_bsa_t_a2_OFFSET

#ifdef CONFIG_INSTRUMENT_THREAD_SWITCHING
	call4 z_thread_mark_switched_in
#endif

	j _restore_context

_switch_restore_pc:
	retw

/* Define our entry handler to load the struct kernel_t from the
 * MISC0 special register, and to find the nest and irq_stack values
 * at the precomputed offsets.
 */
.align 4
_handle_excint:
	EXCINT_HANDLER ___cpu_t_nested_OFFSET, ___cpu_t_irq_stack_OFFSET

/* Define the actual vectors for the hardware-defined levels with
 * DEF_EXCINT.  These load a C handler address and jump to our
 * handler above.
 */
DEF_EXCINT 1, _handle_excint, xtensa_excint1_c

/* In the code below we use XCHAL_NMILEVEL or XCHAL_NUM_INTLEVELS
 * (whichever is higher), since not all Xtensa configurations support
 * NMI.  In that case we fall back to XCHAL_NUM_INTLEVELS.
 */
#if XCHAL_HAVE_NMI
#define MAX_INTR_LEVEL XCHAL_NMILEVEL
#elif XCHAL_HAVE_INTERRUPTS
#define MAX_INTR_LEVEL XCHAL_NUM_INTLEVELS
#else
#error Xtensa core with no interrupt support is used
#define MAX_INTR_LEVEL 0
#endif

#if MAX_INTR_LEVEL >= 2
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 2))
DEF_EXCINT 2, _handle_excint, xtensa_int2_c
#endif
#endif

#if MAX_INTR_LEVEL >= 3
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 3))
DEF_EXCINT 3, _handle_excint, xtensa_int3_c
#endif
#endif

#if MAX_INTR_LEVEL >= 4
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 4))
DEF_EXCINT 4, _handle_excint, xtensa_int4_c
#endif
#endif

#if MAX_INTR_LEVEL >= 5
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 5))
DEF_EXCINT 5, _handle_excint, xtensa_int5_c
#endif
#endif

#if MAX_INTR_LEVEL >= 6
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 6))
DEF_EXCINT 6, _handle_excint, xtensa_int6_c
#endif
#endif

#if MAX_INTR_LEVEL >= 7
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 7))
DEF_EXCINT 7, _handle_excint, xtensa_int7_c
#endif
#endif

#if defined(CONFIG_GDBSTUB)
DEF_EXCINT XCHAL_DEBUGLEVEL, _handle_excint, xtensa_debugint_c
#endif

/* The user exception vector is defined here, as we need to handle
 * MOVSP exceptions in assembly (what it has to do is unspill the
 * caller of the code that took the exception, and that can't be done
 * in C).  A prototype exists which mucks with the stack frame from
 * the C handler instead, but that would add a LARGE overhead to some
 * alloca() calls (those where the caller has been spilled) just to
 * save these five cycles during other exceptions and L1 interrupts.
 * Maybe revisit at some point, with better benchmarking.
 * Note that _xt_alloca_exc is Xtensa-authored code which expects A0
 * to have been saved to EXCSAVE1; we've modified it to use the zsr.h
 * API to get assigned a scratch register.
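 *
 * (Roughly: MOVSP raises EXCCAUSE_ALLOCA only when the caller's
 * frame has been spilled out of the register file; the handler's job
 * is to unspill that frame and return so the MOVSP re-executes and
 * succeeds.)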
 */
.pushsection .UserExceptionVector.text, "ax"
.global _Level1RealVector
_Level1RealVector:
	wsr a0, ZSR_A0SAVE
	rsync
	rsr.exccause a0
#ifdef CONFIG_XTENSA_MMU
	beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_user
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_USERSPACE
	beqi a0, EXCCAUSE_SYSCALL, _syscall
#endif /* CONFIG_USERSPACE */
#ifdef CONFIG_XTENSA_MMU
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_user
	rsr.exccause a0
#endif /* CONFIG_XTENSA_MMU */
	bnei a0, EXCCAUSE_ALLOCA, _not_alloca
	j _xt_alloca_exc
_not_alloca:
	rsr a0, ZSR_A0SAVE
	j _Level1Vector

#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_user:
	/**
	 * Handle a TLB miss by loading the PTE page:
	 * When we access an address that is not mapped, we get a
	 * miss, and the hardware tries to fetch the corresponding
	 * entry from the page table.  Since the page table itself is
	 * not mapped, that produces a second miss, which raises this
	 * exception.  Here we exploit that hardware behaviour by
	 * simply loading from the page table (the unmapped address):
	 * this causes another miss, but now the hardware can refill
	 * the mapping automatically from the page table.  This time
	 * it works, because the pages needed to map the page table
	 * itself have wired mappings.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0
	rsr a0, ZSR_A0SAVE
	rfe
#endif /* CONFIG_XTENSA_MMU */

#ifdef CONFIG_USERSPACE
_syscall:
	rsr a0, ZSR_A0SAVE
	j xtensa_do_syscall
#endif /* CONFIG_USERSPACE */
.popsection

/* In theory you can have levels up to 15, but known hardware only uses 7. */
#if XCHAL_NMILEVEL > 7
#error More interrupts than expected.
#endif

/* We don't actually use "kernel mode" currently.  Populate the vector
 * out of simple caution in case app code clears the UM bit by mistake.
 */
.pushsection .KernelExceptionVector.text, "ax"
.global _KernelExceptionVector
_KernelExceptionVector:
#ifdef CONFIG_XTENSA_MMU
	wsr a0, ZSR_A0SAVE
	rsr.exccause a0
	beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_kernel
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_kernel
	rsr a0, ZSR_A0SAVE
#endif
	j _Level1Vector

#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_kernel:
	/* This TLB miss handling is used only during xtensa_mmu_init(),
	 * while VECBASE still points at a different address: the
	 * offset encoded in the jump ('j') instruction would not
	 * reach the correct target (remember, VECBASE gets moved).
	 * So we handle TLB misses in a very simple way here until we
	 * switch back to using the UserExceptionVector above.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0
	rsr a0, ZSR_A0SAVE
	rfe
#endif
.popsection

#ifdef XCHAL_DOUBLEEXC_VECTOR_VADDR
.pushsection .DoubleExceptionVector.text, "ax"
.global _DoubleExceptionVector
_DoubleExceptionVector:
#ifdef CONFIG_XTENSA_MMU
	wsr a0, ZSR_DBLEXC
	rsync
	rsr.exccause a0
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_dblexc
	rsr a0, ZSR_DBLEXC
	j _Level1Vector
#else
#if defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
1:
	/* Tell the simulator to stop executing here instead of
	 * spinning in an infinite loop (see below).  This greatly
	 * helps when tracing in the simulator, since traces will not
	 * contain endless iterations of the jump.
	 */
	movi a3, 1
	movi a2, SYS_exit
	simcall
#elif XCHAL_HAVE_DEBUG
	/* Signals an unhandled double exception */
1:
	break 1, 4
#else
1:
#endif
	j 1b
#endif /* CONFIG_XTENSA_MMU */

#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_dblexc:
	/* Handle all data TLB misses here.
	 * These data TLB misses are mostly caused by preloading
	 * page table entries in the level 1 exception handler.
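	 *
	 * (That is: the level 1 handler's own load of the PTE can
	 * miss in the DTLB when the page holding that part of the
	 * page table is not mapped; that nested miss is what brings
	 * the CPU here, where the same PTE load is attempted again
	 * below.)
	 *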
	 * Failure to load the PTE will result in another exception
	 * with a different cause (EXCCAUSE), which can be handled
	 * when the CPU re-enters the double exception handler.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0
	rsr a0, ZSR_DBLEXC
	rfde
#endif
.popsection
#endif