zephyr/arch/xtensa/core/xtensa_asm2_util.S
/*
* Copyright (c) 2017, Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <xtensa_asm2_s.h>
#include <zephyr/offsets.h>
#include <zephyr/zsr.h>
#if defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
#include <xtensa/simcall.h>
#endif
/*
* xtensa_spill_reg_windows
*
* Spill all register windows. Not a C function, enter this via CALL0
* (so you have to save off A0, but no other registers need to be
* spilled). On return, all registers not part of the current
* function will be spilled to memory. The WINDOWSTART SR will have a
* single 1 bit corresponding to the current frame at WINDOWBASE.
*/
.global xtensa_spill_reg_windows
.align 4
xtensa_spill_reg_windows:
SPILL_ALL_WINDOWS
ret
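/* Usage sketch (illustrative): the caller must preserve A0 across
* the CALL0, e.g. via a zsr.h scratch SR known to be free at the
* call site (ZSR_A0SAVE here is a hypothetical choice):
*
* wsr a0, ZSR_A0SAVE
* call0 xtensa_spill_reg_windows
* rsr a0, ZSR_A0SAVE
*/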
/*
* xtensa_save_high_regs
*
* Call with CALL0, with A2/A3 available as scratch. Pushes the high
* A4-A15 GPRs to the stack if needed (i.e. if those registers are not
* part of wrapped-around frames higher up the call stack), returning
* to the caller with the stack pointer HAVING BEEN MODIFIED to
* contain them.
*/
.global xtensa_save_high_regs
.align 4
xtensa_save_high_regs:
/* Generate a rotated (modulo NREGS/4 bits!) WINDOWSTART in A2 by
* placing two copies of the bit pattern side by side and shifting
* down by WINDOWBASE bits. Now the LSB is the register quad at
* WINDOWBASE.
*/
rsr a2, WINDOWSTART
slli a3, a2, (XCHAL_NUM_AREGS / 4)
or a2, a2, a3
rsr a3, WINDOWBASE
ssr a3
srl a2, a2
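/* Worked example (illustrative, assuming XCHAL_NUM_AREGS == 32,
* i.e. 8 quads): with WINDOWBASE = 6 and WINDOWSTART = 0b01000001
* (live frames at quads 6 and 0), duplication yields
* 0b0100000101000001, and shifting right by 6 leaves 0b100000101.
* Bit 0 is our own frame; bit 2 set means A8-A15 are owned by the
* wrapped-around frame at quad 0, so only A4-A7 get spilled below.
*/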
mov a3, a1 /* Stash our original stack pointer */
/* For the next three bits in WINDOWSTART (which correspond to
* the A4-A7, A8-A11 and A12-A15 quads), if we find a one,
* that means that the quad is owned by a wrapped-around call
* in the registers, so we don't need to spill it or any
* further registers from the GPRs and can skip to the end.
*/
bbsi a2, 1, _high_gpr_spill_done
addi a1, a1, -16
s32i a4, a1, 0
s32i a5, a1, 4
s32i a6, a1, 8
s32i a7, a1, 12
bbsi a2, 2, _high_gpr_spill_done
addi a1, a1, -16
s32i a8, a1, 0
s32i a9, a1, 4
s32i a10, a1, 8
s32i a11, a1, 12
bbsi a2, 3, _high_gpr_spill_done
addi a1, a1, -16
s32i a12, a1, 0
s32i a13, a1, 4
s32i a14, a1, 8
s32i a15, a1, 12
_high_gpr_spill_done:
/* Push the original stack pointer so we know at restore
* time how many registers were spilled, then return, leaving the
* modified SP in A1.
*/
addi a1, a1, -4
s32i a3, a1, 0
ret
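/* Resulting stack layout on return (illustrative, fully-spilled
* case; quads skipped above are simply absent, and the restore path
* uses the saved original SP to know where to stop):
*
* new A1 -> [ saved original A1 ]
*           [ A12 A13 A14 A15  ]
*           [ A8  A9  A10 A11  ]
*           [ A4  A5  A6  A7   ]
* old A1 -> (caller's frame)
*/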
/*
* xtensa_restore_high_regs
*
* Does the inverse of xtensa_save_high_regs: given the stack pointer
* in A1 that call produced, it restores the A4-A15 state (and the
* stack pointer) to what they were before the save. Call with CALL0,
* leaving A2/A3 available as scratch.
*/
.global xtensa_restore_high_regs
.align 4
xtensa_restore_high_regs:
/* pop our "original" stack pointer into a2, stash in a3 also */
l32i a2, a1, 0
addi a1, a1, 4
mov a3, a2
beq a1, a2, _high_restore_done
addi a2, a2, -16
l32i a4, a2, 0
l32i a5, a2, 4
l32i a6, a2, 8
l32i a7, a2, 12
beq a1, a2, _high_restore_done
addi a2, a2, -16
l32i a8, a2, 0
l32i a9, a2, 4
l32i a10, a2, 8
l32i a11, a2, 12
beq a1, a2, _high_restore_done
addi a2, a2, -16
l32i a12, a2, 0
l32i a13, a2, 4
l32i a14, a2, 8
l32i a15, a2, 12
_high_restore_done:
mov a1, a3 /* Original stack */
ret
/*
* _restore_context
*
* Arrive here via a jump. Enters the restored context and does not
* return. A1 should hold a context pointer as received from switch
* or an interrupt exit. Interrupts must be disabled, and register
* windows should have been spilled.
*
* Note that exit from the restore is done with the RFI instruction,
* using the EPCn/EPSn registers. Those will have been saved already
* by any interrupt entry, so they are safe to use. Note that EPC1 and
* RFE are NOT usable (they can't preserve PS). Per the ISA spec, all
* RFI levels do the same thing and differ only in the special
* registers used to hold PC/PS, but QEMU has been observed to behave
* strangely when RFI doesn't "return" to an INTLEVEL strictly lower
* than the one it started from. So we leverage the zsr.h framework
* to pick the highest level available for our specific platform.
*/
.global _restore_context
_restore_context:
call0 xtensa_restore_high_regs
l32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET
wsr a0, ZSR_EPC
l32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
wsr a0, ZSR_EPS
#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
FPU_REG_RESTORE
#endif
#if defined(CONFIG_XTENSA_HIFI_SHARING)
.extern _xtensa_hifi_load
call0 _xtensa_hifi_load
#endif
l32i a0, a1, ___xtensa_irq_bsa_t_sar_OFFSET
wsr a0, SAR
#if XCHAL_HAVE_LOOPS
l32i a0, a1, ___xtensa_irq_bsa_t_lbeg_OFFSET
wsr a0, LBEG
l32i a0, a1, ___xtensa_irq_bsa_t_lend_OFFSET
wsr a0, LEND
l32i a0, a1, ___xtensa_irq_bsa_t_lcount_OFFSET
wsr a0, LCOUNT
#endif
#if XCHAL_HAVE_S32C1I
l32i a0, a1, ___xtensa_irq_bsa_t_scompare1_OFFSET
wsr a0, SCOMPARE1
#endif
#if XCHAL_HAVE_THREADPTR && \
(defined(CONFIG_USERSPACE) || defined(CONFIG_THREAD_LOCAL_STORAGE))
l32i a0, a1, ___xtensa_irq_bsa_t_threadptr_OFFSET
wur a0, THREADPTR
#endif
rsync
l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
l32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
l32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET
addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF
rfi ZSR_RFI_LEVEL
/*
* void xtensa_arch_except(int reason_p);
*
* Raises a hardware exception implementing ARCH_EXCEPT on Xtensa,
* saving the interrupted stack frame and reason_p for use by the
* exception handler and coredump.
*/
.global xtensa_arch_except
.global xtensa_arch_except_epc
.align 4
xtensa_arch_except:
entry a1, 16
xtensa_arch_except_epc:
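/* "ill" is a guaranteed-illegal opcode: executing it raises an
* exception whose PC is the exported xtensa_arch_except_epc label,
* which lets the handler identify an ARCH_EXCEPT; reason_p arrives
* in A2 per the windowed ABI.
*/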
ill
retw
/*
* void xtensa_arch_kernel_oops(int reason_p, void *ssf);
*
* Simply raises a hardware exception for a kernel OOPS.
*/
.global xtensa_arch_kernel_oops
.global xtensa_arch_kernel_oops_epc
.align 4
xtensa_arch_kernel_oops:
entry a1, 16
xtensa_arch_kernel_oops_epc:
ill
retw
/*
* void xtensa_switch(void *new, void **old_return);
*
* Context switches into the previously-saved "new" handle, placing
* the saved "old" handle into the address provided by old_return.
*/
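/* Usage sketch (illustrative): this is the Xtensa arch_switch()
* primitive, conceptually invoked from C as
*
* xtensa_switch(new_thread->switch_handle, // hypothetical caller
*               &old_thread->switch_handle);
*
* where the "handle" is the context pointer left on the suspended
* thread's stack, in the format consumed by _restore_context above.
*/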
.global xtensa_switch
.align 4
xtensa_switch:
entry a1, 16
SPILL_ALL_WINDOWS
addi a1, a1, -___xtensa_irq_bsa_t_SIZEOF
/* Stash our A0/2/3 and the shift/loop registers into the base
* save area so they get restored as they are now. A2/A3
* don't actually get used post-restore, but they need to be
* stashed across the xtensa_save_high_regs call and this is a
* convenient place.
*/
s32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
s32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
s32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET
ODD_REG_SAVE
/* Stash our PS register contents and a "restore" PC. */
rsr a0, PS
s32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
movi a0, _switch_restore_pc
s32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET
#if defined(CONFIG_XTENSA_HIFI_SHARING)
call0 _xtensa_hifi_save
#endif
/* Now the high registers */
call0 xtensa_save_high_regs
#ifdef CONFIG_KERNEL_COHERENCE
/* Flush the stack. The top of stack was stored for us by
* arch_cohere_stacks(). It can be NULL for a dummy thread.
*/
rsr a0, ZSR_FLUSH
beqz a0, noflush
mov a3, a1
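/* Write back (without invalidating) one cache line per iteration,
* from the current stack pointer up to the stack top loaded into
* A0 from ZSR_FLUSH; "dhwb" is the data-cache hit-writeback opcode.
*/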
flushloop:
dhwb a3, 0
addi a3, a3, XCHAL_DCACHE_LINESIZE
blt a3, a0, flushloop
noflush:
#endif
/* Restore the A3 argument we spilled earlier (via the base
* save pointer pushed at the bottom of the stack) and set the
* stack to the "new" context out of the A2 spill slot.
*/
l32i a2, a1, 0
l32i a3, a2, ___xtensa_irq_bsa_t_a3_OFFSET
s32i a1, a3, 0
#ifdef CONFIG_USERSPACE
/* Switch page tables */
rsr a6, ZSR_CPU
l32i a6, a6, ___cpu_t_current_OFFSET
#ifdef CONFIG_XTENSA_MMU
call4 xtensa_swap_update_page_tables
#endif
#ifdef CONFIG_XTENSA_MPU
call4 xtensa_mpu_map_write
#endif
l32i a2, a3, 0
l32i a2, a2, 0
#endif
/* Switch stack pointer and restore. The jump to
* _restore_context does not return as such, but we arrange
* for the restored "next" address to be immediately after for
* sanity.
*/
l32i a1, a2, ___xtensa_irq_bsa_t_a2_OFFSET
#ifdef CONFIG_INSTRUMENT_THREAD_SWITCHING
call4 z_thread_mark_switched_in
#endif
j _restore_context
_switch_restore_pc:
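/* Execution resumes here when this thread is next switched back in:
* _restore_context "returns" via RFI to the PC stashed above, and
* the retw unwinds into the C caller of xtensa_switch().
*/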
retw
/* Define our entry handler to load the struct kernel_t from the
* MISC0 special register, and to find the nest and irq_stack values
* at the precomputed offsets.
*/
.align 4
_handle_excint:
EXCINT_HANDLER ___cpu_t_nested_OFFSET, ___cpu_t_irq_stack_OFFSET
/* Define the actual vectors for the hardware-defined levels with
* DEF_EXCINT. These load a C handler address and jump to our handler
* above.
*/
DEF_EXCINT 1, _handle_excint, xtensa_excint1_c
/* The code below uses XCHAL_NMILEVEL where available (the NMI level
* is the highest interrupt level), since not all Xtensa
* configurations support NMI; in that case we use
* XCHAL_NUM_INTLEVELS instead.
*/
#if XCHAL_HAVE_NMI
#define MAX_INTR_LEVEL XCHAL_NMILEVEL
#elif XCHAL_HAVE_INTERRUPTS
#define MAX_INTR_LEVEL XCHAL_NUM_INTLEVELS
#else
#error Xtensa core with no interrupt support is used
#define MAX_INTR_LEVEL 0
#endif
#if MAX_INTR_LEVEL >= 2
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 2))
DEF_EXCINT 2, _handle_excint, xtensa_int2_c
#endif
#endif
#if MAX_INTR_LEVEL >= 3
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 3))
DEF_EXCINT 3, _handle_excint, xtensa_int3_c
#endif
#endif
#if MAX_INTR_LEVEL >= 4
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 4))
DEF_EXCINT 4, _handle_excint, xtensa_int4_c
#endif
#endif
#if MAX_INTR_LEVEL >= 5
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 5))
DEF_EXCINT 5, _handle_excint, xtensa_int5_c
#endif
#endif
#if MAX_INTR_LEVEL >= 6
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 6))
DEF_EXCINT 6, _handle_excint, xtensa_int6_c
#endif
#endif
#if MAX_INTR_LEVEL >= 7
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 7))
DEF_EXCINT 7, _handle_excint, xtensa_int7_c
#endif
#endif
#if defined(CONFIG_GDBSTUB)
DEF_EXCINT XCHAL_DEBUGLEVEL, _handle_excint, xtensa_debugint_c
#endif
/* The user exception vector is defined here, as we need to handle
* MOVSP exceptions in assembly (the result has to be to unspill the
* caller function of the code that took the exception, and that can't
* be done in C). A prototype exists which mucks with the stack frame
* from the C handler instead, but that would add a LARGE overhead to
* some alloca() calls (those where the caller has been spilled) just
* to save these five cycles during other exceptions and L1
* interrupts. Maybe revisit at some point, with better benchmarking.
* Note that _xt_alloca_exc is Xtensa-authored code which expects A0
* to have been saved to EXCSAVE1; we've modified it to use the zsr.h
* API to get assigned a scratch register.
*/
.pushsection .UserExceptionVector.text, "ax"
.global _Level1RealVector
_Level1RealVector:
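/* Stash A0 in the zsr.h-assigned scratch SR so it is free for
* inspecting EXCCAUSE and dispatching below.
*/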
wsr a0, ZSR_A0SAVE
rsync
rsr.exccause a0
#ifdef CONFIG_XTENSA_MMU
beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_user
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_USERSPACE
beqi a0, EXCCAUSE_SYSCALL, _syscall
#endif /* CONFIG_USERSPACE */
#ifdef CONFIG_XTENSA_MMU
addi a0, a0, -EXCCAUSE_DTLB_MISS
beqz a0, _handle_tlb_miss_user
rsr.exccause a0
#endif /* CONFIG_XTENSA_MMU */
bnei a0, EXCCAUSE_ALLOCA, _not_alloca
j _xt_alloca_exc
_not_alloca:
rsr a0, ZSR_A0SAVE
j _Level1Vector
#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_user:
/**
* Handle a TLB miss by loading the PTE page:
* When we access an address that is not mapped, we take a TLB miss.
* The hardware then tries to fetch the corresponding entry from the
* page table; since the page table itself is not mapped, this causes
* a second miss, which raises the exception handled here. All we do
* is exploit that auto-refill hardware: we load from the (unmapped)
* page table address, which triggers another miss, and this time the
* hardware can resolve it because the pages needed to map the page
* table itself are wired (statically mapped).
*/
rsr.ptevaddr a0
l32i a0, a0, 0
rsr a0, ZSR_A0SAVE
rfe
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_USERSPACE
_syscall:
rsr a0, ZSR_A0SAVE
j xtensa_do_syscall
#endif /* CONFIG_USERSPACE */
.popsection
/* In theory you can have levels up to 15, but known hardware only uses 7. */
#if XCHAL_NMILEVEL > 7
#error More interrupts than expected.
#endif
/* We don't actually use "kernel mode" currently. Populate the vector
* out of simple caution in case app code clears the UM bit by mistake.
*/
.pushsection .KernelExceptionVector.text, "ax"
.global _KernelExceptionVector
_KernelExceptionVector:
#ifdef CONFIG_XTENSA_MMU
wsr a0, ZSR_A0SAVE
rsr.exccause a0
beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_kernel
addi a0, a0, -EXCCAUSE_DTLB_MISS
beqz a0, _handle_tlb_miss_kernel
rsr a0, ZSR_A0SAVE
#endif
j _Level1Vector
#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_kernel:
/* This TLB miss handling is used only during xtensa_mmu_init(),
* while VECBASE points at a different address: the offset encoded
* in the jump ('j') instruction would not reach the correct target
* once VECBASE is moved. So we handle TLB misses in a very simple
* way here until we switch back to using the UserExceptionVector
* above.
*/
rsr.ptevaddr a0
l32i a0, a0, 0
rsr a0, ZSR_A0SAVE
rfe
#endif
.popsection
#ifdef XCHAL_DOUBLEEXC_VECTOR_VADDR
.pushsection .DoubleExceptionVector.text, "ax"
.global _DoubleExceptionVector
_DoubleExceptionVector:
#ifdef CONFIG_XTENSA_MMU
wsr a0, ZSR_DBLEXC
rsync
rsr.exccause a0
addi a0, a0, -EXCCAUSE_DTLB_MISS
beqz a0, _handle_tlb_miss_dblexc
rsr a0, ZSR_DBLEXC
j _Level1Vector
#else
#if defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
1:
/* Tell the simulator to stop executing here instead of spinning in
* an infinite loop (see below). This greatly helps when tracing in
* the simulator, as the traces will not contain endless iterations
* of the jump.
*/
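/* simcall convention: A2 selects the operation (SYS_exit here) and
* A3 carries its argument, used as the exit code.
*/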
movi a3, 1
movi a2, SYS_exit
simcall
#elif XCHAL_HAVE_DEBUG
/* Signals an unhandled double exception */
1: break 1, 4
#else
1:
#endif
j 1b
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_dblexc:
/* Handle all data TLB misses here.
* These data TLB misses are mostly caused by preloading
* page table entries in the level 1 exception handler.
* Failure to load the PTE will result in another exception
* with a different cause (EXCCAUSE), which can be handled
* when the CPU re-enters the double exception handler.
*/
rsr.ptevaddr a0
l32i a0, a0, 0
rsr a0, ZSR_DBLEXC
rfde
#endif
.popsection
#endif