diff --git a/arch/xtensa/core/CMakeLists.txt b/arch/xtensa/core/CMakeLists.txt index 38748f6fd30..af6fbbab4b5 100644 --- a/arch/xtensa/core/CMakeLists.txt +++ b/arch/xtensa/core/CMakeLists.txt @@ -11,6 +11,8 @@ zephyr_sources( xtensa_vectors.S xt_zephyr.S window_vectors.S + xtensa-asm2-util.S + xtensa-asm2.c ) zephyr_sources_ifndef(CONFIG_ATOMIC_OPERATIONS_C atomic.S) diff --git a/arch/xtensa/core/xtensa-asm2-util.S b/arch/xtensa/core/xtensa-asm2-util.S new file mode 100644 index 00000000000..189359978e6 --- /dev/null +++ b/arch/xtensa/core/xtensa-asm2-util.S @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include + +/* + * xtensa_save_high_regs + * + * Call with CALL0, with A2/A3 available as scratch. Pushes the high + * A4-A15 GPRs to the stack if needed (i.e. if those registers are not + * part of wrapped-around frames higher up the call stack), returning + * to the caller with the stack pointer HAVING BEEN MODIFIED to + * contain them. + */ +.global xtensa_save_high_regs +.align 4 +xtensa_save_high_regs: + /* Generate a rotated (modulo NREGS/4 bits!) WINDOWSTART in A2 + * by duplicating the bits twice and shifting down by WINDOWBASE + * bits. Now the LSB is the register quad at WINDOWBASE. + */ + rsr.WINDOWSTART a2 + slli a3, a2, (XCHAL_NUM_AREGS / 4) + or a2, a2, a3 + rsr.WINDOWBASE a3 + ssr a3 + srl a2, a2 + + mov a3, a1 /* Stash our original stack pointer */ + + /* For the next three bits in WINDOWSTART (which correspond to + * the A4-A7, A8-A11 and A12-A15 quads), if we find a one, + * that means that the quad is owned by a wrapped-around call + * in the registers, so we don't need to spill it or any + * further registers from the GPRs and can skip to the end. + */ + bbsi a2, 1, _high_gpr_spill_done + addi a1, a1, -16 + s32i a4, a1, 0 + s32i a5, a1, 4 + s32i a6, a1, 8 + s32i a7, a1, 12 + + bbsi a2, 2, _high_gpr_spill_done + addi a1, a1, -16 + s32i a8, a1, 0 + s32i a9, a1, 4 + s32i a10, a1, 8 + s32i a11, a1, 12 + + bbsi a2, 3, _high_gpr_spill_done + addi a1, a1, -16 + s32i a12, a1, 0 + s32i a13, a1, 4 + s32i a14, a1, 8 + s32i a15, a1, 12 + +_high_gpr_spill_done: + /* Push the original stack pointer so we know at restore + * time how many registers were spilled, then return, leaving the + * modified SP in A1. + */ + addi a1, a1, -4 + s32i a3, a1, 0 + + ret + +/* + * xtensa_restore_high_regs + * + * Does the inverse of xtensa_save_high_regs, taking a stack pointer + * in A1 that resulted and restoring the A4-A15 state (and the stack + * pointer) to the state they had at the earlier call. Call with + * CALL0, leaving A2/A3 available as scratch. + */ +.global xtensa_restore_high_regs +.align 4 +xtensa_restore_high_regs: + /* pop our "original" stack pointer into a2, stash in a3 also */ + l32i a2, a1, 0 + addi a1, a1, 4 + mov a3, a2 + + beq a1, a2, _high_restore_done + addi a2, a2, -16 + l32i a4, a2, 0 + l32i a5, a2, 4 + l32i a6, a2, 8 + l32i a7, a2, 12 + + beq a1, a2, _high_restore_done + addi a2, a2, -16 + l32i a8, a2, 0 + l32i a9, a2, 4 + l32i a10, a2, 8 + l32i a11, a2, 12 + + beq a1, a2, _high_restore_done + addi a2, a2, -16 + l32i a12, a2, 0 + l32i a13, a2, 4 + l32i a14, a2, 8 + l32i a15, a2, 12 + +_high_restore_done: + mov a1, a3 /* Original stack */ + ret + +/* + * _restore_context + * + * Arrive here via a jump. Enters into the restored context and does + * not return. A1 should have a context pointer in it as received + * from switch or an interrupt exit. 
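(Aside, not part of the patch: the WINDOWSTART bit manipulation at the top of xtensa_save_high_regs above, and the decision of how many high-register quads get pushed, can be modeled in C roughly as below. The helper names are hypothetical; only XCHAL_NUM_AREGS comes from the core configuration header.)

#include <xtensa/config/core-isa.h>	/* XCHAL_NUM_AREGS */

static unsigned int rotated_windowstart(unsigned int windowstart,
					unsigned int windowbase)
{
	const unsigned int nquads = XCHAL_NUM_AREGS / 4;

	/* Duplicate the WINDOWSTART bits and shift down by WINDOWBASE
	 * so that bit 0 is the quad the current frame occupies.
	 */
	return ((windowstart << nquads) | windowstart) >> windowbase;
}

static int high_quads_to_spill(unsigned int rotated)
{
	int quads = 0;

	/* Mirrors the three bbsi tests above: stop at the first quad
	 * already owned by a wrapped-around call frame.
	 */
	for (int bit = 1; bit <= 3; bit++) {
		if (rotated & (1u << bit)) {
			break;
		}
		quads++;	/* A4-A7, then A8-A11, then A12-A15 */
	}

	return quads;
}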
Interrupts must be disabled, + * and register windows should have been spilled. + * + + * Note that exit from the restore is done with the RFI instruction, + * using the EPCn/EPSn registers. Those will have been saved already + * by any interrupt entry so they are save to use. Note that EPC1 and + * RFE are NOT usable (they can't preserve PS). Per the ISA spec, all + * RFI levels do the same thing and differ only in the special + * registers used to hold PC/PS, but Qemu has been observed to behave + * strangely when RFI doesn't "return" to a INTLEVEL strictly lower + * than it started from. So pick level 6 (the highest that works on + * Qemu, hardware doesn't care so it doesn't matter). In theory we + * should test to be able to support hardware with less than 6 levels, + * though... + */ +.global _restore_context +_restore_context: + call0 xtensa_restore_high_regs + + l32i a0, a1, BSA_PC_OFF + wsr.EPC6 a0 + l32i a0, a1, BSA_PS_OFF + wsr.EPS6 a0 + + l32i a0, a1, BSA_SAR_OFF + wsr.SAR a0 +#if XCHAL_HAVE_LOOPS + l32i a0, a1, BSA_LBEG_OFF + wsr.LBEG a0 + l32i a0, a1, BSA_LEND_OFF + wsr.LEND a0 + l32i a0, a1, BSA_LCOUNT_OFF + wsr.LCOUNT a0 +#endif + rsync + + l32i a0, a1, BSA_A0_OFF + l32i a2, a1, BSA_A2_OFF + l32i a3, a1, BSA_A3_OFF + addi a1, a1, BASE_SAVE_AREA_SIZE + + rfi 6 + +/* + * void xtensa_switch(void *new, void **old_return); + * + * Context switches into the prevoiusly-saved "new" handle, placing + * the saved "old" handle into the address provided by old_return. + */ +.global xtensa_switch +.align 4 +xtensa_switch: + entry a1, 16 + SPILL_ALL_WINDOWS + addi a1, a1, -BASE_SAVE_AREA_SIZE + + /* Stash our A0/2/3 and the shift/loop registers into the base + * save area so they get restored as they are now. A2/A3 + * don't actually get used post-restore, but they need to be + * stashed across the xtensa_save_high_regs call and this is a + * convenient place. + */ + s32i a0, a1, BSA_A0_OFF + s32i a2, a1, BSA_A2_OFF + s32i a3, a1, BSA_A3_OFF + ODD_REG_SAVE + + /* Stash our PS register contents and a "restore" PC. */ + rsr.PS a0 + s32i a0, a1, BSA_PS_OFF + movi a0, _switch_restore_pc + s32i a0, a1, BSA_PC_OFF + + /* Now the high registers */ + call0 xtensa_save_high_regs + + /* Restore the A3 argument we spilled earlier (via the base + * save pointer pushed at the bottom of the stack) and set the + * stack to the "new" context out of the A2 spill slot. + */ + l32i a2, a1, 0 + l32i a3, a2, BSA_A3_OFF + s32i a1, a3, 0 + + /* Switch stack pointer and restore. The jump to + * _restore_context does not return as such, but we arrange + * for the restored "next" address to be immediately after for + * sanity. + */ + l32i a1, a2, BSA_A2_OFF + j _restore_context +_switch_restore_pc: + retw diff --git a/arch/xtensa/core/xtensa-asm2.c b/arch/xtensa/core/xtensa-asm2.c new file mode 100644 index 00000000000..6fe0221dcf9 --- /dev/null +++ b/arch/xtensa/core/xtensa-asm2.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include +#include + +void *xtensa_init_stack(int *stack_top, + void (*entry)(void *, void *, void *), + void *arg1, void *arg2, void *arg3) +{ + /* We cheat and shave 16 bytes off, the top four words are the + * A0-A3 spill area for the caller of the entry function, + * which doesn't exist. It will never be touched, so we + * arrange to enter the function with a CALLINC of 1 and a + * stack pointer 16 bytes above the top, so its ENTRY at the + * start will decrement the stack pointer by 16. 
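(Aside, not part of the patch: a minimal sketch of how the xtensa_switch() primitive defined above might be driven from C. The switch_to_thread() wrapper and the switch_handle field name are assumptions for illustration only.)

#include <kernel.h>

extern void xtensa_switch(void *switch_to, void **switched_from);

static void switch_to_thread(struct k_thread *next, struct k_thread *old)
{
	/* Saves the outgoing context, stores its handle through the
	 * second argument, and resumes "next"; this call returns only
	 * when the old context is eventually switched back in.
	 */
	xtensa_switch(next->switch_handle, &old->switch_handle);
}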
+ */ + const int bsasz = BASE_SAVE_AREA_SIZE - 16; + void **bsa = (void **) (((char *) stack_top) - bsasz); + + memset(bsa, 0, bsasz); + + bsa[BSA_PC_OFF/4] = entry; + bsa[BSA_PS_OFF/4] = (void *)(PS_WOE | PS_UM | PS_CALLINC(1)); + + /* Arguments. Remember these start at A6, which will be + * rotated into A2 by the ENTRY instruction that begins the + * entry function. And A4-A7 and A8-A11 are optional quads + * that live below the BSA! + */ + bsa[-1] = arg2; /* a7 */ + bsa[-2] = arg1; /* a6 */ + bsa[-3] = 0; /* a5 */ + bsa[-4] = 0; /* a4 */ + + bsa[-5] = 0; /* a11 */ + bsa[-6] = 0; /* a10 */ + bsa[-7] = 0; /* a9 */ + bsa[-8] = arg3; /* a8 */ + + /* Finally push the BSA pointer and return the stack pointer + * as the handle + */ + bsa[-9] = bsa; + return &bsa[-9]; +} diff --git a/arch/xtensa/include/xtensa-asm2-context.h b/arch/xtensa/include/xtensa-asm2-context.h new file mode 100644 index 00000000000..53fe99b0ba0 --- /dev/null +++ b/arch/xtensa/include/xtensa-asm2-context.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _XTENSA_ASM2_CONTEXT_H +#define _XTENSA_ASM2_CONTEXT_H + +#include +#include + +/* + * Stack frame layout for a saved processor context, in memory order, + * high to low address: + * + * SP-0 <-- Interrupted stack pointer points here + * + * SP-4 Caller A3 spill slot \ + * SP-8 Caller A2 spill slot | + * SP-12 Caller A1 spill slot + (Part of ABI standard) + * SP-16 Caller A0 spill slot / + * + * SP-20 Saved A3 + * SP-24 Saved A2 + * SP-28 Unused (not "Saved A1" because the SP is saved externally as a handle) + * SP-32 Saved A0 + * + * SP-36 Saved PC (address to jump to following restore) + * SP-40 Saved/interrupted PS special register + * + * SP-44 Saved SAR special register + * + * SP-48 Saved LBEG special register (if loops enabled) + * SP-52 Saved LEND special register (if loops enabled) + * SP-56 Saved LCOUNT special register (if loops enabled) + * + * (The above fixed-size region is known as the "base save area" in the + * code below) + * + * - Saved A7 \ + * - Saved A6 | + * - Saved A5 +- If not in-use by another frame + * - Saved A4 / + * + * - Saved A11 \ + * - Saved A10 | + * - Saved A9 +- If not in-use by another frame + * - Saved A8 / + * + * - Saved A15 \ + * - Saved A14 | + * - Saved A13 +- If not in-use by another frame + * - Saved A12 / + * + * - Saved intermediate stack pointer (points to low word of base save + * area, i.e. the saved LCOUNT or SAR). The pointer to this value + * (i.e. the final stack pointer) is stored externally as the + * "restore handle" in the thread context. + * + * Essentially, you can recover a pointer to the BSA by loading *SP. + * Adding the fixed BSA size to that gets you back to the + * original/interrupted stack pointer. 
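(Aside, not part of the patch: the two recovery steps just described look like this in C. The helper names are hypothetical; BASE_SAVE_AREA_SIZE is the constant defined below.)

#include "xtensa-asm2-context.h"	/* BASE_SAVE_AREA_SIZE */

static inline void *context_bsa(void *handle)
{
	/* The handle is the final saved stack pointer; the word it
	 * points at is the pointer to the low word of the BSA.
	 */
	return *(void **)handle;
}

static inline void *context_interrupted_sp(void *handle)
{
	return (char *)context_bsa(handle) + BASE_SAVE_AREA_SIZE;
}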
+ */ + +#if XCHAL_HAVE_LOOPS +#define BASE_SAVE_AREA_SIZE 56 +#else +#define BASE_SAVE_AREA_SIZE 44 +#endif + +#define BSA_A3_OFF (BASE_SAVE_AREA_SIZE - 20) +#define BSA_A2_OFF (BASE_SAVE_AREA_SIZE - 24) +#define BSA_SCRATCH_OFF (BASE_SAVE_AREA_SIZE - 28) +#define BSA_A0_OFF (BASE_SAVE_AREA_SIZE - 32) +#define BSA_PC_OFF (BASE_SAVE_AREA_SIZE - 36) +#define BSA_PS_OFF (BASE_SAVE_AREA_SIZE - 40) +#define BSA_SAR_OFF (BASE_SAVE_AREA_SIZE - 44) +#define BSA_LBEG_OFF (BASE_SAVE_AREA_SIZE - 48) +#define BSA_LEND_OFF (BASE_SAVE_AREA_SIZE - 52) +#define BSA_LCOUNT_OFF (BASE_SAVE_AREA_SIZE - 56) + +#endif /* _XTENSA_ASM2_CONTEXT_H */ diff --git a/arch/xtensa/include/xtensa-asm2-s.h b/arch/xtensa/include/xtensa-asm2-s.h new file mode 100644 index 00000000000..c37d359fd0c --- /dev/null +++ b/arch/xtensa/include/xtensa-asm2-s.h @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "xtensa-asm2-context.h" + +/* Assembler header! This file contains macros designed to be included + * only by the assembler. + */ + +/* + * SPILL_ALL_WINDOWS + * + * Spills all windowed registers (i.e. registers not visible as + * A0-A15) to their ABI-defined spill regions on the stack. + * + * Unlike the Xtensa HAL implementation, this code requires that the + * EXCM and WOE bit be enabled in PS, and relies on repeated hardware + * exception handling to do the register spills. The trick is to do a + * noop write to the high registers, which the hardware will trap + * (into an overflow exception) in the case where those registers are + * already used by an existing call frame. Then it rotates the window + * and repeats until all but the A0-A3 registers of the original frame + * are guaranteed to be spilled, eventually rotating back around into + * the original frame. Advantages: + * + * - Vastly smaller code size + * + * - More easily maintained if changes are needed to window over/underflow + * exception handling. + * + * - Requires no scratch registers to do its work, so can be used safely in any + * context. + * + * - If the WOE bit is not enabled (for example, in code written for + * the CALL0 ABI), this becomes a silent noop and operates compatbily. + * + * - In memory protection situations, this relies on the existing + * exception handlers (and thus their use of the L/S32E + * instructions) to execute stores in the protected space. AFAICT, + * the HAL routine does not handle this situation and isn't safe: it + * will happily write through the "stack pointers" found in + * registers regardless of where they might point. + * + * - Hilariously it's ACTUALLY FASTER than the HAL routine. And not + * just a little bit, it's MUCH faster. With a mostly full register + * file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill + * registers with this vs. 279 (!) to do it with + * xthal_spill_windows(). Apparently Xtensa exception handling is + * really fast, and no one told their software people. + * + * Note that as with the Xtensa HAL spill routine, and unlike context + * switching code on most sane architectures, the intermediate states + * here will have an invalid stack pointer. That means that this code + * must not be preempted in any context (i.e. all Zephyr situations) + * where the interrupt code will need to use the stack to save the + * context. But unlike the HAL, which runs with exceptions masked via + * EXCM, this will not: hit needs the overflow handlers unmasked. 
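(Aside, not part of the patch: for readers, the BSA_*_OFF offsets defined in xtensa-asm2-context.h above correspond to the struct view below, low address first. The struct name is illustrative and nothing in the patch uses it; the asm code works from the raw offsets.)

#include <xtensa/config/core-isa.h>	/* XCHAL_HAVE_LOOPS */

struct bsa_view {			/* low address first */
#if XCHAL_HAVE_LOOPS
	unsigned int lcount;		/* BSA_LCOUNT_OFF */
	unsigned int lend;		/* BSA_LEND_OFF */
	unsigned int lbeg;		/* BSA_LBEG_OFF */
#endif
	unsigned int sar;		/* BSA_SAR_OFF */
	unsigned int ps;		/* BSA_PS_OFF */
	unsigned int pc;		/* BSA_PC_OFF */
	unsigned int a0;		/* BSA_A0_OFF */
	unsigned int scratch;		/* BSA_SCRATCH_OFF (unused "A1" slot) */
	unsigned int a2;		/* BSA_A2_OFF */
	unsigned int a3;		/* BSA_A3_OFF */
	unsigned int caller_spill[4];	/* caller's A0-A3 ABI spill region */
};

/* sizeof(struct bsa_view) == BASE_SAVE_AREA_SIZE in both configurations */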
Use + * INTLEVEL instead (which, happily, is what Zephyr's locking does + * anyway). + */ +.macro SPILL_ALL_WINDOWS +#if XCHAL_NUM_AREGS == 64 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 4 +#elif XCHAL_NUM_AREGS == 32 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a4, a4, a4 + rotw 2 +#else +#error Unrecognized XCHAL_NUM_AREGS +#endif +.endm + +/* + * ODD_REG_SAVE + * + * Stashes the oddball shift/loop context registers in the base save + * area pointed to by the current stack pointer. On exit, A0 will + * have been modified but A2/A3 have not, and the shift/loop + * instructions can be used freely (though note loops don't work in + * exceptions for other reasons!). + * + * Does not populate or modify the PS/PC save locations. + */ +.macro ODD_REG_SAVE + rsr.SAR a0 + s32i a0, a1, BSA_SAR_OFF +#if XCHAL_HAVE_LOOPS + rsr.LBEG a0 + s32i a0, a1, BSA_LBEG_OFF + rsr.LEND a0 + s32i a0, a1, BSA_LEND_OFF + rsr.LCOUNT a0 + s32i a0, a1, BSA_LCOUNT_OFF +#endif +.endm + +/* + * CROSS_STACK_CALL + * + * Sets the stack up carefully such that a "cross stack" call can spill + * correctly, then invokes an immediate handler. Note that: + * + * 0. When spilling a frame, functions find their callEE's stack pointer + * (to save A0-A3) from registers. But they find their + * already-spilled callER's stack pointer (to save higher GPRs) from + * their own stack memory. + * + * 1. The function that was interrupted ("interruptee") does not need to + * be spilled, because it already has been as part of the context + * save. So it doesn't need registers allocated for it anywhere. + * + * 2. Interruptee's caller needs to spill into the space below the + * interrupted stack frame, which means that the A1 register it finds + * below it needs to contain the old/interrupted stack and not the + * context saved one. + * + * 3. The ISR dispatcher (called "underneath" interruptee) needs to spill + * high registers into the space immediately above its own stack frame, + * so it needs to find a caller with the "new" stack pointer instead. + * + * We make this work by inserting TWO 4-register frames between + * "interruptee's caller" and "ISR dispatcher". The top one (which + * occupies the slot formerly held by "interruptee", whose registers + * were saved via external means) holds the "interrupted A1" and the + * bottom has the "top of the interrupt stack" which can be either the + * word above a new memory area (when handling an interrupt from user + * mode) OR the existing "post-context-save" stack pointer (when + * handling a nested interrupt). The code works either way. Because + * these are both only 4-registers, neither needs its own caller for + * spilling. + * + * The net cost is 32 wasted bytes on the interrupt stack frame to + * spill our two "phantom frames" (actually not quite, as we'd need a + * few of those words used somewhere for tracking the stack pointers + * anyway). But the benefit is that NO REGISTER FRAMES NEED TO BE + * SPILLED on interrupt entry. And if we return back into the same + * context we interrupted (a common case) no windows need to be + * explicitly spilled at all. And in fact in the case where the ISR + * uses significant depth on its own stack, the interrupted frames + * will be spilled naturally as a standard cost of a function call, + * giving register windows something like "zero cost interrupts". 
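(Aside, not part of the patch: in C terms, the "don't get preempted while windows are being spilled" rule from the SPILL_ALL_WINDOWS notes above amounts to holding Zephyr's existing INTLEVEL-based lock around any such spill. The xtensa_spill_reg_windows() wrapper here is hypothetical.)

#include <kernel.h>

extern void xtensa_spill_reg_windows(void);	/* hypothetical asm wrapper */

static void spill_windows_safely(void)
{
	/* irq_lock() raises INTLEVEL but leaves EXCM clear, so the
	 * window overflow exceptions the spill relies on still fire.
	 */
	unsigned int key = irq_lock();

	xtensa_spill_reg_windows();

	irq_unlock(key);
}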
+ * + * FIXME: a terrible awful really nifty idea to fix the stack waste + * problem would be to use a SINGLE frame between the two stacks, + * pre-spill it with one stack pointer for the "lower" call to see and + * leave the register SP in place for the "upper" frame to use. + * Would require modifying the Window{Over|Under}flow4 exceptions to + * know not to spill/fill these special frames, but that's not too + * hard, maybe... + * + * Enter this macro with a valid "context saved" pointer (i.e. SP + * should point to a stored pointer which points to one BSA below the + * interrupted/old stack) in A1, a handler function in A2, and a "new" + * stack pointer (i.e. a pointer to the word ABOVE the allocated stack + * area) in A3. On return A0/1 will be unchanged, A2 has the return + * value of the called function, and A3 is clobbered. A4-A15 become + * part of called frames and MUST NOT BE IN USE by the code that + * expands this macro. The called function gets the context save + * handle in A1 as it's first argument. + */ +.macro CROSS_STACK_CALL + mov a6, a3 /* place "new sp" in the next frame's A2 */ + mov a10, a1 /* pass "context handle" in 2nd frame's A2 */ + mov a3, a1 /* stash it locally in A3 too */ + mov a11, a2 /* handler in 2nd frame's A3, next frame's A7 */ + + /* Recover the interrupted SP from the BSA */ + l32i a1, a1, 0 + addi a1, a1, BASE_SAVE_AREA_SIZE + + call4 _xstack_call0_\@ + mov a1, a3 /* restore original SP */ + mov a2, a6 /* copy return value */ + j _xstack_returned_\@ +.align 4 +_xstack_call0_\@: + /* We want an ENTRY to set a bit in windowstart and do the + * rotation, but we want our own SP + */ + entry a1, 16 + mov a1, a2 + call4 _xstack_call1_\@ + mov a2, a6 /* copy return value */ + retw +.align 4 +_xstack_call1_\@: + /* Remember the handler is going to do our ENTRY, so the + * handler pointer is still in A6 (not A2) even though this is + * after the second CALL4. + */ + jx a7 +_xstack_returned_\@: +.endm + +/* Entry setup for all exceptions and interrupts. Arrive here with + * the stack pointer decremented across a base save area, A0-A3 and + * PS/PC already spilled to the stack in the BSA, and A2 containing a + * level-specific C handler function. + * + * This is a macro (to allow for unit testing) that expands to a + * handler body to which the vectors can jump. It takes two static + * (!) arguments: a special register name (which should be set up to + * point to some kind of per-CPU record struct) and offsets within + * that struct which contains an interrupt stack top and a "nest + * count" word. + */ +.macro EXCINT_HANDLER SR, NEST_OFF, INTSTACK_OFF + /* A2 contains our handler function which will get clobbered + * by the save. Stash it into the unused "a1" slot in the + * BSA and recover it immediately after. Kind of a hack. + */ + s32i a2, a1, BSA_SCRATCH_OFF + + call0 xtensa_save_high_regs + + l32i a2, a1, 0 + l32i a2, a2, BSA_SCRATCH_OFF + + /* Unmask EXCM bit so C code can spill/fill in window + * exceptions. Note interrupts are already fully masked by + * INTLEVEL, so this is safe. + */ + rsr.PS a0 + movi a3, ~16 + and a0, a0, a3 + wsr.PS a0 + rsync + + /* A1 already contains our saved stack, and A2 our handler. + * So all that's needed for CROSS_STACK_CALL is to put the + * "new" stack into A3. This can be either a copy of A1 or an + * entirely new area depending on whether we find a 1 in our + * SR[off] macro argument. 
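(Aside, not part of the patch: the per-CPU record that the SR/NEST_OFF/INTSTACK_OFF macro arguments describe might look like the struct below; the chosen special register would be loaded with a pointer to one of these at early init, and the two macro arguments are simply the byte offsets of its fields. Field names and layout are assumptions.)

struct xtensa_cpu_state {
	unsigned int nest_count;	/* passed as NEST_OFF; 0 = not in an ISR */
	char *irq_stack_top;		/* passed as INTSTACK_OFF; the word
					 * above the interrupt stack area
					 */
};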
+ */ + rsr.\SR a3 + l32i a0, a3, \NEST_OFF + beqz a0, _switch_stacks_\@ + + /* Use the same stack, just copy A1 to A3 after incrementing NEST */ + addi a0, a0, 1 + s32i a0, a3, \NEST_OFF + mov a3, a1 + j _do_call_\@ + +_switch_stacks_\@: + addi a0, a0, 1 + s32i a0, a3, \NEST_OFF + l32i a3, a3, \INTSTACK_OFF + +_do_call_\@: + CROSS_STACK_CALL + + /* Decrement nest count */ + rsr.\SR a3 + l32i a0, a3, \NEST_OFF + addi a0, a0, -1 + s32i a0, a3, \NEST_OFF + + /* Last trick: the called function returned the "next" handle + * to restore to in A6 (the call4'd function's A2). If this + * is not the same handle as we started with, we need to do a + * register spill before restoring, for obvious reasons. + * Remember to mask interrupts (which have been unmasked + * during the handler execution) while we muck with the + * windows. The restore will unmask them as needed. + */ + beq a6, a1, _restore_\@ + rsil a0, XCHAL_NMILEVEL + SPILL_ALL_WINDOWS + mov a1, a6 + +_restore_\@: + j _restore_context +.endm + +/* Defines an exception/interrupt vector for a specified level. Saves + * off the interrupted A0-A3 registers and the per-level PS/PC + * registers to the stack before jumping to a handler (defined with + * EXCINT_HANDLER) to do the rest of the work. + * + * Arguments are a numeric interrupt level and symbol names for the + * entry code (defined via EXCINT_HANDLER) and a C handler for this + * particular level. + * + * FIXME: needs special handling for exceptions (level 1): it's "EPC" + * and not "EPC1" (though IIRC the assembler makes this work). + * And there is no EPS: instead PS is simply the interrupted PS + * with EXCM flipped from 0 to 1. + * + * FIXME: needs better locking. The hardware will NOT mask out "high + * priority" exceptions on arrival here, so we have to do it ourselves + * with RSIL. + */ +.macro DEF_EXCINT LVL, ENTRY_SYM, C_HANDLER_SYM +.pushsection .Level\LVL\()InterruptVector.text, "ax" +.global _Level\LVL\()Vector +_Level\LVL\()Vector: + addi a1, a1, -BASE_SAVE_AREA_SIZE + s32i a0, a1, BSA_A0_OFF + s32i a2, a1, BSA_A2_OFF + s32i a3, a1, BSA_A3_OFF + + rsr.EPS\LVL a0 + s32i a0, a1, BSA_PS_OFF + rsr.EPC\LVL a0 + s32i a0, a1, BSA_PC_OFF + + /* What's happening with this jump is that the L32R + * instruction to load a full 32 bit immediate must use an + * offset that is negative from PC. Normally the assembler + * fixes this up for you by putting the "literal pool" + * somewhere at the start of the section. But vectors start + * at a fixed address in their own section, and don't (in our + * current linker setup) have anywhere "definitely before + * vectors" to place immediates. Some platforms and apps will + * link by dumb luck, others won't. We add an extra jump just + * to clear space we know to be legal. + * + * The right way to fix this would be to use a "literal_prefix" + * to put the literals into a per-vector section, then link + * that section into the PREVIOUS vector's area right after + * the vector code. Requires touching a lot of linker scripts + * though. 
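(Aside, not part of the patch: the C handler reached through CROSS_STACK_CALL has the shape sketched below. It receives the saved-context handle and returns the handle to restore; returning a different handle is what triggers the spill-and-switch exit path above. The function name is illustrative.)

void *xtensa_excint_c_handler(void *interrupted)
{
	void *next = interrupted;

	/* ... dispatch the pending interrupt/exception here; a
	 * scheduling decision may pick a different thread's handle ...
	 */

	return next;	/* != interrupted => spill windows and switch */
}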
+ */ + j _after_imms\LVL\() +.align 4 +_handle_excint_imm\LVL: + .word \ENTRY_SYM +_c_handler_imm\LVL: + .word \C_HANDLER_SYM +_after_imms\LVL: + l32r a2, _c_handler_imm\LVL + l32r a0, _handle_excint_imm\LVL + jx a0 +.popsection +.endm diff --git a/arch/xtensa/include/xtensa-asm2.h b/arch/xtensa/include/xtensa-asm2.h new file mode 100644 index 00000000000..69d62f2d2c3 --- /dev/null +++ b/arch/xtensa/include/xtensa-asm2.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _XTENSA_ASM2_H +#define _XTENSA_ASM2_H + +#include "xtensa-asm2-context.h" + +/** + * Initializes a stack area such that it can be "restored" later and + * begin running with the specified function and three arguments. The + * entry function takes three arguments to match the signature of + * Zephyr's k_thread_entry_t. Thread will start with EXCM clear and + * INTLEVEL set to zero (i.e. it's a user thread, we don't start with + * anything masked, so don't assume that!). + */ +void *xtensa_init_stack(int *stack_top, + void (*entry)(void *, void *, void *), + void *arg1, void *arg2, void *arg3); + +#endif /* _XTENSA_ASM2_H */
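Aside, not part of the patch: a hedged usage sketch of xtensa_init_stack() as declared above. The prepare_new_thread() wrapper, the entry function, and the argument values are illustrative only.

#include "xtensa-asm2.h"

static void thread_entry(void *p1, void *p2, void *p3)
{
	/* ... thread body; starts with EXCM clear and INTLEVEL == 0 ... */
}

void *prepare_new_thread(int *stack_top)
{
	/* The returned handle is what xtensa_switch()/_restore_context
	 * consume to run the thread for the first time.
	 */
	return xtensa_init_stack(stack_top, thread_entry,
				 (void *)1, (void *)2, (void *)3);
}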