arc: improved saving of lp_<count,start,end> using GPRs

Since firq utilizes a 2nd register bank, and since all of those
many GPRs can be used, the strategy here is to save extra registers,
such as lp_count, lp_start, and lp_end, into callee-saved registers.
These registers are safe to use because the C-ABI followed by the
compiler will cause these to be spilled to the stack if a C function
wants to use them. By selecting upper GPRs, r23-r25, it is very unlikely
the compiler will spill them. This improvement, therefore, can avoid a
d-cache miss since we are avoiding memory altogether when saving these.
The struct firq_regs is no longer needed.

Change-Id: I7c0d061908a90376da7a0101b62e804647a20443
Signed-off-by: Chuck Jordan <cjordan@synopsys.com>
This commit is contained in:
Chuck Jordan 2016-05-27 12:25:00 -07:00 committed by Anas Nashif
commit 5f14d8fb01
3 changed files with 28 additions and 55 deletions

View file

@ -45,14 +45,16 @@ SECTION_VAR(NOINIT, _firq_stack)
* *
* The processor switches to a second register bank so registers from the * The processor switches to a second register bank so registers from the
* current bank do not have to be preserved yet. The only issue is the LP_START/ * current bank do not have to be preserved yet. The only issue is the LP_START/
* LP_COUNT/LP_END registers, which are not banked. * LP_COUNT/LP_END registers, which are not banked. These can be saved
* in available callee saved registers.
* *
* If all FIRQ ISRs are programmed such that there are no use of the LP * If all FIRQ ISRs are programmed such that there are no use of the LP
* registers (ie. no LPcc instruction), and CONFIG_ARC_STACK_CHECKING is * registers (ie. no LPcc instruction), and CONFIG_ARC_STACK_CHECKING is
* not set, then the kernel can be configured to remove the use of _firq_enter(). * not set, then the kernel can be configured to not save and restore them.
* *
* When entering a FIRQ, interrupts might as well be locked: the processor is * When entering a FIRQ, interrupts might as well be locked: the processor is
* running at its highest priority, and cannot be preempted by anything. * running at its highest priority, and cannot be interrupted by any other
* interrupt. An exception, however, can be taken.
* *
* Assumption by _isr_demux: r3 is untouched by _firq_enter. * Assumption by _isr_demux: r3 is untouched by _firq_enter.
* *
@ -61,6 +63,11 @@ SECTION_VAR(NOINIT, _firq_stack)
SECTION_FUNC(TEXT, _firq_enter) SECTION_FUNC(TEXT, _firq_enter)
/*
* ATTENTION:
* firq uses a 2nd register bank so GPRs do not need to be saved.
*/
#ifdef CONFIG_ARC_STACK_CHECKING #ifdef CONFIG_ARC_STACK_CHECKING
/* disable stack checking */ /* disable stack checking */
lr r2, [_ARC_V2_STATUS32] lr r2, [_ARC_V2_STATUS32]
@ -70,20 +77,13 @@ SECTION_FUNC(TEXT, _firq_enter)
#ifndef CONFIG_FIRQ_NO_LPCC #ifndef CONFIG_FIRQ_NO_LPCC
/* /*
* Unlike the rest of context switching code, r2 is loaded with something * Save LP_START/LP_COUNT/LP_END because called handler might use.
* else than 'current' in this routine: this is to preserve r3 so that it * Save these in callee saved registers to avoid using memory.
* does not have to be fetched again in _isr_demux. * These will be saved by the compiler if it needs to spill them.
*/ */
mov r23,lp_count
/* save LP_START/LP_COUNT/LP_END variables */ lr r24, [_ARC_V2_LP_START]
mov_s r1, _nanokernel lr r25, [_ARC_V2_LP_END]
st lp_count, [r1, __tNANO_firq_regs_OFFSET + __tFirqRegs_lp_count_OFFSET]
lr r2, [_ARC_V2_LP_START]
st_s r2, [r1, __tNANO_firq_regs_OFFSET + __tFirqRegs_lp_start_OFFSET]
lr r2, [_ARC_V2_LP_END]
st_s r2, [r1, __tNANO_firq_regs_OFFSET + __tFirqRegs_lp_end_OFFSET]
#endif #endif
j @_isr_demux j @_isr_demux
@ -97,30 +97,19 @@ SECTION_FUNC(TEXT, _firq_enter)
SECTION_FUNC(TEXT, _firq_exit) SECTION_FUNC(TEXT, _firq_exit)
#ifndef CONFIG_FIRQ_NO_LPCC
/* restore lp_count, lp_start, lp_end from r23-r25 */
mov lp_count,r23
sr r24, [_ARC_V2_LP_START]
sr r25, [_ARC_V2_LP_END]
#endif
mov_s r1, _nanokernel mov_s r1, _nanokernel
ld_s r2, [r1, __tNANO_current_OFFSET] ld_s r2, [r1, __tNANO_current_OFFSET]
#ifndef CONFIG_FIRQ_NO_LPCC
/* assumption: r1 contains _nanokernel, r2 contains the current thread */
/* restore LP_START/LP_COUNT/LP_END variables */
/* cannot load lp_count from memory */
ld_s r3, [r1, __tNANO_firq_regs_OFFSET + __tFirqRegs_lp_count_OFFSET]
mov lp_count, r3
ld_s r3, [r1, __tNANO_firq_regs_OFFSET + __tFirqRegs_lp_start_OFFSET]
sr r3, [_ARC_V2_LP_START]
ld_s r3, [r1, __tNANO_firq_regs_OFFSET + __tFirqRegs_lp_end_OFFSET]
sr r3, [_ARC_V2_LP_END]
/* exiting here: r1/r2 unchanged, r0/r3 destroyed */
#endif
#if CONFIG_NUM_IRQ_PRIO_LEVELS > 1 #if CONFIG_NUM_IRQ_PRIO_LEVELS > 1
/* check if we're a nested interrupt: if so, let the interrupted interrupt /* check if we're a nested interrupt: if so, let the interrupted
* handle the reschedule */ * interrupt handle the reschedule */
lr r3, [_ARC_V2_AUX_IRQ_ACT] lr r3, [_ARC_V2_AUX_IRQ_ACT]
@ -225,9 +214,9 @@ _firq_return_from_coop:
/* /*
* There are only two interrupt lock states: locked and unlocked. When * There are only two interrupt lock states: locked and unlocked. When
* entering _Swap(), they are always locked, so the IE bit is unset in * entering _Swap(), they are always locked, so the IE bit is unset in
* status32. If the incoming thread had them locked recursively, it means * status32. If the incoming thread had them locked recursively, it
* that the IE bit should stay unset. The only time the bit has to change * means that the IE bit should stay unset. The only time the bit
* is if they were not locked recursively. * has to change is if they were not locked recursively.
*/ */
and.f r3, r3, (1 << 4) and.f r3, r3, (1 << 4)
or.nz r0, r0, _ARC_V2_STATUS32_IE or.nz r0, r0, _ARC_V2_STATUS32_IE

View file

@ -39,7 +39,6 @@
/* ARCv2-specific tNANO structure member offsets */ /* ARCv2-specific tNANO structure member offsets */
GEN_OFFSET_SYM(tNANO, rirq_sp); GEN_OFFSET_SYM(tNANO, rirq_sp);
GEN_OFFSET_SYM(tNANO, firq_regs);
#ifdef CONFIG_SYS_POWER_MANAGEMENT #ifdef CONFIG_SYS_POWER_MANAGEMENT
GEN_OFFSET_SYM(tNANO, idle); GEN_OFFSET_SYM(tNANO, idle);
#endif #endif
@ -102,12 +101,6 @@ GEN_OFFSET_SYM(tCalleeSaved, fp);
GEN_OFFSET_SYM(tCalleeSaved, r30); GEN_OFFSET_SYM(tCalleeSaved, r30);
GEN_ABSOLUTE_SYM(__tCalleeSaved_SIZEOF, sizeof(tCalleeSaved)); GEN_ABSOLUTE_SYM(__tCalleeSaved_SIZEOF, sizeof(tCalleeSaved));
/* ARCv2-specific registers-saved-in-FIRQ structure member offsets */
GEN_OFFSET_SYM(tFirqRegs, lp_count);
GEN_OFFSET_SYM(tFirqRegs, lp_start);
GEN_OFFSET_SYM(tFirqRegs, lp_end);
GEN_ABSOLUTE_SYM(__tFirqRegs_SIZEOF, sizeof(tFirqRegs));
/* size of the struct tcs structure sans save area for floating point regs */ /* size of the struct tcs structure sans save area for floating point regs */
GEN_ABSOLUTE_SYM(__tTCS_NOFLOAT_SIZEOF, sizeof(tTCS)); GEN_ABSOLUTE_SYM(__tTCS_NOFLOAT_SIZEOF, sizeof(tTCS));

View file

@ -130,14 +130,6 @@ struct callee_saved {
}; };
typedef struct callee_saved tCalleeSaved; typedef struct callee_saved tCalleeSaved;
/* registers saved by software when taking a FIRQ */
struct firq_regs {
uint32_t lp_count;
uint32_t lp_start;
uint32_t lp_end;
};
typedef struct firq_regs tFirqRegs;
#endif /* _ASMLANGUAGE */ #endif /* _ASMLANGUAGE */
/* Bitmask definitions for the struct tcs->flags bit field */ /* Bitmask definitions for the struct tcs->flags bit field */
@ -226,7 +218,6 @@ struct s_NANO {
* there is no need to track it in _nanokernel. * there is no need to track it in _nanokernel.
*/ */
struct firq_regs firq_regs;
#if defined(CONFIG_NANO_TIMEOUTS) || defined(CONFIG_NANO_TIMERS) #if defined(CONFIG_NANO_TIMEOUTS) || defined(CONFIG_NANO_TIMERS)
sys_dlist_t timeout_q; sys_dlist_t timeout_q;
int32_t task_timeout; int32_t task_timeout;