arch/xtensa: Optimize cache management on context switch
Making context switch cache-coherent in SMP is hard. The KERNEL_COHERENCE handling was conservatively invalidating the stack region of a thread that was being switched in. This was because it might have (1) run on this CPU in the past, but (2) run most recently on a different CPU. In that case we might have stale data still in our local dcache!

But this has performance impact in the (very common!) case of a thread being switched out briefly and then back in (e.g. k_sleep() for a small duration). It will come back having lost all of its cached stack context, and will have to fetch all that information back from shared SRAM!

Treat this by tracking a "last_cpu" for each thread in the arch part of the thread struct. If we're coming back to the same CPU we left, we know we can skip the invalidate.

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
commit 64a3159dee
parent d490358e28
3 changed files with 18 additions and 6 deletions
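The change boils down to gating the inbound-stack invalidation on a per-thread "last CPU" field. Below is a minimal sketch of the idea, not the actual Zephyr code: the simplified structs, the cohere_inbound_stack() helper, and the cache_invalidate() stand-in (for z_xtensa_cache_inv()) are all illustrative assumptions.

    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative stand-ins for the real kernel types and cache API. */
    struct thread_arch { int32_t last_cpu; };       /* -1 == "never ran anywhere" */
    struct thread { struct thread_arch arch; };

    void cache_invalidate(void *addr, size_t size); /* stand-in for z_xtensa_cache_inv() */

    /* Called on CPU curr_cpu while switching from `outgoing` to `incoming`.
     * The live region of the inbound stack, [sp, stack_end), only needs to be
     * invalidated when the incoming thread last ran on a different CPU (or has
     * never run, last_cpu == -1); only then can the local dcache hold stale
     * lines for that region.
     */
    void cohere_inbound_stack(struct thread *outgoing, struct thread *incoming,
                              int32_t curr_cpu, char *sp, char *stack_end)
    {
            if (curr_cpu != incoming->arch.last_cpu) {
                    cache_invalidate(sp, (size_t)(stack_end - sp));
            }

            /* The outgoing thread just ran here; remember that so a quick
             * switch back onto this CPU (e.g. after a short k_sleep()) can
             * skip the invalidation and keep its warm stack cache.
             */
            outgoing->arch.last_cpu = curr_cpu;
    }

The actual patch applies exactly this check around the existing z_xtensa_cache_inv() call in arch_cohere_stacks(), as shown in the hunks below.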
@@ -20,6 +20,14 @@ void *xtensa_init_stack(struct k_thread *thread, int *stack_top,
 			    void (*entry)(void *, void *, void *),
 			    void *arg1, void *arg2, void *arg3)
 {
+	/* Not-a-cpu ID.  Ensures that the first time this is run, the
+	 * stack will be invalidated.  That covers the edge case of
+	 * restarting a thread on a stack that had previously been run
+	 * on one CPU, but then initialized on this one, and
+	 * potentially run THERE and not HERE.
+	 */
+	thread->arch.last_cpu = -1;
+
 	/* We cheat and shave 16 bytes off, the top four words are the
 	 * A0-A3 spill area for the caller of the entry function,
 	 * which doesn't exist.  It will never be touched, so we
@@ -65,6 +65,8 @@ static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
 					     void *old_switch_handle,
 					     struct k_thread *new_thread)
 {
+	int32_t curr_cpu = _current_cpu->id;
+
 	size_t ostack = old_thread->stack_info.start;
 	size_t osz    = old_thread->stack_info.size;
 	size_t osp    = (size_t) old_switch_handle;
@@ -84,10 +86,9 @@ static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
 
 	/* The "live" area (the region between the switch handle,
 	 * which is the stack pointer, and the top of the stack
-	 * memory) of the inbound stack needs to be invalidated: it
-	 * may contain data that was modified on another CPU since the
-	 * last time this CPU ran the thread, and our cache may be
-	 * stale.
+	 * memory) of the inbound stack needs to be invalidated if we
+	 * last ran on another cpu: it may contain data that was
+	 * modified there, and our cache may be stale.
 	 *
 	 * The corresponding "dead area" of the inbound stack can be
 	 * ignored.  We may have cached data in that region, but by
@@ -96,7 +97,10 @@ static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
 	 * uninitialized data error) so our stale cache will be
 	 * automatically overwritten as needed.
 	 */
-	z_xtensa_cache_inv((void *)nsp, (nstack + nsz) - nsp);
+	if (curr_cpu != new_thread->arch.last_cpu) {
+		z_xtensa_cache_inv((void *)nsp, (nstack + nsz) - nsp);
+	}
+	old_thread->arch.last_cpu = curr_cpu;
 
 	/* Dummy threads appear at system initialization, but don't
 	 * have stack_info data and will never be saved.  Ignore.
@@ -21,7 +21,7 @@ struct _callee_saved {
 typedef struct _callee_saved _callee_saved_t;
 
 struct _thread_arch {
-	char dummy;
+	uint32_t last_cpu;
 };
 
 typedef struct _thread_arch _thread_arch_t;