diff --git a/kernel/include/kswap.h b/kernel/include/kswap.h
index 3537a24d21d..311df0f2f06 100644
--- a/kernel/include/kswap.h
+++ b/kernel/include/kswap.h
@@ -29,6 +29,26 @@ void z_smp_release_global_lock(struct k_thread *thread);
 /* context switching and scheduling-related routines */
 #ifdef CONFIG_USE_SWITCH
 
+/* There is an unavoidable SMP race when threads swap -- their thread
+ * record is in the queue (and visible to other CPUs) before
+ * arch_switch() finishes saving state.  We must spin for the switch
+ * handle before entering a new thread.  See docs on arch_switch().
+ *
+ * Note: future SMP architectures may need a fence/barrier or cache
+ * invalidation here.  Current ones don't, and sadly Zephyr doesn't
+ * have a framework for that yet.
+ */
+static inline void wait_for_switch(struct k_thread *thread)
+{
+#ifdef CONFIG_SMP
+	volatile void **shp = (void *)&thread->switch_handle;
+
+	while (*shp == NULL) {
+		k_busy_wait(1);
+	}
+#endif
+}
+
 /* New style context switching.  arch_switch() is a lower level
  * primitive that doesn't know about the scheduler or return value.
  * Needed for SMP, where the scheduler requires spinlocking that we
@@ -57,8 +77,25 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
 		k_spin_release(lock);
 	}
 
+#ifdef CONFIG_SMP
+	/* Null out the switch handle, see wait_for_switch() above.
+	 * Note that we set it back to a non-null value if we are not
+	 * switching!  The value itself doesn't matter, because by
+	 * definition _current is running and has no saved state.
+	 */
+	volatile void **shp = (void *)&old_thread->switch_handle;
+
+	*shp = NULL;
+#endif
+
 	new_thread = z_get_next_ready_thread();
 
+#ifdef CONFIG_SMP
+	if (new_thread == old_thread) {
+		*shp = old_thread;
+	}
+#endif
+
 	if (new_thread != old_thread) {
 		sys_trace_thread_switched_out();
 #ifdef CONFIG_TIMESLICING
@@ -77,6 +114,7 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
 		}
 #endif
 		_current = new_thread;
+		wait_for_switch(new_thread);
 		arch_switch(new_thread->switch_handle,
 			    &old_thread->switch_handle);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index d1b923b7848..f04bcbb0b53 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -757,6 +757,8 @@ void *z_get_next_switch_handle(void *interrupted)
 	    !IS_ENABLED(CONFIG_SCHED_IPI_SUPPORTED)) {
 		z_sched_ipi();
 	}
+
+	wait_for_switch(_current);
 	return _current->switch_handle;
 }
 #endif
diff --git a/kernel/thread.c b/kernel/thread.c
index 527980d34c1..b57dc31d96b 100644
--- a/kernel/thread.c
+++ b/kernel/thread.c
@@ -525,6 +525,11 @@ void z_setup_new_thread(struct k_thread *new_thread,
 	arch_new_thread(new_thread, stack, stack_size, entry, p1, p2, p3,
 			prio, options);
 
+#ifdef CONFIG_SMP
+	/* switch handle must be non-null except when inside z_swap() */
+	new_thread->switch_handle = new_thread;
+#endif
+
 #ifdef CONFIG_THREAD_USERSPACE_LOCAL_DATA
 #ifndef CONFIG_THREAD_USERSPACE_LOCAL_DATA_ARCH_DEFER_SETUP
 	/* don't set again if the arch's own code in arch_new_thread() has
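
Reviewer note: the sketch below is not Zephyr code; it is a minimal, self-contained
illustration of the handle-publication protocol this patch implements. It stands in
pthreads for two CPUs and C11 atomics for the volatile accesses above (the
acquire/release ordering here is an added assumption; the patch's own comment says
current SMP targets don't need an explicit barrier). All names (fake_thread, cpu_a,
cpu_b, saved_regs) are hypothetical.

/* Sketch of the "wait for switch handle" protocol.  "CPU A" nulls the
 * handle before the thread is visible to other CPUs, finishes "saving
 * state", then publishes the handle; "CPU B" spins until the handle is
 * non-NULL before entering the thread.  Build with: cc -std=c11 -pthread
 */
#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>
#include <sched.h>

struct fake_thread {
	_Atomic(void *) switch_handle;   /* NULL => state not yet saved */
	int saved_regs;                  /* stand-in for real CPU context */
};

static struct fake_thread thr;

/* "CPU A": the outgoing side of the context switch (cf. do_swap()) */
static void *cpu_a(void *arg)
{
	(void)arg;

	/* Step 1: null the handle *before* the thread becomes
	 * runnable/visible to another CPU.
	 */
	atomic_store(&thr.switch_handle, NULL);

	/* Step 2: save outgoing state -- in Zephyr this is the tail of
	 * arch_switch(); here a plain assignment stands in for it.
	 */
	thr.saved_regs = 42;

	/* Step 3: publish the handle; only now may another CPU resume
	 * this thread.  Release order makes the saved state visible.
	 */
	atomic_store_explicit(&thr.switch_handle, &thr,
			      memory_order_release);
	return NULL;
}

/* "CPU B": the incoming side, analogous to wait_for_switch() */
static void *cpu_b(void *arg)
{
	(void)arg;

	while (atomic_load_explicit(&thr.switch_handle,
				    memory_order_acquire) == NULL) {
		sched_yield();   /* stand-in for k_busy_wait(1) */
	}
	printf("resuming thread, saved_regs=%d\n", thr.saved_regs);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	atomic_store(&thr.switch_handle, NULL);
	pthread_create(&b, NULL, cpu_b, NULL);
	pthread_create(&a, NULL, cpu_a, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

The ordering obligation is steps 1-3 on the outgoing side: the handle must read as
NULL from the moment the thread is visible in the run queue until state saving is
complete. That is why do_swap() nulls it before calling z_get_next_ready_thread()
and restores it only in the no-switch case, and why both resume paths (do_swap()
and the interrupt-exit path in z_get_next_switch_handle()) spin in
wait_for_switch() before handing the thread's handle to arch_switch().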