kernel: handle thread self-aborts on idle thread
Fixes races where threads on another CPU are joining the exiting thread, since it could still be running when the joiners wake up on a different CPU.

Fixes problems where the thread object is still being used by the kernel when the fn_abort() function is called, preventing the thread object from being recycled or freed back to a slab pool.

Fixes a race where a thread is aborted from one CPU while it self-aborts on another CPU, which was previously worked around with a busy-wait.

Precedent for doing this comes from FreeRTOS, which also performs final thread cleanup in the idle thread.

Some logic in z_thread_single_abort() has been rearranged so that once sched_spinlock is released, the thread object pointer is never dereferenced by the kernel again; join waiters or fn_abort() logic may free it immediately.

An assertion has been added to z_thread_single_abort() to ensure it is never called with thread == _current outside of an ISR.

Logic has also been added to ensure the z_thread_single_abort() cleanup tasks don't run more than once.

Fixes: #26486
Related to: #23063 #23062

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
commit f5a7e1a108
parent e3796b95a2
9 changed files with 159 additions and 74 deletions
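As a quick illustration of the behavior this change is meant to make safe, here is a minimal hedged sketch (not part of this commit; the thread name, stack size, and priority are made up for the example): a worker thread aborts itself while another thread joins it with k_thread_join(). With this patch, the final cleanup of the self-aborting worker is handed off to the idle thread of its CPU, so by the time the joiner wakes up the kernel no longer touches the thread object and it may be reused or freed immediately.

/* Hypothetical sample, not part of this commit. Uses standard Zephyr
 * thread APIs of this era; names and sizes are illustrative only.
 */
#include <zephyr.h>

#define WORKER_STACK_SIZE 1024
#define WORKER_PRIO       5

K_THREAD_STACK_DEFINE(worker_stack, WORKER_STACK_SIZE);
static struct k_thread worker_thread;

static void worker_entry(void *a, void *b, void *c)
{
        /* ... do some work ... */

        /* Self-abort: after this patch, final cleanup is deferred to the
         * idle thread of the current CPU via cpu->pending_abort.
         */
        k_thread_abort(k_current_get());
}

void main(void)
{
        k_tid_t tid = k_thread_create(&worker_thread, worker_stack,
                                      K_THREAD_STACK_SIZEOF(worker_stack),
                                      worker_entry, NULL, NULL, NULL,
                                      WORKER_PRIO, 0, K_NO_WAIT);

        /* The joiner may run on another CPU; it is only woken once the
         * worker is truly finished with its stack and thread object.
         */
        k_thread_join(tid, K_FOREVER);

        /* Safe to reuse &worker_thread and worker_stack here. */
}

The diff below implements this behavior in the scheduler, the idle thread, and the affected architecture layers.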
@@ -26,8 +26,6 @@
void z_impl_k_thread_abort(k_tid_t thread)
{
        z_thread_single_abort(thread);

        if (_current == thread) {
                if (arch_is_in_isr()) {
                        /* ARM is unlike most arches in that this is true
@@ -43,10 +41,12 @@ void z_impl_k_thread_abort(k_tid_t thread)
                         */
                        SCB->ICSR |= SCB_ICSR_PENDSVSET_Msk;
                } else {
                        z_swap_unlocked();
                        z_self_abort(); /* Never returns */
                }
        }

        z_thread_single_abort(thread);

        /* The abort handler might have altered the ready queue. */
        z_reschedule_unlocked();
}
@@ -490,8 +490,6 @@ void z_impl_k_thread_abort(k_tid_t thread)
        key = irq_lock();

        z_thread_single_abort(thread);

        if (_current == thread) {
                if (tstatus->aborted == 0) { /* LCOV_EXCL_BR_LINE */
                        tstatus->aborted = 1;
@@ -510,13 +508,16 @@ void z_impl_k_thread_abort(k_tid_t thread)
                         __func__);

                if (arch_is_in_isr()) {
                        z_thread_single_abort(thread);
                        return;
                }

                (void)z_swap_irqlock(key);
                z_self_abort();
                CODE_UNREACHABLE; /* LCOV_EXCL_LINE */
        }

        z_thread_single_abort(thread);

        if (tstatus->aborted == 0) {
                PC_DEBUG("%s aborting now [%i] %i\n",
                         __func__,
@@ -58,12 +58,9 @@
/* Thread is suspended */
#define _THREAD_SUSPENDED (BIT(4))

/* Thread is being aborted (SMP only) */
/* Thread is being aborted */
#define _THREAD_ABORTING (BIT(5))

/* Thread was aborted in interrupt context (SMP only) */
#define _THREAD_ABORTED_IN_ISR (BIT(6))

/* Thread is present in the ready queue */
#define _THREAD_QUEUED (BIT(7))

@@ -112,6 +109,9 @@ struct _cpu {
        /* one assigned idle thread per CPU */
        struct k_thread *idle_thread;

        /* If non-null, self-aborted thread that needs cleanup */
        struct k_thread *pending_abort;

#if (CONFIG_NUM_METAIRQ_PRIORITIES > 0) && (CONFIG_NUM_COOP_PRIORITIES > 0)
        /* Coop thread preempted by current metairq, or NULL */
        struct k_thread *metairq_preempted;
@@ -164,6 +164,7 @@ config IDLE_STACK_SIZE
        default 2048 if COVERAGE_GCOV
        default 1024 if XTENSA
        default 512 if RISCV
        default 384 if DYNAMIC_OBJECTS
        default 320 if ARC || (ARM && CPU_HAS_FPU)
        default 256
        help
@@ -11,6 +11,10 @@
#include <wait_q.h>
#include <power/power.h>
#include <stdbool.h>
#include <logging/log.h>
#include <ksched.h>

LOG_MODULE_DECLARE(os);

#ifdef CONFIG_TICKLESS_IDLE_THRESH
#define IDLE_THRESH CONFIG_TICKLESS_IDLE_THRESH
@@ -137,9 +141,10 @@ void z_sys_power_save_idle_exit(int32_t ticks)
#define IDLE_YIELD_IF_COOP() do { } while (false)
#endif

void idle(void *unused1, void *unused2, void *unused3)
void idle(void *p1, void *unused2, void *unused3)
{
        ARG_UNUSED(unused1);
        struct _cpu *cpu = p1;

        ARG_UNUSED(unused2);
        ARG_UNUSED(unused3);

@@ -152,6 +157,38 @@ void idle(void *unused1, void *unused2, void *unused3)
#endif

        while (true) {
                /* Lock interrupts to atomically check if to_abort is non-NULL,
                 * and if so clear it
                 */
                int key = arch_irq_lock();
                struct k_thread *to_abort = cpu->pending_abort;

                if (to_abort) {
                        cpu->pending_abort = NULL;
                        arch_irq_unlock(key);

                        /* Safe to unlock interrupts here. We've atomically
                         * checked and stashed cpu->pending_abort into a stack
                         * variable. If we get preempted here and another
                         * thread aborts, cpu->pending_abort will get set
                         * again and we'll handle it when the loop iteration
                         * is continued below.
                         */
                        LOG_DBG("idle %p aborting thread %p",
                                _current, to_abort);

                        z_thread_single_abort(to_abort);

                        /* We have to invoke the scheduler now. If we got
                         * here, the idle thread preempted everything else
                         * in order to abort the thread, and we now need to
                         * figure out what to do next; it's not necessarily
                         * the case that there are no other runnable threads.
                         */
                        z_reschedule_unlocked();
                        continue;
                }
                arch_irq_unlock(key);
#if SMP_FALLBACK
                k_busy_wait(100);
                k_yield();
@@ -64,6 +64,7 @@ void z_sched_ipi(void);
void z_sched_start(struct k_thread *thread);
void z_ready_thread(struct k_thread *thread);
void z_thread_single_abort(struct k_thread *thread);
FUNC_NORETURN void z_self_abort(void);

static inline void z_pend_curr_unlocked(_wait_q_t *wait_q, k_timeout_t timeout)
{
@@ -294,8 +294,9 @@ static void init_idle_thread(int i)
#endif /* CONFIG_THREAD_NAME */

        z_setup_new_thread(thread, stack,
                           CONFIG_IDLE_STACK_SIZE, idle, NULL, NULL, NULL,
                           K_LOWEST_THREAD_PRIO, K_ESSENTIAL, tname);
                           CONFIG_IDLE_STACK_SIZE, idle, &_kernel.cpus[i],
                           NULL, NULL, K_LOWEST_THREAD_PRIO, K_ESSENTIAL,
                           tname);
        z_mark_thread_as_started(thread);

#ifdef CONFIG_SMP
kernel/sched.c
@@ -182,7 +182,16 @@ static ALWAYS_INLINE struct k_thread *_priq_dumb_mask_best(sys_dlist_t *pq)

static ALWAYS_INLINE struct k_thread *next_up(void)
{
        struct k_thread *thread = _priq_run_best(&_kernel.ready_q.runq);
        struct k_thread *thread;

        /* If a thread self-aborted we need the idle thread to clean it up
         * before any other thread can run on this CPU
         */
        if (_current_cpu->pending_abort != NULL) {
                return _current_cpu->idle_thread;
        }

        thread = _priq_run_best(&_kernel.ready_q.runq);

#if (CONFIG_NUM_METAIRQ_PRIORITIES > 0) && (CONFIG_NUM_COOP_PRIORITIES > 0)
        /* MetaIRQs must always attempt to return back to a
@@ -366,7 +375,7 @@ static void update_metairq_preempt(struct k_thread *thread)
            !is_preempt(_current)) {
                /* Record new preemption */
                _current_cpu->metairq_preempted = _current;
        } else if (!is_metairq(thread)) {
        } else if (!is_metairq(thread) && !z_is_idle_thread_object(thread)) {
                /* Returning from existing preemption */
                _current_cpu->metairq_preempted = NULL;
        }
@@ -497,12 +506,25 @@ static _wait_q_t *pended_on(struct k_thread *thread)

void z_thread_single_abort(struct k_thread *thread)
{
        void (*fn_abort)(void) = NULL;

        __ASSERT(!(thread->base.user_options & K_ESSENTIAL),
                 "essential thread aborted");
        __ASSERT(thread != _current || arch_is_in_isr(),
                 "self-abort detected");

        if (thread->fn_abort != NULL) {
                thread->fn_abort();
        /* Prevent any of the further logic in this function from running more
         * than once
         */
        k_spinlock_key_t key = k_spin_lock(&sched_spinlock);
        if ((thread->base.thread_state &
             (_THREAD_ABORTING | _THREAD_DEAD)) != 0) {
                LOG_DBG("Thread %p already dead or on the way out", thread);
                k_spin_unlock(&sched_spinlock, key);
                return;
        }
        thread->base.thread_state |= _THREAD_ABORTING;
        k_spin_unlock(&sched_spinlock, key);

        (void)z_abort_thread_timeout(thread);

@@ -511,6 +533,7 @@ void z_thread_single_abort(struct k_thread *thread)
        }

        LOCKED(&sched_spinlock) {
                LOG_DBG("Cleanup aborting thread %p", thread);
                struct k_thread *waiter;

                if (z_is_thread_ready(thread)) {
@@ -529,37 +552,6 @@ void z_thread_single_abort(struct k_thread *thread)
                        }
                }

                uint32_t mask = _THREAD_DEAD;

                /* If the abort is happening in interrupt context,
                 * that means that execution will never return to the
                 * thread's stack and that the abort is known to be
                 * complete. Otherwise the thread still runs a bit
                 * until it can swap, requiring a delay.
                 */
                if (IS_ENABLED(CONFIG_SMP) && arch_is_in_isr()) {
                        mask |= _THREAD_ABORTED_IN_ISR;
                }

                thread->base.thread_state |= mask;

#ifdef CONFIG_USERSPACE
                /* Clear initialized state so that this thread object may be
                 * re-used and triggers errors if API calls are made on it from
                 * user threads
                 *
                 * For a whole host of reasons this is not ideal and should be
                 * iterated on.
                 */
                z_object_uninit(thread->stack_obj);
                z_object_uninit(thread);

                /* Revoke permissions on thread's ID so that it may be
                 * recycled
                 */
                z_thread_perms_all_clear(thread);
#endif

                /* Wake everybody up who was trying to join with this thread.
                 * A reschedule is invoked later by k_thread_abort().
                 */
@@ -572,8 +564,49 @@
                        arch_thread_return_value_set(waiter, 0);
                        ready_thread(waiter);
                }

                if (z_is_idle_thread_object(_current)) {
                        update_cache(1);
                }

                thread->base.thread_state |= _THREAD_DEAD;

                /* Read this here from the thread struct now instead of
                 * after we unlock
                 */
                fn_abort = thread->fn_abort;

                /* Keep inside the spinlock as these may use the contents
                 * of the thread object. As soon as we release this spinlock,
                 * the thread object could be destroyed at any time.
                 */
                sys_trace_thread_abort(thread);
                z_thread_monitor_exit(thread);

#ifdef CONFIG_USERSPACE
                /* Revoke permissions on thread's ID so that it may be
                 * recycled
                 */
                z_thread_perms_all_clear(thread);

                /* Clear initialized state so that this thread object may be
                 * re-used and triggers errors if API calls are made on it from
                 * user threads
                 */
                z_object_uninit(thread->stack_obj);
                z_object_uninit(thread);
#endif
                /* Kernel should never look at the thread object again past
                 * this point unless another thread API is called. If the
                 * object doesn't get corrupted, we'll catch other
                 * k_thread_abort()s on this object, although this is
                 * somewhat undefined behavior. It must be safe to call
                 * k_thread_create() or free the object at this point.
                 */
        }

        if (fn_abort != NULL) {
                fn_abort();
        }
}
@@ -1343,9 +1376,6 @@ void z_sched_abort(struct k_thread *thread)
         * it locally. Not all architectures support that, alas. If
         * we don't have it, we need to wait for some other interrupt.
         */
        key = k_spin_lock(&sched_spinlock);
        thread->base.thread_state |= _THREAD_ABORTING;
        k_spin_unlock(&sched_spinlock, key);
#ifdef CONFIG_SCHED_IPI_SUPPORTED
        arch_sched_ipi();
#endif

@@ -1369,16 +1399,6 @@
                        k_busy_wait(100);
                }
        }

        /* If the thread self-aborted (e.g. its own exit raced with
         * this external abort) then even though it is flagged DEAD,
         * it's still running until its next swap and thus the thread
         * object is still in use. We have to resort to a fallback
         * delay in that circumstance.
         */
        if ((thread->base.thread_state & _THREAD_ABORTED_IN_ISR) == 0U) {
                k_busy_wait(THREAD_ABORT_DELAY_US);
        }
}
#endif
@@ -21,25 +21,49 @@
#include <ksched.h>
#include <sys/__assert.h>
#include <syscall_handler.h>
#include <logging/log.h>
LOG_MODULE_DECLARE(os);

FUNC_NORETURN void z_self_abort(void)
{
        /* Self-aborting threads don't clean themselves up, we
         * have the idle thread for the current CPU do it.
         */
        int key;
        struct _cpu *cpu;

        /* Lock local IRQs to prevent us from migrating to another CPU
         * while we set this up
         */
        key = arch_irq_lock();
        cpu = _current_cpu;
        __ASSERT(cpu->pending_abort == NULL, "already have a thread to abort");
        cpu->pending_abort = _current;

        LOG_DBG("%p self-aborting, handle on idle thread %p",
                _current, cpu->idle_thread);

        k_thread_suspend(_current);
        z_swap_irqlock(key);
        __ASSERT(false, "should never get here");
        CODE_UNREACHABLE;
}

#if !defined(CONFIG_ARCH_HAS_THREAD_ABORT)
void z_impl_k_thread_abort(k_tid_t thread)
{
        if (thread == _current && !arch_is_in_isr()) {
                /* Thread is self-exiting, idle thread on this CPU will do
                 * the cleanup
                 */
                z_self_abort();
        }

        z_thread_single_abort(thread);

        /* If we're in an interrupt handler, we reschedule on the way out
         * anyway, nothing needs to be done here.
         */
        if (!arch_is_in_isr()) {
                if (thread == _current) {
                        /* Direct use of swap: reschedule doesn't have a test
                         * for "is _current dead" and we don't want one for
                         * performance reasons.
                         */
                        z_swap_unlocked();
                } else {
                        /* Don't need to do this if we're in an ISR */
                        z_reschedule_unlocked();
                }
        }
}
#endif