kernel: handle thread self-aborts on idle thread

Fixes races where threads on another CPU join the exiting
thread: it could still be running when the joiners wake up on
a different CPU.

Fixes problems where the thread object was still in use by the
kernel when the fn_abort() function was called, preventing the
thread object from being safely recycled or freed back to a
slab pool.

Fixes a race where a thread is aborted from one CPU while it
self-aborts on another CPU, which was previously worked around
with a busy-wait.

Precedent for doing this comes from FreeRTOS, which also
performs final thread cleanup in the idle thread.

Some logic in z_thread_single_abort() has been rearranged so that
once we release sched_spinlock, the kernel never dereferences the
thread object pointer again; join waiters or fn_abort() logic may
free it immediately.

An assertion has been added to z_thread_single_abort() to ensure
it is never called with thread == _current outside of an ISR.

Some logic has been added to ensure the cleanup work in
z_thread_single_abort() doesn't run more than once.

Fixes: #26486
Related to: #23063 #23062

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
commit f5a7e1a108
Andrew Boie authored 2020-09-02 09:20:38 -07:00, committed by Anas Nashif
9 changed files with 159 additions and 74 deletions
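
To make the races above concrete, here is a minimal application-level sketch of the join-then-recycle pattern this change is meant to make safe. It is not part of the commit: the worker thread, stack size, and priority are illustrative, and an SMP-capable configuration is assumed.

#include <zephyr.h>

K_THREAD_STACK_DEFINE(worker_stack, 1024);
static struct k_thread worker;

static void worker_entry(void *p1, void *p2, void *p3)
{
        ARG_UNUSED(p1);
        ARG_UNUSED(p2);
        ARG_UNUSED(p3);
        /* Returning from the entry point self-aborts the thread; with
         * this change, the final cleanup runs on this CPU's idle thread
         * rather than on the dying thread itself.
         */
}

void spawn_join_recycle(void)
{
        k_thread_create(&worker, worker_stack,
                        K_THREAD_STACK_SIZEOF(worker_stack),
                        worker_entry, NULL, NULL, NULL,
                        K_PRIO_PREEMPT(1), 0, K_NO_WAIT);

        /* On SMP the joiner may wake on a different CPU than the one
         * the worker exited on. Previously the worker could still be
         * running at that moment; now the join only completes once the
         * idle thread has finished z_thread_single_abort().
         */
        (void)k_thread_join(&worker, K_FOREVER);

        /* Per the commit message, the thread object and stack may be
         * reused (or freed back to a pool) immediately.
         */
        k_thread_create(&worker, worker_stack,
                        K_THREAD_STACK_SIZEOF(worker_stack),
                        worker_entry, NULL, NULL, NULL,
                        K_PRIO_PREEMPT(1), 0, K_NO_WAIT);
}

The same guarantee is what allows the kernel-side fn_abort() hook to recycle or free the object: once z_thread_single_abort() releases sched_spinlock, nothing in the kernel touches the thread object again.
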

View file

@@ -26,8 +26,6 @@
void z_impl_k_thread_abort(k_tid_t thread)
{
z_thread_single_abort(thread);
if (_current == thread) {
if (arch_is_in_isr()) {
/* ARM is unlike most arches in that this is true
@@ -43,10 +41,12 @@ void z_impl_k_thread_abort(k_tid_t thread)
*/
SCB->ICSR |= SCB_ICSR_PENDSVSET_Msk;
} else {
z_swap_unlocked();
z_self_abort(); /* Never returns */
}
}
z_thread_single_abort(thread);
/* The abort handler might have altered the ready queue. */
z_reschedule_unlocked();
}

View file

@@ -490,8 +490,6 @@ void z_impl_k_thread_abort(k_tid_t thread)
key = irq_lock();
z_thread_single_abort(thread);
if (_current == thread) {
if (tstatus->aborted == 0) { /* LCOV_EXCL_BR_LINE */
tstatus->aborted = 1;
@@ -510,13 +508,16 @@ void z_impl_k_thread_abort(k_tid_t thread)
__func__);
if (arch_is_in_isr()) {
z_thread_single_abort(thread);
return;
}
(void)z_swap_irqlock(key);
z_self_abort();
CODE_UNREACHABLE; /* LCOV_EXCL_LINE */
}
z_thread_single_abort(thread);
if (tstatus->aborted == 0) {
PC_DEBUG("%s aborting now [%i] %i\n",
__func__,

View file

@@ -58,12 +58,9 @@
/* Thread is suspended */
#define _THREAD_SUSPENDED (BIT(4))
/* Thread is being aborted (SMP only) */
/* Thread is being aborted */
#define _THREAD_ABORTING (BIT(5))
/* Thread was aborted in interrupt context (SMP only) */
#define _THREAD_ABORTED_IN_ISR (BIT(6))
/* Thread is present in the ready queue */
#define _THREAD_QUEUED (BIT(7))
@@ -112,6 +109,9 @@ struct _cpu {
/* one assigned idle thread per CPU */
struct k_thread *idle_thread;
/* If non-null, self-aborted thread that needs cleanup */
struct k_thread *pending_abort;
#if (CONFIG_NUM_METAIRQ_PRIORITIES > 0) && (CONFIG_NUM_COOP_PRIORITIES > 0)
/* Coop thread preempted by current metairq, or NULL */
struct k_thread *metairq_preempted;

View file

@@ -164,6 +164,7 @@ config IDLE_STACK_SIZE
default 2048 if COVERAGE_GCOV
default 1024 if XTENSA
default 512 if RISCV
default 384 if DYNAMIC_OBJECTS
default 320 if ARC || (ARM && CPU_HAS_FPU)
default 256
help

View file

@@ -11,6 +11,10 @@
#include <wait_q.h>
#include <power/power.h>
#include <stdbool.h>
#include <logging/log.h>
#include <ksched.h>
LOG_MODULE_DECLARE(os);
#ifdef CONFIG_TICKLESS_IDLE_THRESH
#define IDLE_THRESH CONFIG_TICKLESS_IDLE_THRESH
@@ -137,9 +141,10 @@ void z_sys_power_save_idle_exit(int32_t ticks)
#define IDLE_YIELD_IF_COOP() do { } while (false)
#endif
void idle(void *unused1, void *unused2, void *unused3)
void idle(void *p1, void *unused2, void *unused3)
{
ARG_UNUSED(unused1);
struct _cpu *cpu = p1;
ARG_UNUSED(unused2);
ARG_UNUSED(unused3);
@@ -152,6 +157,38 @@ void idle(void *unused1, void *unused2, void *unused3)
#endif
while (true) {
/* Lock interrupts to atomically check whether cpu->pending_abort
* is non-NULL, and if so clear it
*/
int key = arch_irq_lock();
struct k_thread *to_abort = cpu->pending_abort;
if (to_abort) {
cpu->pending_abort = NULL;
arch_irq_unlock(key);
/* Safe to unlock interrupts here. We've atomically
* checked and stashed cpu->pending_abort into a stack
* variable. If we get preempted here and another
* thread aborts, cpu->pending_abort will get set
* again and we'll handle it when the loop iteration
* is continued below.
*/
LOG_DBG("idle %p aborting thread %p",
_current, to_abort);
z_thread_single_abort(to_abort);
/* We have to invoke the scheduler now. If we got
* here, the idle thread preempted everything else
* in order to abort the thread, and we now need to
* figure out what to do next; it's not necessarily
* the case that there are no other runnable threads.
*/
z_reschedule_unlocked();
continue;
}
arch_irq_unlock(key);
#if SMP_FALLBACK
k_busy_wait(100);
k_yield();

View file

@@ -64,6 +64,7 @@ void z_sched_ipi(void);
void z_sched_start(struct k_thread *thread);
void z_ready_thread(struct k_thread *thread);
void z_thread_single_abort(struct k_thread *thread);
FUNC_NORETURN void z_self_abort(void);
static inline void z_pend_curr_unlocked(_wait_q_t *wait_q, k_timeout_t timeout)
{

View file

@@ -294,8 +294,9 @@ static void init_idle_thread(int i)
#endif /* CONFIG_THREAD_NAME */
z_setup_new_thread(thread, stack,
CONFIG_IDLE_STACK_SIZE, idle, NULL, NULL, NULL,
K_LOWEST_THREAD_PRIO, K_ESSENTIAL, tname);
CONFIG_IDLE_STACK_SIZE, idle, &_kernel.cpus[i],
NULL, NULL, K_LOWEST_THREAD_PRIO, K_ESSENTIAL,
tname);
z_mark_thread_as_started(thread);
#ifdef CONFIG_SMP

View file

@@ -182,7 +182,16 @@ static ALWAYS_INLINE struct k_thread *_priq_dumb_mask_best(sys_dlist_t *pq)
static ALWAYS_INLINE struct k_thread *next_up(void)
{
struct k_thread *thread = _priq_run_best(&_kernel.ready_q.runq);
struct k_thread *thread;
/* If a thread self-aborted we need the idle thread to clean it up
* before any other thread can run on this CPU
*/
if (_current_cpu->pending_abort != NULL) {
return _current_cpu->idle_thread;
}
thread = _priq_run_best(&_kernel.ready_q.runq);
#if (CONFIG_NUM_METAIRQ_PRIORITIES > 0) && (CONFIG_NUM_COOP_PRIORITIES > 0)
/* MetaIRQs must always attempt to return back to a
@@ -366,7 +375,7 @@ static void update_metairq_preempt(struct k_thread *thread)
!is_preempt(_current)) {
/* Record new preemption */
_current_cpu->metairq_preempted = _current;
} else if (!is_metairq(thread)) {
} else if (!is_metairq(thread) && !z_is_idle_thread_object(thread)) {
/* Returning from existing preemption */
_current_cpu->metairq_preempted = NULL;
}
@@ -497,12 +506,25 @@ static _wait_q_t *pended_on(struct k_thread *thread)
void z_thread_single_abort(struct k_thread *thread)
{
void (*fn_abort)(void) = NULL;
__ASSERT(!(thread->base.user_options & K_ESSENTIAL),
"essential thread aborted");
__ASSERT(thread != _current || arch_is_in_isr(),
"self-abort detected");
if (thread->fn_abort != NULL) {
thread->fn_abort();
/* Prevent any of the further logic in this function from running more
* than once
*/
k_spinlock_key_t key = k_spin_lock(&sched_spinlock);
if ((thread->base.thread_state &
(_THREAD_ABORTING | _THREAD_DEAD)) != 0) {
LOG_DBG("Thread %p already dead or on the way out", thread);
k_spin_unlock(&sched_spinlock, key);
return;
}
thread->base.thread_state |= _THREAD_ABORTING;
k_spin_unlock(&sched_spinlock, key);
(void)z_abort_thread_timeout(thread);
@@ -511,6 +533,7 @@ void z_thread_single_abort(struct k_thread *thread)
}
LOCKED(&sched_spinlock) {
LOG_DBG("Cleanup aborting thread %p", thread);
struct k_thread *waiter;
if (z_is_thread_ready(thread)) {
@@ -529,37 +552,6 @@ void z_thread_single_abort(struct k_thread *thread)
}
}
uint32_t mask = _THREAD_DEAD;
/* If the abort is happening in interrupt context,
* that means that execution will never return to the
* thread's stack and that the abort is known to be
* complete. Otherwise the thread still runs a bit
* until it can swap, requiring a delay.
*/
if (IS_ENABLED(CONFIG_SMP) && arch_is_in_isr()) {
mask |= _THREAD_ABORTED_IN_ISR;
}
thread->base.thread_state |= mask;
#ifdef CONFIG_USERSPACE
/* Clear initialized state so that this thread object may be
* re-used and triggers errors if API calls are made on it from
* user threads
*
* For a whole host of reasons this is not ideal and should be
* iterated on.
*/
z_object_uninit(thread->stack_obj);
z_object_uninit(thread);
/* Revoke permissions on thread's ID so that it may be
* recycled
*/
z_thread_perms_all_clear(thread);
#endif
/* Wake everybody up who was trying to join with this thread.
* A reschedule is invoked later by k_thread_abort().
*/
@@ -572,8 +564,49 @@ void z_thread_single_abort(struct k_thread *thread)
arch_thread_return_value_set(waiter, 0);
ready_thread(waiter);
}
if (z_is_idle_thread_object(_current)) {
update_cache(1);
}
thread->base.thread_state |= _THREAD_DEAD;
/* Read this here from the thread struct now instead of
* after we unlock
*/
fn_abort = thread->fn_abort;
/* Keep inside the spinlock as these may use the contents
* of the thread object. As soon as we release this spinlock,
* the thread object could be destroyed at any time.
*/
sys_trace_thread_abort(thread);
z_thread_monitor_exit(thread);
#ifdef CONFIG_USERSPACE
/* Revoke permissions on thread's ID so that it may be
* recycled
*/
z_thread_perms_all_clear(thread);
/* Clear initialized state so that this thread object may be
* re-used and triggers errors if API calls are made on it from
* user threads
*/
z_object_uninit(thread->stack_obj);
z_object_uninit(thread);
#endif
/* Kernel should never look at the thread object again past
* this point unless another thread API is called. If the
* object doesn't get corrupted, we'll catch other
* k_thread_abort()s on this object, although this is
* somewhat undefined behavior. It must be safe to call
* k_thread_create() or free the object at this point.
*/
}
if (fn_abort != NULL) {
fn_abort();
}
}
@@ -1343,9 +1376,6 @@ void z_sched_abort(struct k_thread *thread)
* it locally. Not all architectures support that, alas. If
* we don't have it, we need to wait for some other interrupt.
*/
key = k_spin_lock(&sched_spinlock);
thread->base.thread_state |= _THREAD_ABORTING;
k_spin_unlock(&sched_spinlock, key);
#ifdef CONFIG_SCHED_IPI_SUPPORTED
arch_sched_ipi();
#endif
@@ -1369,16 +1399,6 @@ void z_sched_abort(struct k_thread *thread)
k_busy_wait(100);
}
}
/* If the thread self-aborted (e.g. its own exit raced with
* this external abort) then even though it is flagged DEAD,
* it's still running until its next swap and thus the thread
* object is still in use. We have to resort to a fallback
* delay in that circumstance.
*/
if ((thread->base.thread_state & _THREAD_ABORTED_IN_ISR) == 0U) {
k_busy_wait(THREAD_ABORT_DELAY_US);
}
}
#endif

View file

@@ -21,25 +21,49 @@
#include <ksched.h>
#include <sys/__assert.h>
#include <syscall_handler.h>
#include <logging/log.h>
LOG_MODULE_DECLARE(os);
FUNC_NORETURN void z_self_abort(void)
{
/* Self-aborting threads don't clean themselves up; we
* have the idle thread for the current CPU do it.
*/
int key;
struct _cpu *cpu;
/* Lock local IRQs to prevent us from migrating to another CPU
* while we set this up
*/
key = arch_irq_lock();
cpu = _current_cpu;
__ASSERT(cpu->pending_abort == NULL, "already have a thread to abort");
cpu->pending_abort = _current;
LOG_DBG("%p self-aborting, handle on idle thread %p",
_current, cpu->idle_thread);
k_thread_suspend(_current);
z_swap_irqlock(key);
__ASSERT(false, "should never get here");
CODE_UNREACHABLE;
}
#if !defined(CONFIG_ARCH_HAS_THREAD_ABORT)
void z_impl_k_thread_abort(k_tid_t thread)
{
if (thread == _current && !arch_is_in_isr()) {
/* Thread is self-exiting, idle thread on this CPU will do
* the cleanup
*/
z_self_abort();
}
z_thread_single_abort(thread);
/* If we're in an interrupt handler, we reschedule on the way out
* anyway; nothing needs to be done here.
*/
if (!arch_is_in_isr()) {
if (thread == _current) {
/* Direct use of swap: reschedule doesn't have a test
* for "is _current dead" and we don't want one for
* performance reasons.
*/
z_swap_unlocked();
} else {
/* Don't need to do this if we're in an ISR */
z_reschedule_unlocked();
}
}
}
#endif
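
As a companion to the diff above, here is a second minimal sketch (again not from the commit; names, delays, and priorities are arbitrary, and SMP is assumed) of the cross-CPU race described in the commit message, where an external k_thread_abort() can collide with a self-abort. With the _THREAD_ABORTING/_THREAD_DEAD guard in z_thread_single_abort(), whichever abort arrives second simply returns early, and the old THREAD_ABORT_DELAY_US busy-wait in z_sched_abort() is no longer needed.

#include <zephyr.h>

K_THREAD_STACK_DEFINE(racer_stack, 1024);
static struct k_thread racer;

static void racer_entry(void *p1, void *p2, void *p3)
{
        ARG_UNUSED(p1);
        ARG_UNUSED(p2);
        ARG_UNUSED(p3);

        k_busy_wait(50);        /* pretend to do a little work */

        /* Self-abort: cleanup is handed to this CPU's idle thread via
         * z_self_abort() instead of being done by a thread that is
         * still running on its own stack.
         */
        k_thread_abort(k_current_get());
}

void abort_race_demo(void)
{
        k_thread_create(&racer, racer_stack,
                        K_THREAD_STACK_SIZEOF(racer_stack),
                        racer_entry, NULL, NULL, NULL,
                        K_PRIO_PREEMPT(1), 0, K_NO_WAIT);

        k_busy_wait(50);

        /* On SMP this external abort can race with the racer's own
         * k_thread_abort() above; whichever caller arrives second sees
         * _THREAD_ABORTING or _THREAD_DEAD already set and returns
         * without touching the thread again.
         */
        k_thread_abort(&racer);

        /* Returns once the racer has been fully cleaned up. */
        (void)k_thread_join(&racer, K_FOREVER);
}

Either way, the thread that actually tears the racer down is never the racer itself, which is the point of routing self-aborts through the idle thread.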