zephyr/arch/posix/core/posix_core.c
Alberto Escolar Piedras c044b2e916 posix arch: Fix very rare segfault on program termination
In some very rare cases (< 1/1000 runs), in very loaded machines,
a race in the glibc pthread_cancel() seems to be triggered.

In this the cancelled thread cleanup overtakes the pthread_cancel()
code, and frees the pthread structure before pthread_cancel()
has finished, resulting in a dereference into already
free'd memory, and therefore a segfault.
Calling pthread_cancel() during cleanup is not required beyond
preventing a valgrind memory leak report (all threads will be
stopped immediately on exit).
Therefore we stop doing this, to avoid this very rare crashes.

This issue was reproduced in Ubuntu 22.04, with its default
gcc 11.3.0 and glibc 2.35.
The issue may also have been seen very rarely in Zephyr's CI.

Signed-off-by: Alberto Escolar Piedras <alberto.escolar.piedras@nordicsemi.no>
2023-07-27 17:02:36 +02:00

540 lines
15 KiB
C

/*
* Copyright (c) 2017 Oticon A/S
* Copyright (c) 2023 Nordic Semiconductor ASA
*
* SPDX-License-Identifier: Apache-2.0
*/
/**
* Here is where things actually happen for the POSIX arch
*
* We isolate all functions here, to ensure they can be compiled as
* independently as possible to the remainder of Zephyr to avoid name clashes
* as Zephyr does provide functions with the same names as the POSIX threads
* functions
*/
/**
* Principle of operation:
*
* The Zephyr OS and its app run as a set of native pthreads.
* The Zephyr OS only sees one of this thread executing at a time.
* Which is running is controlled using {cond|mtx}_threads and
* currently_allowed_thread.
*
* The main part of the execution of each thread will occur in a fully
* synchronous and deterministic manner, and only when commanded by the Zephyr
* kernel.
* But the creation of a thread will spawn a new pthread whose start
* is asynchronous to the rest, until synchronized in posix_wait_until_allowed()
* below.
* Similarly aborting and canceling threads execute a tail in a quite
* asynchronous manner.
*
* This implementation is meant to be portable in between POSIX systems.
* A table (threads_table) is used to abstract the native pthreads.
* And index in this table is used to identify threads in the IF to the kernel.
*
*/
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include "posix_core.h"
#include "posix_arch_internal.h"
#define PREFIX "POSIX arch core: "
#define ERPREFIX PREFIX"error on "
#define NO_MEM_ERR PREFIX"Can't allocate memory\n"
#define PC_ENABLE_CANCEL 0 /* See Note.c1 */
#define PC_ALLOC_CHUNK_SIZE 64
#define PC_REUSE_ABORTED_ENTRIES 0
/* tests/kernel/threads/scheduling/schedule_api fails when setting
* PC_REUSE_ABORTED_ENTRIES => don't set it by now
*/
static int threads_table_size;
struct threads_table_el {
enum {NOTUSED = 0, USED, ABORTING, ABORTED, FAILED} state;
bool running; /* Is this the currently running thread */
pthread_t thread; /* Actual pthread_t as returned by native kernel */
int thead_cnt; /* For debugging: Unique, consecutive, thread number */
/* Pointer to the status kept in the Zephyr thread stack */
posix_thread_status_t *t_status;
};
static struct threads_table_el *threads_table;
static int thread_create_count; /* For debugging. Thread creation counter */
/*
* Conditional variable to block/awake all threads during swaps()
* (we only need 1 mutex and 1 cond variable for all threads)
*/
static pthread_cond_t cond_threads = PTHREAD_COND_INITIALIZER;
/* Mutex for the conditional variable posix_core_cond_threads */
static pthread_mutex_t mtx_threads = PTHREAD_MUTEX_INITIALIZER;
/* Token which tells which process is allowed to run now */
static int currently_allowed_thread;
static bool terminate; /* Are we terminating the program == cleaning up */
static void posix_wait_until_allowed(int this_th_nbr);
static void *posix_thread_starter(void *arg);
static void posix_preexit_cleanup(void);
extern void posix_arch_thread_entry(void *pa_thread_status);
/**
* Helper function, run by a thread is being aborted
*/
static void abort_tail(int this_th_nbr)
{
PC_DEBUG("Thread [%i] %i: %s: Aborting (exiting) (rel mut)\n",
threads_table[this_th_nbr].thead_cnt,
this_th_nbr,
__func__);
threads_table[this_th_nbr].running = false;
threads_table[this_th_nbr].state = ABORTED;
posix_preexit_cleanup();
pthread_exit(NULL);
}
/**
* Helper function to block this thread until it is allowed again
* (somebody calls posix_let_run() with this thread number
*
* Note that we go out of this function (the while loop below)
* with the mutex locked by this particular thread.
* In normal circumstances, the mutex is only unlocked internally in
* pthread_cond_wait() while waiting for cond_threads to be signaled
*/
static void posix_wait_until_allowed(int this_th_nbr)
{
threads_table[this_th_nbr].running = false;
PC_DEBUG("Thread [%i] %i: %s: Waiting to be allowed to run (rel mut)\n",
threads_table[this_th_nbr].thead_cnt,
this_th_nbr,
__func__);
while (this_th_nbr != currently_allowed_thread) {
pthread_cond_wait(&cond_threads, &mtx_threads);
if (threads_table &&
(threads_table[this_th_nbr].state == ABORTING)) {
abort_tail(this_th_nbr);
}
}
threads_table[this_th_nbr].running = true;
PC_DEBUG("Thread [%i] %i: %s(): I'm allowed to run! (hav mut)\n",
threads_table[this_th_nbr].thead_cnt,
this_th_nbr,
__func__);
}
/**
* Helper function to let the thread <next_allowed_th> run
* Note: posix_let_run() can only be called with the mutex locked
*/
static void posix_let_run(int next_allowed_th)
{
PC_DEBUG("%s: We let thread [%i] %i run\n",
__func__,
threads_table[next_allowed_th].thead_cnt,
next_allowed_th);
currently_allowed_thread = next_allowed_th;
/*
* We let all threads know one is able to run now (it may even be us
* again if fancied)
* Note that as we hold the mutex, they are going to be blocked until
* we reach our own posix_wait_until_allowed() while loop
*/
PC_SAFE_CALL(pthread_cond_broadcast(&cond_threads));
}
static void posix_preexit_cleanup(void)
{
/*
* Release the mutex so the next allowed thread can run
*/
PC_SAFE_CALL(pthread_mutex_unlock(&mtx_threads));
/* We detach ourselves so nobody needs to join to us */
pthread_detach(pthread_self());
}
/**
* Let the ready thread run and block this thread until it is allowed again
*
* called from arch_swap() which does the picking from the kernel structures
*/
void posix_swap(int next_allowed_thread_nbr, int this_th_nbr)
{
posix_let_run(next_allowed_thread_nbr);
if (threads_table[this_th_nbr].state == ABORTING) {
PC_DEBUG("Thread [%i] %i: %s: Aborting curr.\n",
threads_table[this_th_nbr].thead_cnt,
this_th_nbr,
__func__);
abort_tail(this_th_nbr);
} else {
posix_wait_until_allowed(this_th_nbr);
}
}
/**
* Let the ready thread (main) run, and exit this thread (init)
*
* Called from arch_switch_to_main_thread() which does the picking from the
* kernel structures
*
* Note that we could have just done a swap(), but that would have left the
* init thread lingering. Instead here we exit the init thread after enabling
* the new one
*/
void posix_main_thread_start(int next_allowed_thread_nbr)
{
posix_let_run(next_allowed_thread_nbr);
PC_DEBUG("%s: Init thread dying now (rel mut)\n",
__func__);
posix_preexit_cleanup();
pthread_exit(NULL);
}
/**
* Handler called when any thread is cancelled or exits
*/
static void posix_cleanup_handler(void *arg)
{
/*
* If we are not terminating, this is just an aborted thread,
* and the mutex was already released
* Otherwise, release the mutex so other threads which may be
* caught waiting for it could terminate
*/
if (!terminate) {
return;
}
#if POSIX_ARCH_DEBUG_PRINTS
posix_thread_status_t *ptr = (posix_thread_status_t *) arg;
PC_DEBUG("Thread %i: %s: Canceling (rel mut)\n",
ptr->thread_idx,
__func__);
#endif
PC_SAFE_CALL(pthread_mutex_unlock(&mtx_threads));
/* We detach ourselves so nobody needs to join to us */
pthread_detach(pthread_self());
}
/**
* Helper function to start a Zephyr thread as a POSIX thread:
* It will block the thread until a arch_swap() is called for it
*
* Spawned from posix_new_thread() below
*/
static void *posix_thread_starter(void *arg)
{
int thread_idx = (intptr_t)arg;
PC_DEBUG("Thread [%i] %i: %s: Starting\n",
threads_table[thread_idx].thead_cnt,
thread_idx,
__func__);
/*
* We block until all other running threads reach the while loop
* in posix_wait_until_allowed() and they release the mutex
*/
PC_SAFE_CALL(pthread_mutex_lock(&mtx_threads));
/*
* The program may have been finished before this thread ever got to run
*/
/* LCOV_EXCL_START */ /* See Note1 */
if (!threads_table) {
posix_cleanup_handler(arg);
pthread_exit(NULL);
}
/* LCOV_EXCL_STOP */
pthread_cleanup_push(posix_cleanup_handler, arg);
PC_DEBUG("Thread [%i] %i: %s: After start mutex (hav mut)\n",
threads_table[thread_idx].thead_cnt,
thread_idx,
__func__);
/*
* The thread would try to execute immediately, so we block it
* until allowed
*/
posix_wait_until_allowed(thread_idx);
posix_thread_status_t *ptr = threads_table[thread_idx].t_status;
posix_arch_thread_entry(ptr);
/*
* We only reach this point if the thread actually returns which should
* not happen. But we handle it gracefully just in case
*/
/* LCOV_EXCL_START */
posix_print_trace(PREFIX"Thread [%i] %i [%lu] ended!?!\n",
threads_table[thread_idx].thead_cnt,
thread_idx,
pthread_self());
threads_table[thread_idx].running = false;
threads_table[thread_idx].state = FAILED;
pthread_cleanup_pop(1);
return NULL;
/* LCOV_EXCL_STOP */
}
/**
* Return the first free entry index in the threads table
*/
static int ttable_get_empty_slot(void)
{
for (int i = 0; i < threads_table_size; i++) {
if ((threads_table[i].state == NOTUSED)
|| (PC_REUSE_ABORTED_ENTRIES
&& (threads_table[i].state == ABORTED))) {
return i;
}
}
/*
* else, we run out table without finding an index
* => we expand the table
*/
threads_table = realloc(threads_table,
(threads_table_size + PC_ALLOC_CHUNK_SIZE)
* sizeof(struct threads_table_el));
if (threads_table == NULL) { /* LCOV_EXCL_BR_LINE */
posix_print_error_and_exit(NO_MEM_ERR); /* LCOV_EXCL_LINE */
}
/* Clear new piece of table */
(void)memset(&threads_table[threads_table_size], 0,
PC_ALLOC_CHUNK_SIZE * sizeof(struct threads_table_el));
threads_table_size += PC_ALLOC_CHUNK_SIZE;
/* The first newly created entry is good: */
return threads_table_size - PC_ALLOC_CHUNK_SIZE;
}
/**
* Called from arch_new_thread(),
* Create a new POSIX thread for the new Zephyr thread.
* arch_new_thread() picks from the kernel structures what it is that we need
* to call with what parameters
*/
int posix_new_thread(void *ptr)
{
int t_slot;
t_slot = ttable_get_empty_slot();
threads_table[t_slot].state = USED;
threads_table[t_slot].running = false;
threads_table[t_slot].thead_cnt = thread_create_count++;
threads_table[t_slot].t_status = ptr;
/*
* Note: If you are here due to a valgrind reported memory leak in
* pthread_create() please use the provided valgrind.supp suppression file.
*/
PC_SAFE_CALL(pthread_create(&threads_table[t_slot].thread,
NULL,
posix_thread_starter,
(void *)(intptr_t)t_slot));
PC_DEBUG("%s created thread [%i] %i [%lu]\n",
__func__,
threads_table[t_slot].thead_cnt,
t_slot,
threads_table[t_slot].thread);
return t_slot;
}
/*
* Initialize the posix architecture
*
* Prepare whatever needs to be prepared to be able to start threads
*/
void posix_arch_init(void)
{
thread_create_count = 0;
currently_allowed_thread = -1;
threads_table = calloc(PC_ALLOC_CHUNK_SIZE,
sizeof(struct threads_table_el));
if (threads_table == NULL) { /* LCOV_EXCL_BR_LINE */
posix_print_error_and_exit(NO_MEM_ERR); /* LCOV_EXCL_LINE */
}
threads_table_size = PC_ALLOC_CHUNK_SIZE;
PC_SAFE_CALL(pthread_mutex_lock(&mtx_threads));
}
/*
* Free any allocated memory by the posix core and clean up.
* Note that this function cannot be called from a SW thread
* (the CPU is assumed halted. Otherwise we will cancel ourselves)
*
* This function cannot guarantee the threads will be cancelled before the HW
* thread exists. The only way to do that, would be to wait for each of them in
* a join (without detaching them, but that could lead to locks in some
* convoluted cases. As a call to this function can come from an ASSERT or other
* error termination, we better do not assume things are working fine.
* => we prefer the supposed memory leak report from valgrind, and ensure we
* will not hang
*/
void posix_arch_clean_up(void)
{
if (!threads_table) { /* LCOV_EXCL_BR_LINE */
return; /* LCOV_EXCL_LINE */
}
terminate = true;
#if (PC_ENABLE_CANCEL)
for (int i = 0; i < threads_table_size; i++) {
if (threads_table[i].state != USED) {
continue;
}
/* LCOV_EXCL_START */
if (pthread_cancel(threads_table[i].thread)) {
posix_print_warning(
PREFIX"cleanup: could not stop thread %i\n",
i);
}
/* LCOV_EXCL_STOP */
}
#endif
free(threads_table);
threads_table = NULL;
}
void posix_abort_thread(int thread_idx)
{
if (thread_idx == currently_allowed_thread) {
PC_DEBUG("Thread [%i] %i: %s Marked myself "
"as aborting\n",
threads_table[thread_idx].thead_cnt,
thread_idx,
__func__);
} else {
if (threads_table[thread_idx].state != USED) { /* LCOV_EXCL_BR_LINE */
/* The thread may have been already aborted before */
return; /* LCOV_EXCL_LINE */
}
PC_DEBUG("Aborting not scheduled thread [%i] %i\n",
threads_table[thread_idx].thead_cnt,
thread_idx);
}
threads_table[thread_idx].state = ABORTING;
/*
* Note: the native thread will linger in RAM until it catches the
* mutex or awakes on the condition.
* Note that even if we would pthread_cancel() the thread here, that
* would be the case, but with a pthread_cancel() the mutex state would
* be uncontrolled
*/
}
int posix_arch_get_unique_thread_id(int thread_idx)
{
return threads_table[thread_idx].thead_cnt;
}
/*
* Notes about coverage:
*
* Note1:
*
* This condition will only be triggered in very unlikely cases
* (once every few full regression runs).
* It is therefore excluded from the coverage report to avoid confusing
* developers.
*
* Background: This arch creates a pthread as soon as the Zephyr kernel creates
* a Zephyr thread. A pthread creation is an asynchronous process handled by the
* host kernel.
*
* This architecture normally keeps only 1 thread executing at a time.
* But part of the pre-initialization during creation of a new thread
* and some cleanup at the tail of the thread termination are executed
* in parallel to other threads.
* That is, the execution of those code paths is a bit indeterministic.
*
* Only when the Zephyr kernel attempts to swap to a new thread does this
* architecture need to wait until its pthread is ready and initialized
* (has reached posix_wait_until_allowed())
*
* In some cases (tests) threads are created which are never actually needed
* (typically the idle thread). That means the test may finish before this
* thread's underlying pthread has reached posix_wait_until_allowed().
*
* In this unlikely cases the initialization or cleanup of the thread follows
* non-typical code paths.
* This code paths are there to ensure things work always, no matter
* the load of the host. Without them, very rare & mysterious segfault crashes
* would occur.
* But as they are very atypical and only triggered with some host loads,
* they will be covered in the coverage reports only rarely.
*
* Note2:
*
* Some other code will never or only very rarely trigger and is therefore
* excluded with LCOV_EXCL_LINE
*
*
* Notes about (memory) cleanup:
*
* Note.c1:
*
* In some very rare cases in very loaded machines, a race in the glibc pthread_cancel()
* seems to be triggered.
* In this, the cancelled thread cleanup overtakes the pthread_cancel() code, and frees the
* pthread structure before pthread_cancel() has finished, resulting in a dereference into already
* free'd memory, and therefore a segfault.
* Calling pthread_cancel() during cleanup is not required beyond preventing a valgrind
* memory leak report (all threads will be canceled immediately on exit).
* Therefore we do not do this, to avoid this very rare crashes.
*/