zephyr/arch/posix/core/posix_core.c

/*
 * Copyright (c) 2017 Oticon A/S
 * Copyright (c) 2023 Nordic Semiconductor ASA
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * Here is where things actually happen for the POSIX arch
 *
 * We isolate all functions here, to ensure they can be compiled as
 * independently as possible to the remainder of Zephyr to avoid name clashes
 * as Zephyr does provide functions with the same names as the POSIX threads
 * functions
 */
/**
 * Principle of operation:
 *
 * The Zephyr OS and its app run as a set of native pthreads.
 * The Zephyr OS only sees one of this thread executing at a time.
 * Which is running is controlled using {cond|mtx}_threads and
 * currently_allowed_thread.
 *
 * The main part of the execution of each thread will occur in a fully
 * synchronous and deterministic manner, and only when commanded by the Zephyr
 * kernel.
 * But the creation of a thread will spawn a new pthread whose start
 * is asynchronous to the rest, until synchronized in posix_wait_until_allowed()
 * below.
 * Similarly aborting and canceling threads execute a tail in a quite
 * asynchronous manner.
 *
 * This implementation is meant to be portable in between POSIX systems.
 * A table (threads_table) is used to abstract the native pthreads.
 * And index in this table is used to identify threads in the IF to the kernel.
 *
 */

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#include "posix_core.h"
#include "posix_arch_internal.h"

#define PREFIX     "POSIX arch core: "
#define ERPREFIX   PREFIX"error on "
#define NO_MEM_ERR PREFIX"Can't allocate memory\n"

#define PC_ENABLE_CANCEL 0 /* See Note.c1 */
#define PC_ALLOC_CHUNK_SIZE 64
#define PC_REUSE_ABORTED_ENTRIES 0
/* tests/kernel/threads/scheduling/schedule_api fails when setting
 * PC_REUSE_ABORTED_ENTRIES => don't set it by now
 */

static int threads_table_size;
struct threads_table_el {
	enum {NOTUSED = 0, USED, ABORTING, ABORTED, FAILED} state;
	bool running;     /* Is this the currently running thread */
	pthread_t thread; /* Actual pthread_t as returned by native kernel */
	int thead_cnt; /* For debugging: Unique, consecutive, thread number */
	/* Pointer to the status kept in the Zephyr thread stack */
	posix_thread_status_t *t_status;
};

static struct threads_table_el *threads_table;

static int thread_create_count; /* For debugging. Thread creation counter */

/*
 * Conditional variable to block/awake all threads during swaps()
 * (we only need 1 mutex and 1 cond variable for all threads)
 */
static pthread_cond_t cond_threads = PTHREAD_COND_INITIALIZER;
/* Mutex for the conditional variable posix_core_cond_threads */
static pthread_mutex_t mtx_threads = PTHREAD_MUTEX_INITIALIZER;
/* Token which tells which process is allowed to run now */
static int currently_allowed_thread;

static bool terminate; /* Are we terminating the program == cleaning up */

static void posix_wait_until_allowed(int this_th_nbr);
static void *posix_thread_starter(void *arg);
static void posix_preexit_cleanup(void);
extern void posix_arch_thread_entry(void *pa_thread_status);

/**
 * Helper function, run by a thread is being aborted
 */
static void abort_tail(int this_th_nbr)
{
	PC_DEBUG("Thread [%i] %i: %s: Aborting (exiting) (rel mut)\n",
		threads_table[this_th_nbr].thead_cnt,
		this_th_nbr,
		__func__);

	threads_table[this_th_nbr].running = false;
	threads_table[this_th_nbr].state = ABORTED;
	posix_preexit_cleanup();
	pthread_exit(NULL);
}

/**
 *  Helper function to block this thread until it is allowed again
 *  (somebody calls posix_let_run() with this thread number
 *
 * Note that we go out of this function (the while loop below)
 * with the mutex locked by this particular thread.
 * In normal circumstances, the mutex is only unlocked internally in
 * pthread_cond_wait() while waiting for cond_threads to be signaled
 */
static void posix_wait_until_allowed(int this_th_nbr)
{
	threads_table[this_th_nbr].running = false;

	PC_DEBUG("Thread [%i] %i: %s: Waiting to be allowed to run (rel mut)\n",
		threads_table[this_th_nbr].thead_cnt,
		this_th_nbr,
		__func__);

	while (this_th_nbr != currently_allowed_thread) {
		pthread_cond_wait(&cond_threads, &mtx_threads);

		if (threads_table &&
		    (threads_table[this_th_nbr].state == ABORTING)) {
			abort_tail(this_th_nbr);
		}
	}

	threads_table[this_th_nbr].running = true;

	PC_DEBUG("Thread [%i] %i: %s(): I'm allowed to run! (hav mut)\n",
		threads_table[this_th_nbr].thead_cnt,
		this_th_nbr,
		__func__);
}


/**
 * Helper function to let the thread <next_allowed_th> run
 * Note: posix_let_run() can only be called with the mutex locked
 */
static void posix_let_run(int next_allowed_th)
{
	PC_DEBUG("%s: We let thread [%i] %i run\n",
		__func__,
		threads_table[next_allowed_th].thead_cnt,
		next_allowed_th);


	currently_allowed_thread = next_allowed_th;

	/*
	 * We let all threads know one is able to run now (it may even be us
	 * again if fancied)
	 * Note that as we hold the mutex, they are going to be blocked until
	 * we reach our own posix_wait_until_allowed() while loop
	 */
	PC_SAFE_CALL(pthread_cond_broadcast(&cond_threads));
}


static void posix_preexit_cleanup(void)
{
	/*
	 * Release the mutex so the next allowed thread can run
	 */
	PC_SAFE_CALL(pthread_mutex_unlock(&mtx_threads));

	/* We detach ourselves so nobody needs to join to us */
	pthread_detach(pthread_self());
}


/**
 * Let the ready thread run and block this thread until it is allowed again
 *
 * called from arch_swap() which does the picking from the kernel structures
 */
void posix_swap(int next_allowed_thread_nbr, int this_th_nbr)
{
	posix_let_run(next_allowed_thread_nbr);

	if (threads_table[this_th_nbr].state == ABORTING) {
		PC_DEBUG("Thread [%i] %i: %s: Aborting curr.\n",
			threads_table[this_th_nbr].thead_cnt,
			this_th_nbr,
			__func__);
		abort_tail(this_th_nbr);
	} else {
		posix_wait_until_allowed(this_th_nbr);
	}
}

/**
 * Let the ready thread (main) run, and exit this thread (init)
 *
 * Called from arch_switch_to_main_thread() which does the picking from the
 * kernel structures
 *
 * Note that we could have just done a swap(), but that would have left the
 * init thread lingering. Instead here we exit the init thread after enabling
 * the new one
 */
void posix_main_thread_start(int next_allowed_thread_nbr)
{
	posix_let_run(next_allowed_thread_nbr);
	PC_DEBUG("%s: Init thread dying now (rel mut)\n",
		__func__);
	posix_preexit_cleanup();
	pthread_exit(NULL);
}

/**
 * Handler called when any thread is cancelled or exits
 */
static void posix_cleanup_handler(void *arg)
{
	/*
	 * If we are not terminating, this is just an aborted thread,
	 * and the mutex was already released
	 * Otherwise, release the mutex so other threads which may be
	 * caught waiting for it could terminate
	 */

	if (!terminate) {
		return;
	}

#if POSIX_ARCH_DEBUG_PRINTS
	posix_thread_status_t *ptr = (posix_thread_status_t *) arg;

	PC_DEBUG("Thread %i: %s: Canceling (rel mut)\n",
		ptr->thread_idx,
		__func__);
#endif


	PC_SAFE_CALL(pthread_mutex_unlock(&mtx_threads));

	/* We detach ourselves so nobody needs to join to us */
	pthread_detach(pthread_self());
}

/**
 * Helper function to start a Zephyr thread as a POSIX thread:
 *  It will block the thread until a arch_swap() is called for it
 *
 * Spawned from posix_new_thread() below
 */
static void *posix_thread_starter(void *arg)
{
	int thread_idx = (intptr_t)arg;

	PC_DEBUG("Thread [%i] %i: %s: Starting\n",
		threads_table[thread_idx].thead_cnt,
		thread_idx,
		__func__);

	/*
	 * We block until all other running threads reach the while loop
	 * in posix_wait_until_allowed() and they release the mutex
	 */
	PC_SAFE_CALL(pthread_mutex_lock(&mtx_threads));

	/*
	 * The program may have been finished before this thread ever got to run
	 */
	/* LCOV_EXCL_START */ /* See Note1 */
	if (!threads_table) {
		posix_cleanup_handler(arg);
		pthread_exit(NULL);
	}
	/* LCOV_EXCL_STOP */

	pthread_cleanup_push(posix_cleanup_handler, arg);

	PC_DEBUG("Thread [%i] %i: %s: After start mutex (hav mut)\n",
		threads_table[thread_idx].thead_cnt,
		thread_idx,
		__func__);

	/*
	 * The thread would try to execute immediately, so we block it
	 * until allowed
	 */
	posix_wait_until_allowed(thread_idx);

	posix_thread_status_t *ptr = threads_table[thread_idx].t_status;

	posix_arch_thread_entry(ptr);

	/*
	 * We only reach this point if the thread actually returns which should
	 * not happen. But we handle it gracefully just in case
	 */
	/* LCOV_EXCL_START */
	posix_print_trace(PREFIX"Thread [%i] %i [%lu] ended!?!\n",
			threads_table[thread_idx].thead_cnt,
			thread_idx,
			pthread_self());


	threads_table[thread_idx].running = false;
	threads_table[thread_idx].state = FAILED;

	pthread_cleanup_pop(1);

	return NULL;
	/* LCOV_EXCL_STOP */
}

/**
 * Return the first free entry index in the threads table
 */
static int ttable_get_empty_slot(void)
{

	for (int i = 0; i < threads_table_size; i++) {
		if ((threads_table[i].state == NOTUSED)
			|| (PC_REUSE_ABORTED_ENTRIES
			&& (threads_table[i].state == ABORTED))) {
			return i;
		}
	}

	/*
	 * else, we run out table without finding an index
	 * => we expand the table
	 */

	threads_table = realloc(threads_table,
				(threads_table_size + PC_ALLOC_CHUNK_SIZE)
				* sizeof(struct threads_table_el));
	if (threads_table == NULL) { /* LCOV_EXCL_BR_LINE */
		posix_print_error_and_exit(NO_MEM_ERR); /* LCOV_EXCL_LINE */
	}

	/* Clear new piece of table */
	(void)memset(&threads_table[threads_table_size], 0,
		     PC_ALLOC_CHUNK_SIZE * sizeof(struct threads_table_el));

	threads_table_size += PC_ALLOC_CHUNK_SIZE;

	/* The first newly created entry is good: */
	return threads_table_size - PC_ALLOC_CHUNK_SIZE;
}

/**
 * Called from arch_new_thread(),
 * Create a new POSIX thread for the new Zephyr thread.
 * arch_new_thread() picks from the kernel structures what it is that we need
 * to call with what parameters
 */
int posix_new_thread(void *ptr)
{
	int t_slot;

	t_slot = ttable_get_empty_slot();
	threads_table[t_slot].state = USED;
	threads_table[t_slot].running = false;
	threads_table[t_slot].thead_cnt = thread_create_count++;
	threads_table[t_slot].t_status = ptr;

	/*
	 * Note: If you are here due to a valgrind reported memory leak in
	 * pthread_create() please use the provided valgrind.supp suppression file.
	 */
	PC_SAFE_CALL(pthread_create(&threads_table[t_slot].thread,
				  NULL,
				  posix_thread_starter,
				  (void *)(intptr_t)t_slot));

	PC_DEBUG("%s created thread [%i] %i [%lu]\n",
		__func__,
		threads_table[t_slot].thead_cnt,
		t_slot,
		threads_table[t_slot].thread);

	return t_slot;
}

/*
 * Initialize the posix architecture
 *
 * Prepare whatever needs to be prepared to be able to start threads
 */
void posix_arch_init(void)
{
	thread_create_count = 0;

	currently_allowed_thread = -1;

	threads_table = calloc(PC_ALLOC_CHUNK_SIZE,
				sizeof(struct threads_table_el));
	if (threads_table == NULL) { /* LCOV_EXCL_BR_LINE */
		posix_print_error_and_exit(NO_MEM_ERR); /* LCOV_EXCL_LINE */
	}

	threads_table_size = PC_ALLOC_CHUNK_SIZE;


	PC_SAFE_CALL(pthread_mutex_lock(&mtx_threads));
}

/*
 * Free any allocated memory by the posix core and clean up.
 * Note that this function cannot be called from a SW thread
 * (the CPU is assumed halted. Otherwise we will cancel ourselves)
 *
 * This function cannot guarantee the threads will be cancelled before the HW
 * thread exists. The only way to do that, would be  to wait for each of them in
 * a join (without detaching them, but that could lead to locks in some
 * convoluted cases. As a call to this function can come from an ASSERT or other
 * error termination, we better do not assume things are working fine.
 * => we prefer the supposed memory leak report from valgrind, and ensure we
 * will not hang
 */
void posix_arch_clean_up(void)
{

	if (!threads_table) { /* LCOV_EXCL_BR_LINE */
		return; /* LCOV_EXCL_LINE */
	}

	terminate = true;

#if (PC_ENABLE_CANCEL)
	for (int i = 0; i < threads_table_size; i++) {
		if (threads_table[i].state != USED) {
			continue;
		}

		/* LCOV_EXCL_START */
		if (pthread_cancel(threads_table[i].thread)) {
			posix_print_warning(
				PREFIX"cleanup: could not stop thread %i\n",
				i);
		}
		/* LCOV_EXCL_STOP */
	}
#endif

	free(threads_table);
	threads_table = NULL;
}

void posix_abort_thread(int thread_idx)
{
	if (thread_idx == currently_allowed_thread) {
		PC_DEBUG("Thread [%i] %i: %s Marked myself "
			"as aborting\n",
			threads_table[thread_idx].thead_cnt,
			thread_idx,
			__func__);
	} else {
		if (threads_table[thread_idx].state != USED) { /* LCOV_EXCL_BR_LINE */
			/* The thread may have been already aborted before */
			return; /* LCOV_EXCL_LINE */
		}

		PC_DEBUG("Aborting not scheduled thread [%i] %i\n",
			threads_table[thread_idx].thead_cnt,
			thread_idx);
	}

	threads_table[thread_idx].state = ABORTING;
	/*
	 * Note: the native thread will linger in RAM until it catches the
	 * mutex or awakes on the condition.
	 * Note that even if we would pthread_cancel() the thread here, that
	 * would be the case, but with a pthread_cancel() the mutex state would
	 * be uncontrolled
	 */

}

int posix_arch_get_unique_thread_id(int thread_idx)
{
	return threads_table[thread_idx].thead_cnt;
}

/*
 * Notes about coverage:
 *
 * Note1:
 *
 * This condition will only be triggered in very unlikely cases
 * (once every few full regression runs).
 * It is therefore excluded from the coverage report to avoid confusing
 * developers.
 *
 * Background: This arch creates a pthread as soon as the Zephyr kernel creates
 * a Zephyr thread. A pthread creation is an asynchronous process handled by the
 * host kernel.
 *
 * This architecture normally keeps only 1 thread executing at a time.
 * But part of the pre-initialization during creation of a new thread
 * and some cleanup at the tail of the thread termination are executed
 * in parallel to other threads.
 * That is, the execution of those code paths is a bit indeterministic.
 *
 * Only when the Zephyr kernel attempts to swap to a new thread does this
 * architecture need to wait until its pthread is ready and initialized
 * (has reached posix_wait_until_allowed())
 *
 * In some cases (tests) threads are created which are never actually needed
 * (typically the idle thread). That means the test may finish before this
 * thread's underlying pthread has reached posix_wait_until_allowed().
 *
 * In this unlikely cases the initialization or cleanup of the thread follows
 * non-typical code paths.
 * This code paths are there to ensure things work always, no matter
 * the load of the host. Without them, very rare & mysterious segfault crashes
 * would occur.
 * But as they are very atypical and only triggered with some host loads,
 * they will be covered in the coverage reports only rarely.
 *
 * Note2:
 *
 * Some other code will never or only very rarely trigger and is therefore
 * excluded with LCOV_EXCL_LINE
 *
 *
 * Notes about (memory) cleanup:
 *
 * Note.c1:
 *
 * In some very rare cases in very loaded machines, a race in the glibc pthread_cancel()
 * seems to be triggered.
 * In this, the cancelled thread cleanup overtakes the pthread_cancel() code, and frees the
 * pthread structure before pthread_cancel() has finished, resulting in a dereference into already
 * free'd memory, and therefore a segfault.
 * Calling pthread_cancel() during cleanup is not required beyond preventing a valgrind
 * memory leak report (all threads will be canceled immediately on exit).
 * Therefore we do not do this, to avoid this very rare crashes.
 */