From 1acd8c2996c3a1ae0691247deff8c32519307f17 Mon Sep 17 00:00:00 2001 From: Andy Ross Date: Thu, 3 May 2018 14:51:49 -0700 Subject: [PATCH] kernel: Scheduler rewrite This replaces the existing scheduler (but not priority handling) implementation with a somewhat simpler one. Behavior as to thread selection does not change. New features: + Unifies SMP and uniprocessing selection code (with the sole exception of the "cache" trick not being possible in SMP). + The old static multi-queue implementation is gone and has been replaced with a build-time choice of either a "dumb" list implementation (faster and significantly smaller for apps with only a few threads) or a balanced tree queue which scales well to arbitrary numbers of threads and priority levels. This is controlled via the CONFIG_SCHED_DUMB kconfig variable. + The balanced tree implementation is usable symmetrically for the wait_q abstraction, fixing a scalability glitch Zephyr had when many threads were waiting on a single object. This can be selected via CONFIG_WAITQ_FAST. Signed-off-by: Andy Ross --- include/kernel.h | 33 +- include/sched_priq.h | 47 ++ kernel/Kconfig | 28 + kernel/include/kernel_structs.h | 20 +- kernel/include/ksched.h | 21 + kernel/include/kswap.h | 4 +- kernel/include/wait_q.h | 28 +- kernel/init.c | 11 +- kernel/pipes.c | 6 +- kernel/sched.c | 886 +++++++++++++++----------------- 10 files changed, 600 insertions(+), 484 deletions(-) create mode 100644 include/sched_priq.h diff --git a/include/kernel.h b/include/kernel.h index 1419a1f2f4c..419a1b84106 100644 --- a/include/kernel.h +++ b/include/kernel.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -91,12 +93,26 @@ extern "C" { #define K_HIGHEST_APPLICATION_THREAD_PRIO (K_HIGHEST_THREAD_PRIO) #define K_LOWEST_APPLICATION_THREAD_PRIO (K_LOWEST_THREAD_PRIO - 1) +#ifdef CONFIG_WAITQ_FAST + +typedef struct { + struct _priq_rb waitq; +} _wait_q_t; + +extern int _priq_rb_lessthan(struct rbnode *a, struct rbnode *b); + +#define _WAIT_Q_INIT(wait_q) { { { .lessthan_fn = _priq_rb_lessthan } } } + +#else + typedef struct { sys_dlist_t waitq; } _wait_q_t; #define _WAIT_Q_INIT(wait_q) { SYS_DLIST_STATIC_INIT(&(wait_q)->waitq) } +#endif + #ifdef CONFIG_OBJECT_TRACING #define _OBJECT_TRACING_NEXT_PTR(type) struct type *__next #define _OBJECT_TRACING_INIT .__next = NULL, @@ -405,7 +421,17 @@ struct __thread_entry { struct _thread_base { /* this thread's entry in a ready/wait queue */ - sys_dnode_t k_q_node; + union { + sys_dlist_t qnode_dlist; + struct rbnode qnode_rb; + }; + +#ifdef CONFIG_WAITQ_FAST + /* wait queue on which the thread is pended (needed only for + * trees, not dumb lists) + */ + _wait_q_t *pended_on; +#endif /* user facing 'thread options'; values defined in include/kernel.h */ u8_t user_options; @@ -440,13 +466,12 @@ struct _thread_base { u16_t preempt; }; + u32_t order_key; + #ifdef CONFIG_SMP /* True for the per-CPU idle threads */ u8_t is_idle; - /* Non-zero when actively running on a CPU */ - u8_t active; - /* CPU index on which thread was last run */ u8_t cpu; diff --git a/include/sched_priq.h b/include/sched_priq.h new file mode 100644 index 00000000000..e83b5f685ad --- /dev/null +++ b/include/sched_priq.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _sched_priq__h_ +#define _sched_priq__h_ + +#include +#include +#include + +/* Two abstractions are 
defined here for "thread priority queues". + * + * One is a "dumb" list implementation appropriate for systems with + * small numbers of threads and sensitive to code size. It is stored + * in sorted order, taking an O(N) cost every time a thread is added + * to the list. This corresponds to the way the original _wait_q_t + * abstraction worked and is very fast as long as the number of + * threads is small. + * + * The other is a balanced tree "fast" implementation with rather + * larger code size (due to the data structure itself, the code here + * is just stubs) and higher constant-factor performance overhead, but + * much better O(logN) scaling in the presence of large number of + * threads. + * + * Each can be used for either the wait_q or system ready queue, + * configurable at build time. + */ + +struct k_thread; + +struct k_thread *_priq_dumb_best(sys_dlist_t *pq); +void _priq_dumb_remove(sys_dlist_t *pq, struct k_thread *thread); +void _priq_dumb_add(sys_dlist_t *pq, struct k_thread *thread); + +struct _priq_rb { + struct rbtree tree; + int next_order_key; +}; + +void _priq_rb_add(struct _priq_rb *pq, struct k_thread *thread); +void _priq_rb_remove(struct _priq_rb *pq, struct k_thread *thread); +struct k_thread *_priq_rb_best(struct _priq_rb *pq); + +#endif /* _sched_priq__h_ */ diff --git a/kernel/Kconfig b/kernel/Kconfig index 62e9652bf3d..90ce807aca3 100644 --- a/kernel/Kconfig +++ b/kernel/Kconfig @@ -169,6 +169,34 @@ config APPLICATION_MEMORY will have the project-level application objects and any libraries including the C library in it. +config WAITQ_FAST + bool + prompt "Use scalable wait_q implementation" + default n + help + When selected, the wait_q abstraction used in IPC primitives + to pend threads for wakeup later will be implemented with a + balanced tree instead of a linear list. Choose this if you + expect to have many threads waiting on individual + primitives, or if you have already included the red/black + tree code in the build for some other purpose (in which case + this results in less code size increase than the default + implementation). + +config SCHED_DUMB + bool + prompt "Use a simple linked list scheduler" + default y + help + When selected, the scheduler ready queue will be implemented + as a simple unordered list, with very fast constant time + performance for single threads and very low code size. + Choose this on systems with constrained code size that will + never see more than a small number (3, maybe) of runnable + threads in the queue at any given time. On most platforms + (that are not otherwise using the red/black tree) this + results in a savings of ~2k of code size. + menu "Kernel Debugging and Metrics" config INIT_STACKS diff --git a/kernel/include/kernel_structs.h b/kernel/include/kernel_structs.h index 083615e1075..5c3cca1e825 100644 --- a/kernel/include/kernel_structs.h +++ b/kernel/include/kernel_structs.h @@ -12,6 +12,7 @@ #if !defined(_ASMLANGUAGE) #include #include +#include #include #endif @@ -27,7 +28,6 @@ * defined. 
*/ - /* states: common uses low bits, arch-specific use high bits */ /* Not a real thread */ @@ -48,6 +48,9 @@ /* Thread is actively looking at events to see if they are ready */ #define _THREAD_POLLING (1 << 5) +/* Thread is present in the ready queue */ +#define _THREAD_QUEUED (1 << 6) + /* end - states */ #ifdef CONFIG_STACK_SENTINEL @@ -69,13 +72,13 @@ struct _ready_q { #ifndef CONFIG_SMP /* always contains next thread to run: cannot be NULL */ struct k_thread *cache; - - /* bitmap of priorities that contain at least one ready thread */ - u32_t prio_bmap[K_NUM_PRIO_BITMAPS]; #endif - /* ready queues, one per priority */ - sys_dlist_t q[K_NUM_PRIORITIES]; +#ifdef CONFIG_SCHED_DUMB + sys_dlist_t runq; +#else + struct _priq_rb runq; +#endif }; typedef struct _ready_q _ready_q_t; @@ -90,6 +93,9 @@ struct _cpu { /* currently scheduled thread */ struct k_thread *current; + /* one assigned idle thread per CPU */ + struct k_thread *idle_thread; + int id; }; @@ -158,8 +164,10 @@ typedef struct _kernel _kernel_t; extern struct _kernel _kernel; #ifdef CONFIG_SMP +#define _current_cpu (_arch_curr_cpu()) #define _current (_arch_curr_cpu()->current) #else +#define _current_cpu (&_kernel.cpus[0]) #define _current _kernel.current #endif diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h index 5d296649746..65d280450f2 100644 --- a/kernel/include/ksched.h +++ b/kernel/include/ksched.h @@ -33,6 +33,7 @@ #define _ASSERT_VALID_PRIO(prio, entry_point) __ASSERT((prio) == -1, "") #endif +void _sched_init(void); void _add_thread_to_ready_q(struct k_thread *thread); void _move_thread_to_end_of_prio_q(struct k_thread *thread); void _remove_thread_from_ready_q(struct k_thread *thread); @@ -111,6 +112,11 @@ static inline int _is_thread_polling(struct k_thread *thread) return _is_thread_state_set(thread, _THREAD_POLLING); } +static inline int _is_thread_queued(struct k_thread *thread) +{ + return _is_thread_state_set(thread, _THREAD_QUEUED); +} + static inline void _mark_thread_as_suspended(struct k_thread *thread) { thread->base.thread_state |= _THREAD_SUSPENDED; @@ -126,6 +132,11 @@ static inline void _mark_thread_as_started(struct k_thread *thread) thread->base.thread_state &= ~_THREAD_PRESTART; } +static inline void _mark_thread_as_pending(struct k_thread *thread) +{ + thread->base.thread_state |= _THREAD_PENDING; +} + static inline void _mark_thread_as_not_pending(struct k_thread *thread) { thread->base.thread_state &= ~_THREAD_PENDING; @@ -152,6 +163,16 @@ static inline void _mark_thread_as_not_polling(struct k_thread *thread) _reset_thread_states(thread, _THREAD_POLLING); } +static inline void _mark_thread_as_queued(struct k_thread *thread) +{ + _set_thread_states(thread, _THREAD_QUEUED); +} + +static inline void _mark_thread_as_not_queued(struct k_thread *thread) +{ + _reset_thread_states(thread, _THREAD_QUEUED); +} + static inline int _is_under_prio_ceiling(int prio) { return prio >= CONFIG_PRIORITY_CEILING; diff --git a/kernel/include/kswap.h b/kernel/include/kswap.h index 67bf94b4b2a..7a0f7d4c991 100644 --- a/kernel/include/kswap.h +++ b/kernel/include/kswap.h @@ -58,12 +58,10 @@ static inline unsigned int _Swap(unsigned int key) new_thread = _get_next_ready_thread(); if (new_thread != old_thread) { + old_thread->swap_retval = -EAGAIN; #ifdef CONFIG_SMP - old_thread->base.active = 0; - new_thread->base.active = 1; - new_thread->base.cpu = _arch_curr_cpu()->id; _smp_release_global_lock(new_thread); diff --git a/kernel/include/wait_q.h b/kernel/include/wait_q.h index e59bc9f26d2..e8c53850756 
100644 --- a/kernel/include/wait_q.h +++ b/kernel/include/wait_q.h @@ -11,7 +11,9 @@ #include #include +#include #include +#include #ifdef __cplusplus extern "C" { @@ -42,8 +44,30 @@ static ALWAYS_INLINE int _abort_thread_timeout(struct k_thread *thread) #define _get_next_timeout_expiry() (K_FOREVER) #endif +#ifdef CONFIG_WAITQ_FAST + #define _WAIT_Q_FOR_EACH(wq, thread_ptr) \ - SYS_DLIST_FOR_EACH_CONTAINER(&((wq)->waitq), thread_ptr, base.k_q_node) + RB_FOR_EACH_CONTAINER(&(wq)->waitq.tree, thread_ptr, base.qnode_rb) + +static inline void _waitq_init(_wait_q_t *w) +{ + w->waitq = (struct _priq_rb) { + .tree = { + .lessthan_fn = _priq_rb_lessthan + } + }; +} + +static inline struct k_thread *_waitq_head(_wait_q_t *w) +{ + return (void *)rb_get_min(&w->waitq.tree); +} + +#else /* !CONFIG_WAITQ_FAST: */ + +#define _WAIT_Q_FOR_EACH(wq, thread_ptr) \ + SYS_DLIST_FOR_EACH_CONTAINER(&((wq)->waitq), thread_ptr, \ + base.qnode_dlist) static inline void _waitq_init(_wait_q_t *w) { @@ -55,6 +79,8 @@ static inline struct k_thread *_waitq_head(_wait_q_t *w) return (void *)sys_dlist_peek_head(&w->waitq); } +#endif /* !CONFIG_WAITQ_FAST */ + #ifdef __cplusplus } #endif diff --git a/kernel/init.c b/kernel/init.c index 5f2bdb53df2..954fc10b63b 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -273,7 +273,6 @@ static void init_idle_thread(struct k_thread *thr, k_thread_stack_t *stack) IDLE_STACK_SIZE, idle, NULL, NULL, NULL, K_LOWEST_THREAD_PRIO, K_ESSENTIAL); _mark_thread_as_started(thr); - _ready_thread(thr); } #endif @@ -327,11 +326,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread) _IntLibInit(); - /* ready the init/main and idle threads */ - - for (int ii = 0; ii < K_NUM_PRIORITIES; ii++) { - sys_dlist_init(&_ready_q.q[ii]); - } + _sched_init(); #ifndef CONFIG_SMP /* @@ -355,10 +350,12 @@ static void prepare_multithreading(struct k_thread *dummy_thread) #ifdef CONFIG_MULTITHREADING init_idle_thread(_idle_thread, _idle_stack); + _kernel.cpus[0].idle_thread = _idle_thread; #endif #if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 1 init_idle_thread(_idle_thread1, _idle_stack1); + _kernel.cpus[1].idle_thread = _idle_thread1; _kernel.cpus[1].id = 1; _kernel.cpus[1].irq_stack = K_THREAD_STACK_BUFFER(_interrupt_stack1) + CONFIG_ISR_STACK_SIZE; @@ -366,6 +363,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread) #if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 2 init_idle_thread(_idle_thread2, _idle_stack2); + _kernel.cpus[2].idle_thread = _idle_thread2; _kernel.cpus[2].id = 2; _kernel.cpus[2].irq_stack = K_THREAD_STACK_BUFFER(_interrupt_stack2) + CONFIG_ISR_STACK_SIZE; @@ -373,6 +371,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread) #if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 3 init_idle_thread(_idle_thread3, _idle_stack3); + _kernel.cpus[3].idle_thread = _idle_thread3; _kernel.cpus[3].id = 3; _kernel.cpus[3].irq_stack = K_THREAD_STACK_BUFFER(_interrupt_stack3) + CONFIG_ISR_STACK_SIZE; diff --git a/kernel/pipes.c b/kernel/pipes.c index 9b3c7d433b1..61622cbad9b 100644 --- a/kernel/pipes.c +++ b/kernel/pipes.c @@ -316,14 +316,12 @@ static bool pipe_xfer_prepare(sys_dlist_t *xfer_list, size_t min_xfer, s32_t timeout) { - sys_dnode_t *node; struct k_thread *thread; struct k_pipe_desc *desc; size_t num_bytes = 0; if (timeout == K_NO_WAIT) { - SYS_DLIST_FOR_EACH_NODE(&wait_q->waitq, node) { - thread = (struct k_thread *)node; + _WAIT_Q_FOR_EACH(wait_q, thread) { desc = (struct k_pipe_desc *)thread->base.swap_data; num_bytes += desc->bytes_to_xfer; @@
-367,7 +365,7 @@ static bool pipe_xfer_prepare(sys_dlist_t *xfer_list, * Add it to the transfer list. */ _unpend_thread(thread); - sys_dlist_append(xfer_list, &thread->base.k_q_node); + sys_dlist_append(xfer_list, &thread->base.qnode_dlist); } *waiter = (num_bytes > bytes_to_xfer) ? thread : NULL; diff --git a/kernel/sched.c b/kernel/sched.c index 9bb2a8a0486..7003ea07ac5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1,29 +1,46 @@ /* - * Copyright (c) 2016-2017 Wind River Systems, Inc. + * Copyright (c) 2018 Intel Corporation * * SPDX-License-Identifier: Apache-2.0 */ - #include -#include -#include #include +#include +#include #include -#include -#include #include +#include +#include -/* the only struct _kernel instance */ -struct _kernel _kernel = {0}; - -#ifndef CONFIG_SMP -extern k_tid_t const _idle_thread; +#ifdef CONFIG_SCHED_DUMB +#define _priq_run_add _priq_dumb_add +#define _priq_run_remove _priq_dumb_remove +#define _priq_run_best _priq_dumb_best +#else +#define _priq_run_add _priq_rb_add +#define _priq_run_remove _priq_rb_remove +#define _priq_run_best _priq_rb_best #endif -static inline int _is_thread_dummy(struct k_thread *thread) -{ - return _is_thread_state_set(thread, _THREAD_DUMMY); -} +#ifdef CONFIG_WAITQ_FAST +#define _priq_wait_add _priq_rb_add +#define _priq_wait_remove _priq_rb_remove +#define _priq_wait_best _priq_rb_best +#else +#define _priq_wait_add _priq_dumb_add +#define _priq_wait_remove _priq_dumb_remove +#define _priq_wait_best _priq_dumb_best +#endif + +/* the only struct _kernel instance */ +struct _kernel _kernel; + +static struct k_spinlock sched_lock; + +#define LOCKED(lck) for (k_spinlock_key_t __i = {}, \ + __key = k_spin_lock(lck); \ + !__i.key; \ + k_spin_unlock(lck, __key), __i.key = 1) static inline int _is_preempt(struct k_thread *thread) { @@ -35,206 +52,252 @@ static inline int _is_preempt(struct k_thread *thread) #endif } -static inline void _mark_thread_as_pending(struct k_thread *thread) +static inline int _is_thread_dummy(struct k_thread *thread) { - thread->base.thread_state |= _THREAD_PENDING; + return !!(thread->base.thread_state & _THREAD_DUMMY); +} + +static inline int _is_idle(struct k_thread *thread) +{ +#ifdef CONFIG_SMP + return thread->base.is_idle; +#else + extern struct k_thread * const _idle_thread; + + return thread == _idle_thread; +#endif +} + +#ifdef _NON_OPTIMIZED_TICKS_PER_SEC +s32_t _ms_to_ticks(s32_t ms) +{ + s64_t ms_ticks_per_sec = (s64_t)ms * sys_clock_ticks_per_sec; + + return (s32_t)ceiling_fraction(ms_ticks_per_sec, MSEC_PER_SEC); +} +#endif + +static struct k_thread *next_up(void) +{ +#ifndef CONFIG_SMP + /* In uniprocessor mode, we can leave the current thread in + * the queue (actually we have to, otherwise the assembly + * context switch code for all architectures would be + * responsible for putting it back in _Swap and ISR return!), + * which makes this choice simple. + */ + struct k_thread *th = _priq_run_best(&_kernel.ready_q.runq); + + return th ? th : _current_cpu->idle_thread; +#else + /* Under SMP, the "cache" mechanism for selecting the next + * thread doesn't work, so we have more work to do to test + * _current against the best choice from the queue. 
+ */ + int active = !_is_thread_prevented_from_running(_current); + int queued = _is_thread_queued(_current); + + struct k_thread *th = _priq_run_best(&_kernel.ready_q.runq); + + /* Idle thread if nothing else */ + if (!th) { + th = _current_cpu->idle_thread; + } + + /* Stay with current unless it's already been put back in the + * queue and something better is available (c.f. timeslicing, + * yield) + */ + if (active && !queued && !_is_t1_higher_prio_than_t2(th, _current)) { + th = _current; + } + + /* Put _current back into the queue if necessary */ + if (th != _current && !queued) { + _priq_run_add(&_kernel.ready_q.runq, _current); + } + + /* Remove the thread we're about to run from the queue (which + * potentially might not be there, but that's OK) + */ + _priq_run_remove(&_kernel.ready_q.runq, th); + + return th; +#endif +} + +static void update_cache(void) +{ +#ifndef CONFIG_SMP + _kernel.ready_q.cache = next_up(); +#endif +} + +void _add_thread_to_ready_q(struct k_thread *thread) +{ + LOCKED(&sched_lock) { + _priq_run_add(&_kernel.ready_q.runq, thread); + _mark_thread_as_queued(thread); + update_cache(); + } +} + +void _move_thread_to_end_of_prio_q(struct k_thread *thread) +{ + LOCKED(&sched_lock) { + _priq_run_remove(&_kernel.ready_q.runq, thread); + _priq_run_add(&_kernel.ready_q.runq, thread); + _mark_thread_as_queued(thread); + update_cache(); + } +} + +void _remove_thread_from_ready_q(struct k_thread *thread) +{ + LOCKED(&sched_lock) { + if (_is_thread_queued(thread)) { + _priq_run_remove(&_kernel.ready_q.runq, thread); + _mark_thread_as_not_queued(thread); + update_cache(); + } + } +} + +static void pend(struct k_thread *thread, _wait_q_t *wait_q, s32_t timeout) +{ + _remove_thread_from_ready_q(thread); + _mark_thread_as_pending(thread); + + /* The timeout handling is currently synchronized external to + * the scheduler using the legacy global lock. Should fix + * that. 
+ */ + if (timeout != K_FOREVER) { + s32_t ticks = _TICK_ALIGN + _ms_to_ticks(timeout); + int key = irq_lock(); + + _add_thread_timeout(thread, wait_q, ticks); + irq_unlock(key); + } + + if (wait_q) { +#ifdef CONFIG_WAITQ_FAST + thread->base.pended_on = wait_q; +#endif + _priq_wait_add(&wait_q->waitq, thread); + } #ifdef CONFIG_KERNEL_EVENT_LOGGER_THREAD _sys_k_event_logger_thread_pend(thread); #endif } -static inline int _is_idle_thread_ptr(k_tid_t thread) +void _pend_thread(struct k_thread *thread, _wait_q_t *wait_q, s32_t timeout) { -#ifdef CONFIG_SMP - return thread->base.is_idle; + __ASSERT_NO_MSG(thread == _current || _is_thread_dummy(thread)); + pend(thread, wait_q, timeout); +} + +static _wait_q_t *pended_on(struct k_thread *thread) +{ +#ifdef CONFIG_WAITQ_FAST + __ASSERT_NO_MSG(thread->base.pended_on); + + return thread->base.pended_on; #else - return thread == _idle_thread; + ARG_UNUSED(thread); + return NULL; #endif } -static inline int _get_ready_q_q_index(int prio) +struct k_thread *_find_first_thread_to_unpend(_wait_q_t *wait_q, + struct k_thread *from) { - return prio + _NUM_COOP_PRIO; + ARG_UNUSED(from); + + struct k_thread *ret = NULL; + + LOCKED(&sched_lock) { + ret = _priq_wait_best(&wait_q->waitq); + } + + return ret; } -static inline int _get_ready_q_prio_bmap_index(int prio) +void _unpend_thread_no_timeout(struct k_thread *thread) { - return (prio + _NUM_COOP_PRIO) >> 5; + LOCKED(&sched_lock) { + _priq_wait_remove(&pended_on(thread)->waitq, thread); + _mark_thread_as_not_pending(thread); + } + +#if defined(CONFIG_ASSERT) && defined(CONFIG_WAITQ_FAST) + thread->base.pended_on = NULL; +#endif } -static inline int _get_ready_q_prio_bit(int prio) +int _pend_current_thread(int key, _wait_q_t *wait_q, s32_t timeout) { - return (1u << ((prio + _NUM_COOP_PRIO) & 0x1f)); + pend(_current, wait_q, timeout); + return _Swap(key); } -#ifdef CONFIG_SMP -int _get_highest_ready_prio(void); -#else -static inline int _get_highest_ready_prio(void) +struct k_thread *_unpend_first_thread(_wait_q_t *wait_q) { - int bitmap = 0; - u32_t ready_range; + struct k_thread *t = _unpend1_no_timeout(wait_q); -#if (K_NUM_PRIORITIES <= 32) - ready_range = _ready_q.prio_bmap[0]; -#else - for (;; bitmap++) { + if (t) { + _abort_thread_timeout(t); + } - __ASSERT(bitmap < K_NUM_PRIO_BITMAPS, "prio out-of-range\n"); + return t; +} - if (_ready_q.prio_bmap[bitmap]) { - ready_range = _ready_q.prio_bmap[bitmap]; - break; +void _unpend_thread(struct k_thread *thread) +{ + _unpend_thread_no_timeout(thread); + _abort_thread_timeout(thread); +} + +/* FIXME: this API is glitchy when used in SMP. If the thread is + * currently scheduled on the other CPU, it will silently set it's + * priority but nothing will cause a reschedule until the next + * interrupt. An audit seems to show that all current usage is to set + * priorities on either _current or a pended thread, though, so it's + * fine for now. 
+ */ +void _thread_priority_set(struct k_thread *thread, int prio) +{ + int need_sched = 0; + + LOCKED(&sched_lock) { + need_sched = _is_thread_ready(thread); + + if (need_sched) { + _priq_run_remove(&_kernel.ready_q.runq, thread); + thread->base.prio = prio; + _priq_run_add(&_kernel.ready_q.runq, thread); + update_cache(); + } else { + thread->base.prio = prio; } } -#endif - int abs_prio = (find_lsb_set(ready_range) - 1) + (bitmap << 5); - - __ASSERT(abs_prio < K_NUM_PRIORITIES, "prio out-of-range\n"); - - return abs_prio - _NUM_COOP_PRIO; -} -#endif - -/* set the bit corresponding to prio in ready q bitmap */ -#if defined(CONFIG_MULTITHREADING) && !defined(CONFIG_SMP) -static void set_ready_q_prio_bit(int prio) -{ - int bmap_index = _get_ready_q_prio_bmap_index(prio); - u32_t *bmap = &_ready_q.prio_bmap[bmap_index]; - - *bmap |= _get_ready_q_prio_bit(prio); -} - -/* clear the bit corresponding to prio in ready q bitmap */ -static void clear_ready_q_prio_bit(int prio) -{ - int bmap_index = _get_ready_q_prio_bmap_index(prio); - u32_t *bmap = &_ready_q.prio_bmap[bmap_index]; - - *bmap &= ~_get_ready_q_prio_bit(prio); -} -#endif - -#if !defined(CONFIG_SMP) && defined(CONFIG_MULTITHREADING) -/* - * Find the next thread to run when there is no thread in the cache and update - * the cache. - */ -static struct k_thread *get_ready_q_head(void) -{ - int prio = _get_highest_ready_prio(); - int q_index = _get_ready_q_q_index(prio); - sys_dlist_t *list = &_ready_q.q[q_index]; - - __ASSERT(!sys_dlist_is_empty(list), - "no thread to run (prio: %d, queue index: %u)!\n", - prio, q_index); - - struct k_thread *thread = - (struct k_thread *)sys_dlist_peek_head_not_empty(list); - - return thread; -} -#endif - -/* - * Add thread to the ready queue, in the slot for its priority; the thread - * must not be on a wait queue. - * - * This function, along with _move_thread_to_end_of_prio_q(), are the _only_ - * places where a thread is put on the ready queue. - * - * Interrupts must be locked when calling this function. - */ - -void _add_thread_to_ready_q(struct k_thread *thread) -{ - __ASSERT(_is_prio_higher(thread->base.prio, K_LOWEST_THREAD_PRIO) || - ((thread->base.prio == K_LOWEST_THREAD_PRIO) && - (thread == _idle_thread)), - "thread %p prio too low (is %d, cannot be lower than %d)", - thread, thread->base.prio, - thread == _idle_thread ? K_LOWEST_THREAD_PRIO : - K_LOWEST_APPLICATION_THREAD_PRIO); - - __ASSERT(!_is_prio_higher(thread->base.prio, K_HIGHEST_THREAD_PRIO), - "thread %p prio too high (id %d, cannot be higher than %d)", - thread, thread->base.prio, K_HIGHEST_THREAD_PRIO); - -#ifdef CONFIG_MULTITHREADING - int q_index = _get_ready_q_q_index(thread->base.prio); - sys_dlist_t *q = &_ready_q.q[q_index]; - -# ifndef CONFIG_SMP - set_ready_q_prio_bit(thread->base.prio); -# endif - sys_dlist_append(q, &thread->base.k_q_node); - -# ifndef CONFIG_SMP - struct k_thread **cache = &_ready_q.cache; - - *cache = _is_t1_higher_prio_than_t2(thread, *cache) ? thread : *cache; -# endif -#else - sys_dlist_append(&_ready_q.q[0], &thread->base.k_q_node); - _ready_q.prio_bmap[0] = 1; -# ifndef CONFIG_SMP - _ready_q.cache = thread; -# endif -#endif -} - -/* - * This function, along with _move_thread_to_end_of_prio_q(), are the _only_ - * places where a thread is taken off the ready queue. - * - * Interrupts must be locked when calling this function. 
- */ - -void _remove_thread_from_ready_q(struct k_thread *thread) -{ -#if defined(CONFIG_MULTITHREADING) && !defined(CONFIG_SMP) - int q_index = _get_ready_q_q_index(thread->base.prio); - sys_dlist_t *q = &_ready_q.q[q_index]; - - sys_dlist_remove(&thread->base.k_q_node); - if (sys_dlist_is_empty(q)) { - clear_ready_q_prio_bit(thread->base.prio); + if (need_sched) { + _reschedule(irq_lock()); } - - struct k_thread **cache = &_ready_q.cache; - - *cache = *cache == thread ? get_ready_q_head() : *cache; -#else -# if !defined(CONFIG_SMP) - _ready_q.prio_bmap[0] = 0; - _ready_q.cache = NULL; -# endif - sys_dlist_remove(&thread->base.k_q_node); -#endif } -/* Releases the irq_lock and swaps to a higher priority thread if one - * is available, returning the _Swap() return value, otherwise zero. - * Does not swap away from a thread at a cooperative (unpreemptible) - * priority unless "yield" is true. - */ int _reschedule(int key) { - K_DEBUG("rescheduling threads\n"); - if (!_is_in_isr() && _is_preempt(_current) && - _is_prio_higher(_get_highest_ready_prio(), _current->base.prio)) { - K_DEBUG("context-switching out %p\n", _current); + _get_next_ready_thread() != _current) { return _Swap(key); - } else { - irq_unlock(key); - return 0; } + + irq_unlock(key); + return 0; } void k_sched_lock(void) @@ -261,82 +324,197 @@ void k_sched_unlock(void) #endif } -/* convert milliseconds to ticks */ - -#ifdef _NON_OPTIMIZED_TICKS_PER_SEC -s32_t _ms_to_ticks(s32_t ms) +#ifdef CONFIG_SMP +struct k_thread *_get_next_ready_thread(void) { - s64_t ms_ticks_per_sec = (s64_t)ms * sys_clock_ticks_per_sec; + struct k_thread *ret = 0; - return (s32_t)ceiling_fraction(ms_ticks_per_sec, MSEC_PER_SEC); + LOCKED(&sched_lock) { + ret = next_up(); + } + + return ret; } #endif -/* Pend the specified thread: it must *not* be in the ready queue. It - * must be either _current or a DUMMY thread (i.e. this is NOT an API - * for pending another thread that might be running!). 
It must be - * called with interrupts locked - */ -void _pend_thread(struct k_thread *thread, _wait_q_t *wait_q, s32_t timeout) +#ifdef CONFIG_USE_SWITCH +void *_get_next_switch_handle(void *interrupted) { - __ASSERT(thread == _current || _is_thread_dummy(thread), - "Can only pend _current or DUMMY"); - -#ifdef CONFIG_MULTITHREADING - sys_dlist_t *wait_q_list = (sys_dlist_t *)wait_q; - struct k_thread *pending; - - if (!wait_q_list) { - goto inserted; + if (!_is_preempt(_current) && + !(_current->base.thread_state & _THREAD_DEAD)) { + return interrupted; } - SYS_DLIST_FOR_EACH_CONTAINER(wait_q_list, pending, base.k_q_node) { - if (_is_t1_higher_prio_than_t2(thread, pending)) { - sys_dlist_insert_before(wait_q_list, - &pending->base.k_q_node, - &thread->base.k_q_node); - goto inserted; + _current->switch_handle = interrupted; + + LOCKED(&sched_lock) { + struct k_thread *next = next_up(); + + if (next != _current) { + _current = next; } } - sys_dlist_append(wait_q_list, &thread->base.k_q_node); + _check_stack_sentinel(); -inserted: - _mark_thread_as_pending(thread); - - if (timeout != K_FOREVER) { - s32_t ticks = _TICK_ALIGN + _ms_to_ticks(timeout); - - _add_thread_timeout(thread, wait_q, ticks); - } + return _current->switch_handle; +} #endif -} -void _unpend_thread_no_timeout(struct k_thread *thread) +void _priq_dumb_add(sys_dlist_t *pq, struct k_thread *thread) { - __ASSERT(thread->base.thread_state & _THREAD_PENDING, ""); + struct k_thread *t; - sys_dlist_remove(&thread->base.k_q_node); - _mark_thread_as_not_pending(thread); -} + __ASSERT_NO_MSG(!_is_idle(thread)); -void _unpend_thread(struct k_thread *thread) -{ - _unpend_thread_no_timeout(thread); - _abort_thread_timeout(thread); -} - -struct k_thread *_unpend_first_thread(_wait_q_t *wait_q) -{ - struct k_thread *t = _unpend1_no_timeout(wait_q); - - if (t) { - _abort_thread_timeout(t); + SYS_DLIST_FOR_EACH_CONTAINER(pq, t, base.qnode_dlist) { + if (_is_t1_higher_prio_than_t2(thread, t)) { + sys_dlist_insert_before(pq, &t->base.qnode_dlist, + &thread->base.qnode_dlist); + return; + } } - return t; + sys_dlist_append(pq, &thread->base.qnode_dlist); } +void _priq_dumb_remove(sys_dlist_t *pq, struct k_thread *thread) +{ + __ASSERT_NO_MSG(!_is_idle(thread)); + + sys_dlist_remove(&thread->base.qnode_dlist); +} + +struct k_thread *_priq_dumb_best(sys_dlist_t *pq) +{ + return CONTAINER_OF(sys_dlist_peek_head(pq), + struct k_thread, base.qnode_dlist); +} + +int _priq_rb_lessthan(struct rbnode *a, struct rbnode *b) +{ + struct k_thread *ta, *tb; + + ta = CONTAINER_OF(a, struct k_thread, base.qnode_rb); + tb = CONTAINER_OF(b, struct k_thread, base.qnode_rb); + + if (_is_t1_higher_prio_than_t2(ta, tb)) { + return 1; + } else if (_is_t1_higher_prio_than_t2(tb, ta)) { + return 0; + } else { + return ta->base.order_key < tb->base.order_key ? 1 : 0; + } +} + +void _priq_rb_add(struct _priq_rb *pq, struct k_thread *thread) +{ + struct k_thread *t; + + __ASSERT_NO_MSG(!_is_idle(thread)); + + thread->base.order_key = pq->next_order_key++; + + /* Renumber at wraparound. This is tiny code, and in practice + * will almost never be hit on real systems. BUT on very + * long-running systems where a priq never completely empties + * AND that contains very large numbers of threads, it can be + * a latency glitch to loop over all the threads like this. 
+ */ + if (!pq->next_order_key) { + RB_FOR_EACH_CONTAINER(&pq->tree, t, base.qnode_rb) { + t->base.order_key = pq->next_order_key++; + } + } + + rb_insert(&pq->tree, &thread->base.qnode_rb); +} + +void _priq_rb_remove(struct _priq_rb *pq, struct k_thread *thread) +{ + __ASSERT_NO_MSG(!_is_idle(thread)); + + rb_remove(&pq->tree, &thread->base.qnode_rb); + + if (!pq->tree.root) { + pq->next_order_key = 0; + } +} + +struct k_thread *_priq_rb_best(struct _priq_rb *pq) +{ + struct rbnode *n = rb_get_min(&pq->tree); + + return CONTAINER_OF(n, struct k_thread, base.qnode_rb); +} + +#ifdef CONFIG_TIMESLICING +extern s32_t _time_slice_duration; /* Measured in ms */ +extern s32_t _time_slice_elapsed; /* Measured in ms */ +extern int _time_slice_prio_ceiling; + +void k_sched_time_slice_set(s32_t duration_in_ms, int prio) +{ + __ASSERT(duration_in_ms >= 0, ""); + __ASSERT((prio >= 0) && (prio < CONFIG_NUM_PREEMPT_PRIORITIES), ""); + + _time_slice_duration = duration_in_ms; + _time_slice_elapsed = 0; + _time_slice_prio_ceiling = prio; +} + +int _is_thread_time_slicing(struct k_thread *thread) +{ + int ret = 0; + + /* Should fix API. Doesn't make sense for non-running threads + * to call this + */ + __ASSERT_NO_MSG(thread == _current); + + if (_time_slice_duration <= 0 || !_is_preempt(thread) || + _is_prio_higher(thread->base.prio, _time_slice_prio_ceiling)) { + return 0; + } + + + LOCKED(&sched_lock) { + struct k_thread *next = _priq_run_best(&_kernel.ready_q.runq); + + if (next) { + ret = thread->base.prio == next->base.prio; + } + } + + return ret; +} + +/* Must be called with interrupts locked */ +/* Should be called only immediately before a thread switch */ +void _update_time_slice_before_swap(void) +{ +#ifdef CONFIG_TICKLESS_KERNEL + if (!_is_thread_time_slicing(_get_next_ready_thread())) { + return; + } + + u32_t remaining = _get_remaining_program_time(); + + if (!remaining || (_time_slice_duration < remaining)) { + _set_time(_time_slice_duration); + } else { + /* Account previous elapsed time and reprogram + * timer with remaining time + */ + _set_time(remaining); + } + +#endif + /* Restart time slice count at new thread switch */ + _time_slice_elapsed = 0; +} +#endif /* CONFIG_TIMESLICING */ + int _unpend_all(_wait_q_t *waitq) { int need_sched = 0; @@ -351,16 +529,17 @@ int _unpend_all(_wait_q_t *waitq) return need_sched; } - -/* Block the current thread and swap to the next. Releases the - * irq_lock, does a _Swap and returns the return value set at wakeup - * time - */ -int _pend_current_thread(int key, _wait_q_t *wait_q, s32_t timeout) +void _sched_init(void) { - _remove_thread_from_ready_q(_current); - _pend_thread(_current, wait_q, timeout); - return _Swap(key); +#ifdef CONFIG_SCHED_DUMB + sys_dlist_init(&_kernel.ready_q.runq); +#else + _kernel.ready_q.runq = (struct _priq_rb) { + .tree = { + .lessthan_fn = _priq_rb_lessthan, + } + }; +#endif } int _impl_k_thread_priority_get(k_tid_t thread) @@ -383,10 +562,8 @@ void _impl_k_thread_priority_set(k_tid_t tid, int prio) __ASSERT(!_is_in_isr(), ""); struct k_thread *thread = (struct k_thread *)tid; - int key = irq_lock(); _thread_priority_set(thread, prio); - _reschedule(key); } #ifdef CONFIG_USERSPACE @@ -406,49 +583,16 @@ Z_SYSCALL_HANDLER(k_thread_priority_set, thread_p, prio) } #endif -/* - * Interrupts must be locked when calling this function. - * - * This function, along with _add_thread_to_ready_q() and - * _remove_thread_from_ready_q(), are the _only_ places where a thread is - * taken off or put on the ready queue. 
- */ -void _move_thread_to_end_of_prio_q(struct k_thread *thread) -{ -#ifdef CONFIG_MULTITHREADING - int q_index = _get_ready_q_q_index(thread->base.prio); - sys_dlist_t *q = &_ready_q.q[q_index]; - - if (sys_dlist_is_tail(q, &thread->base.k_q_node)) { - return; - } - - sys_dlist_remove(&thread->base.k_q_node); - sys_dlist_append(q, &thread->base.k_q_node); - -# ifndef CONFIG_SMP - struct k_thread **cache = &_ready_q.cache; - - *cache = *cache == thread ? get_ready_q_head() : *cache; -# endif -#endif -} - void _impl_k_yield(void) { __ASSERT(!_is_in_isr(), ""); - int key = irq_lock(); + if (!_is_idle(_current)) { + _move_thread_to_end_of_prio_q(_current); + } - _move_thread_to_end_of_prio_q(_current); - - if (_current == _get_next_ready_thread()) { - irq_unlock(key); -#ifdef CONFIG_STACK_SENTINEL - _check_stack_sentinel(); -#endif - } else { - _Swap(key); + if (_get_next_ready_thread() != _current) { + _Swap(irq_lock()); } } @@ -537,69 +681,6 @@ k_tid_t _impl_k_current_get(void) Z_SYSCALL_HANDLER0_SIMPLE(k_current_get); #endif -#ifdef CONFIG_TIMESLICING -extern s32_t _time_slice_duration; /* Measured in ms */ -extern s32_t _time_slice_elapsed; /* Measured in ms */ -extern int _time_slice_prio_ceiling; - -void k_sched_time_slice_set(s32_t duration_in_ms, int prio) -{ - __ASSERT(duration_in_ms >= 0, ""); - __ASSERT((prio >= 0) && (prio < CONFIG_NUM_PREEMPT_PRIORITIES), ""); - - _time_slice_duration = duration_in_ms; - _time_slice_elapsed = 0; - _time_slice_prio_ceiling = prio; -} - -int _is_thread_time_slicing(struct k_thread *thread) -{ - /* - * Time slicing is done on the thread if following conditions are met - * - * Time slice duration should be set > 0 - * Should not be the idle thread - * Priority should be higher than time slice priority ceiling - * There should be multiple threads active with same priority - */ - - if (!(_time_slice_duration > 0) || (_is_idle_thread_ptr(thread)) - || _is_prio_higher(thread->base.prio, _time_slice_prio_ceiling)) { - return 0; - } - - int q_index = _get_ready_q_q_index(thread->base.prio); - sys_dlist_t *q = &_ready_q.q[q_index]; - - return sys_dlist_has_multiple_nodes(q); -} - -/* Must be called with interrupts locked */ -/* Should be called only immediately before a thread switch */ -void _update_time_slice_before_swap(void) -{ -#ifdef CONFIG_TICKLESS_KERNEL - if (!_is_thread_time_slicing(_get_next_ready_thread())) { - return; - } - - u32_t remaining = _get_remaining_program_time(); - - if (!remaining || (_time_slice_duration < remaining)) { - _set_time(_time_slice_duration); - } else { - /* Account previous elapsed time and reprogram - * timer with remaining time - */ - _set_time(remaining); - } - -#endif - /* Restart time slice count at new thread switch */ - _time_slice_elapsed = 0; -} -#endif /* CONFIG_TIMESLICING */ - int _impl_k_is_preempt_thread(void) { return !_is_in_isr() && _is_preempt(_current); @@ -608,118 +689,3 @@ int _impl_k_is_preempt_thread(void) #ifdef CONFIG_USERSPACE Z_SYSCALL_HANDLER0_SIMPLE(k_is_preempt_thread); #endif - -#ifdef CONFIG_SMP -int _get_highest_ready_prio(void) -{ - int p; - - for (p = 0; p < ARRAY_SIZE(_kernel.ready_q.q); p++) { - if (!sys_dlist_is_empty(&_kernel.ready_q.q[p])) { - break; - } - } - - __ASSERT(p < K_NUM_PRIORITIES, "No ready prio"); - - return p - _NUM_COOP_PRIO; -} - -struct k_thread *_get_next_ready_thread(void) -{ - int p, mycpu = _arch_curr_cpu()->id; - - for (p = 0; p < ARRAY_SIZE(_ready_q.q); p++) { - sys_dlist_t *list = &_ready_q.q[p]; - sys_dnode_t *node; - - for (node = list->tail; node != 
list; node = node->prev) { - struct k_thread *th = (struct k_thread *)node; - - /* Skip threads that are already running elsewhere! */ - if (th->base.active && th->base.cpu != mycpu) { - continue; - } - - return th; - } - } - - __ASSERT(0, "No ready thread found for cpu %d\n", mycpu); - return NULL; -} -#endif - -#ifdef CONFIG_USE_SWITCH -void *_get_next_switch_handle(void *interrupted) -{ - if (!_is_preempt(_current) && - !(_current->base.thread_state & _THREAD_DEAD)) { - return interrupted; - } - - int key = irq_lock(); - struct k_thread *new_thread = _get_next_ready_thread(); - -#ifdef CONFIG_SMP - _current->base.active = 0; - new_thread->base.active = 1; -#endif - - irq_unlock(key); - - _current->switch_handle = interrupted; - _current = new_thread; - - void *ret = new_thread->switch_handle; - -#ifdef CONFIG_SMP - _smp_reacquire_global_lock(_current); -#endif - - _check_stack_sentinel(); - - return ret; -} -#endif - -void _thread_priority_set(struct k_thread *thread, int prio) -{ - if (_is_thread_ready(thread)) { - _remove_thread_from_ready_q(thread); - thread->base.prio = prio; - _add_thread_to_ready_q(thread); - } else { - thread->base.prio = prio; - } -} - -struct k_thread *_find_first_thread_to_unpend(_wait_q_t *wait_q, - struct k_thread *from) -{ -#ifdef CONFIG_SYS_CLOCK_EXISTS - extern volatile int _handling_timeouts; - - if (_handling_timeouts) { - sys_dlist_t *q = (sys_dlist_t *)wait_q; - sys_dnode_t *cur = from ? &from->base.k_q_node : NULL; - - /* skip threads that have an expired timeout */ - SYS_DLIST_ITERATE_FROM_NODE(q, cur) { - struct k_thread *thread = (struct k_thread *)cur; - - if (_is_thread_timeout_expired(thread)) { - continue; - } - - return thread; - } - return NULL; - } -#else - ARG_UNUSED(from); -#endif - - return _waitq_head(wait_q); - -}
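
The patch introduces a new locking idiom plus two interchangeable queue backends; the short standalone sketches below are not part of the patch itself, they only illustrate the ideas. First, the LOCKED() macro added in kernel/sched.c: a one-pass for loop whose initializer takes the spinlock and whose final clause releases it. The macro body below is the one from the patch; the k_spinlock type and the lock/unlock functions are simplified stand-ins (assumptions for illustration, not the kernel's implementation, which masks interrupts and spins on SMP).

#include <stdio.h>

/* Stand-in types and functions so this sketch builds on its own; they are
 * placeholders for the kernel's real spinlock API.
 */
typedef struct { int key; } k_spinlock_key_t;
struct k_spinlock { int locked; };

static k_spinlock_key_t k_spin_lock(struct k_spinlock *l)
{
	l->locked = 1;
	return (k_spinlock_key_t){ .key = 1 };
}

static void k_spin_unlock(struct k_spinlock *l, k_spinlock_key_t key)
{
	(void)key;
	l->locked = 0;
}

/* Macro body as in the patch: __i.key starts at 0, so the attached block runs
 * exactly once; the last clause drops the lock and sets __i.key = 1, which
 * ends the loop.  (The empty initializer is a GNU C / C23 construct.)
 */
#define LOCKED(lck) for (k_spinlock_key_t __i = {}, \
			 __key = k_spin_lock(lck); \
			 !__i.key; \
			 k_spin_unlock(lck, __key), __i.key = 1)

static struct k_spinlock sched_lock;

int main(void)
{
	LOCKED(&sched_lock) {
		printf("inside:  locked=%d\n", sched_lock.locked);  /* prints 1 */
	}
	printf("outside: locked=%d\n", sched_lock.locked);          /* prints 0 */
	return 0;
}

Every ready-queue operation in the rewritten sched.c (_add_thread_to_ready_q(), _move_thread_to_end_of_prio_q(), _remove_thread_from_ready_q(), and friends) is wrapped this way, so the queue update and the uniprocessor cache refresh happen under a single lock without any goto or helper function.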
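
Second, the red/black tree backend orders threads with _priq_rb_lessthan(): priority first, then the order_key counter stamped by _priq_rb_add(), so equal-priority threads stay FIFO. The self-contained model below shows just that comparison; struct fake_thread and the helper names are illustrative stand-ins, and the real code additionally renumbers order_key values when the counter wraps, as the patch comment explains.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_thread {
	int prio;           /* Zephyr convention: numerically lower = higher priority */
	uint32_t order_key; /* assigned from a running counter at insertion time */
};

static bool t1_higher_prio_than_t2(const struct fake_thread *a,
				   const struct fake_thread *b)
{
	return a->prio < b->prio;
}

/* Mirrors the comparator logic: true when 'a' should sort before 'b' */
static bool lessthan(const struct fake_thread *a, const struct fake_thread *b)
{
	if (t1_higher_prio_than_t2(a, b)) {
		return true;
	} else if (t1_higher_prio_than_t2(b, a)) {
		return false;
	} else {
		return a->order_key < b->order_key;
	}
}

int main(void)
{
	struct fake_thread first  = { .prio = 5, .order_key = 10 };
	struct fake_thread second = { .prio = 5, .order_key = 11 };
	struct fake_thread urgent = { .prio = 1, .order_key = 12 };

	printf("first before second: %d\n", lessthan(&first, &second)); /* 1: FIFO tiebreak */
	printf("urgent before first: %d\n", lessthan(&urgent, &first)); /* 1: higher priority */
	return 0;
}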
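
Third, the "dumb" backend described in include/sched_priq.h keeps the queue as a priority-sorted list: insertion walks the list (O(N)) and the best thread is simply the head (O(1)). The toy singly-linked list below models that behavior under those assumptions; it is not the kernel's sys_dlist code.

#include <stdio.h>

struct node {
	int prio;          /* numerically lower = higher priority */
	struct node *next;
};

static void dumb_add(struct node **head, struct node *n)
{
	struct node **link = head;

	/* Walk past entries of higher or equal priority so equal-priority
	 * threads keep FIFO order, then splice in before the first entry of
	 * strictly lower priority, matching _priq_dumb_add()'s policy.
	 */
	while (*link && (*link)->prio <= n->prio) {
		link = &(*link)->next;
	}
	n->next = *link;
	*link = n;
}

static struct node *dumb_best(struct node *head)
{
	return head;	/* list is kept sorted, so the head is always best */
}

int main(void)
{
	struct node a = { .prio = 7 }, b = { .prio = 3 }, c = { .prio = 7 };
	struct node *runq = NULL;

	dumb_add(&runq, &a);
	dumb_add(&runq, &b);
	dumb_add(&runq, &c);

	for (struct node *n = dumb_best(runq); n != NULL; n = n->next) {
		printf("prio %d\n", n->prio);	/* prints 3, 7, 7 (a before c) */
	}
	return 0;
}

Which backend serves which queue is a build-time choice in the new Kconfig options: CONFIG_SCHED_DUMB keeps the sorted list for the ready queue, while CONFIG_WAITQ_FAST switches wait queues over to the balanced tree.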