kernel: Scheduler rewrite

This replaces the existing scheduler implementation (but not priority
handling) with a somewhat simpler one.  Thread selection behavior does
not change.  New features:

+ Unifies the SMP and uniprocessor selection code (with the sole
  exception that the "cache" trick is not possible in SMP).

+ The old static multi-queue implementation is gone and has been
  replaced with a build-time choice of either a "dumb" list
  implementation (faster and significantly smaller for apps with only
  a few threads) or a balanced tree queue which scales well to
  arbitrary numbers of threads and priority levels.  This is
  controlled via the CONFIG_SCHED_DUMB kconfig variable; a minimal
  standalone model of the list variant follows this list.

+ The balanced tree implementation is usable symmetrically for the
  wait_q abstraction, fixing a scalability glitch Zephyr had when many
  threads were waiting on a single object.  This can be selected via
  CONFIG_WAITQ_FAST.
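
For illustration only (not part of the committed sources), the
following is a minimal standalone C model of the "dumb" queue
discipline: a list kept sorted by priority, so adding a thread costs
O(N) while picking the best thread is O(1).  The model_* names and the
bare singly linked list are assumptions made for this sketch; the real
code uses sys_dlist_t and struct k_thread.

    #include <stddef.h>

    /* Standalone model, not the Zephyr implementation.  A lower prio
     * value means a higher priority, matching the kernel convention.
     */
    struct model_thread {
            int prio;
            struct model_thread *next;
    };

    /* O(N) insert that keeps the list sorted; using <= places a new
     * thread after existing threads of equal priority, preserving
     * FIFO order within a priority level.
     */
    static void model_priq_add(struct model_thread **pq,
                               struct model_thread *thread)
    {
            struct model_thread **link = pq;

            while (*link != NULL && (*link)->prio <= thread->prio) {
                    link = &(*link)->next;
            }
            thread->next = *link;
            *link = thread;
    }

    /* O(1) selection: the best thread is simply the list head. */
    static struct model_thread *model_priq_best(struct model_thread *pq)
    {
            return pq;
    }

With only a handful of runnable threads the insertion walk is short,
which is why the list variant stays small and fast; the tree variant
trades extra code for O(logN) behavior as thread counts grow.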

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
Author: Andy Ross, 2018-05-03 14:51:49 -07:00 (committed by Anas Nashif)
Commit: 1acd8c2996
10 changed files with 600 additions and 484 deletions


@@ -22,6 +22,7 @@
#include <atomic.h>
#include <errno.h>
#include <misc/__assert.h>
#include <sched_priq.h>
#include <misc/dlist.h>
#include <misc/slist.h>
#include <misc/sflist.h>
@@ -33,6 +34,7 @@
#include <syscall.h>
#include <misc/printk.h>
#include <arch/cpu.h>
#include <misc/rb.h>
#ifdef __cplusplus
extern "C" {
@@ -91,12 +93,26 @@ extern "C" {
#define K_HIGHEST_APPLICATION_THREAD_PRIO (K_HIGHEST_THREAD_PRIO)
#define K_LOWEST_APPLICATION_THREAD_PRIO (K_LOWEST_THREAD_PRIO - 1)
#ifdef CONFIG_WAITQ_FAST
typedef struct {
struct _priq_rb waitq;
} _wait_q_t;
extern int _priq_rb_lessthan(struct rbnode *a, struct rbnode *b);
#define _WAIT_Q_INIT(wait_q) { { { .lessthan_fn = _priq_rb_lessthan } } }
#else
typedef struct {
sys_dlist_t waitq;
} _wait_q_t;
#define _WAIT_Q_INIT(wait_q) { SYS_DLIST_STATIC_INIT(&(wait_q)->waitq) }
#endif
#ifdef CONFIG_OBJECT_TRACING
#define _OBJECT_TRACING_NEXT_PTR(type) struct type *__next
#define _OBJECT_TRACING_INIT .__next = NULL,
@@ -405,7 +421,17 @@ struct __thread_entry {
struct _thread_base {
/* this thread's entry in a ready/wait queue */
sys_dnode_t k_q_node;
union {
sys_dlist_t qnode_dlist;
struct rbnode qnode_rb;
};
#ifdef CONFIG_WAITQ_FAST
/* wait queue on which the thread is pended (needed only for
* trees, not dumb lists)
*/
_wait_q_t *pended_on;
#endif
/* user facing 'thread options'; values defined in include/kernel.h */
u8_t user_options;
@@ -440,13 +466,12 @@ struct _thread_base {
u16_t preempt;
};
u32_t order_key;
#ifdef CONFIG_SMP
/* True for the per-CPU idle threads */
u8_t is_idle;
/* Non-zero when actively running on a CPU */
u8_t active;
/* CPU index on which thread was last run */
u8_t cpu;

include/sched_priq.h (new file, 47 lines)

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _sched_priq__h_
#define _sched_priq__h_
#include <misc/util.h>
#include <misc/dlist.h>
#include <misc/rb.h>
/* Two abstractions are defined here for "thread priority queues".
*
* One is a "dumb" list implementation appropriate for systems with
* small numbers of threads and sensitive to code size. It is stored
* in sorted order, taking an O(N) cost every time a thread is added
* to the list. This corresponds to the way the original _wait_q_t
* abstraction worked and is very fast as long as the number of
* threads is small.
*
* The other is a balanced tree "fast" implementation with rather
* larger code size (due to the data structure itself, the code here
* is just stubs) and higher constant-factor performance overhead, but
* much better O(logN) scaling in the presence of large numbers of
* threads.
*
* Each can be used for either the wait_q or system ready queue,
* configurable at build time.
*/
struct k_thread;
struct k_thread *_priq_dumb_best(sys_dlist_t *pq);
void _priq_dumb_remove(sys_dlist_t *pq, struct k_thread *thread);
void _priq_dumb_add(sys_dlist_t *pq, struct k_thread *thread);
struct _priq_rb {
struct rbtree tree;
int next_order_key;
};
void _priq_rb_add(struct _priq_rb *pq, struct k_thread *thread);
void _priq_rb_remove(struct _priq_rb *pq, struct k_thread *thread);
struct k_thread *_priq_rb_best(struct _priq_rb *pq);
#endif /* _sched_priq__h_ */
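
For the tree-backed queue declared above, kernel.h registers
_priq_rb_lessthan() as the rbtree comparison callback and pairs the
queue with a next_order_key counter (threads carry an order_key, per
the _thread_base diff above).  A plausible standalone sketch of that
ordering follows; it is a guess at the idea, not the committed
function, and the model_* names are hypothetical.  Priorities are
compared first, then an insertion-order key breaks ties so
equal-priority threads keep FIFO order; the best thread is then simply
the tree minimum (see rb_get_min() in the wait_q changes below).

    /* Standalone sketch of the tree ordering; not the committed code. */
    struct model_node {
            int prio;                /* lower value = higher priority */
            unsigned int order_key;  /* taken from next_order_key on add */
    };

    /* Returns nonzero when a should sort ahead of (run before) b. */
    static int model_lessthan(const struct model_node *a,
                              const struct model_node *b)
    {
            if (a->prio != b->prio) {
                    return a->prio < b->prio;
            }
            return a->order_key < b->order_key; /* FIFO tie-break */
    }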


@@ -169,6 +169,34 @@ config APPLICATION_MEMORY
will have the project-level application objects and any libraries
including the C library in it.
config WAITQ_FAST
bool
prompt "Use scalable wait_q implementation"
default n
help
When selected, the wait_q abstraction used in IPC primitives
to pend threads for wakeup later will be implemented with a
balanced tree instead of a linear list. Choose this if you
expect to have many threads waiting on individual
primitives, or if you have already included the red/black
tree code in the build for some other purpose (in which case
this results in less code size increase than the default
implementation).
config SCHED_DUMB
bool
prompt "Use a simple linked list scheduler"
default y
help
When selected, the scheduler ready queue will be implemented
as a simple unordered list, with very fast constant time
performance for single threads and very low code size.
Choose this on systems with constrained code size that will
never see more than a small number (3, maybe) of runnable
threads in the queue at any given time. On most platforms
(that are not otherwise using the red/black tree) this
results in a savings of ~2k of code size.
menu "Kernel Debugging and Metrics"
config INIT_STACKS
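
As a rough illustration of the workload WAITQ_FAST targets (this
fragment is an assumption for illustration and not part of this
commit; NUM_WAITERS, WAITER_STACK_SIZE and the thread bodies are made
up), a Zephyr application can park many threads on one semaphore so
that they all pend on the same wait_q:

    #include <zephyr.h>

    #define NUM_WAITERS       16
    #define WAITER_STACK_SIZE 512

    K_SEM_DEFINE(shared_sem, 0, NUM_WAITERS);

    K_THREAD_STACK_ARRAY_DEFINE(waiter_stacks, NUM_WAITERS,
                                WAITER_STACK_SIZE);
    static struct k_thread waiter_threads[NUM_WAITERS];

    static void waiter(void *p1, void *p2, void *p3)
    {
            ARG_UNUSED(p1);
            ARG_UNUSED(p2);
            ARG_UNUSED(p3);

            /* Every waiter pends on the same wait_q in the semaphore */
            k_sem_take(&shared_sem, K_FOREVER);
    }

    void start_waiters(void)
    {
            for (int i = 0; i < NUM_WAITERS; i++) {
                    k_thread_create(&waiter_threads[i], waiter_stacks[i],
                                    WAITER_STACK_SIZE, waiter,
                                    NULL, NULL, NULL,
                                    K_PRIO_PREEMPT(5), 0, K_NO_WAIT);
            }
    }

With the default list-based wait_q, pending each new waiter costs O(N)
in the number of threads already queued; selecting WAITQ_FAST replaces
that with the O(logN) balanced tree described in sched_priq.h.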


@@ -12,6 +12,7 @@
#if !defined(_ASMLANGUAGE)
#include <atomic.h>
#include <misc/dlist.h>
#include <misc/rb.h>
#include <string.h>
#endif
@@ -27,7 +28,6 @@
* defined.
*/
/* states: common uses low bits, arch-specific use high bits */
/* Not a real thread */
@@ -48,6 +48,9 @@
/* Thread is actively looking at events to see if they are ready */
#define _THREAD_POLLING (1 << 5)
/* Thread is present in the ready queue */
#define _THREAD_QUEUED (1 << 6)
/* end - states */
#ifdef CONFIG_STACK_SENTINEL
@@ -69,13 +72,13 @@ struct _ready_q {
#ifndef CONFIG_SMP
/* always contains next thread to run: cannot be NULL */
struct k_thread *cache;
/* bitmap of priorities that contain at least one ready thread */
u32_t prio_bmap[K_NUM_PRIO_BITMAPS];
#endif
/* ready queues, one per priority */
sys_dlist_t q[K_NUM_PRIORITIES];
#ifdef CONFIG_SCHED_DUMB
sys_dlist_t runq;
#else
struct _priq_rb runq;
#endif
};
typedef struct _ready_q _ready_q_t;
@@ -90,6 +93,9 @@ struct _cpu {
/* currently scheduled thread */
struct k_thread *current;
/* one assigned idle thread per CPU */
struct k_thread *idle_thread;
int id;
};
@@ -158,8 +164,10 @@ typedef struct _kernel _kernel_t;
extern struct _kernel _kernel;
#ifdef CONFIG_SMP
#define _current_cpu (_arch_curr_cpu())
#define _current (_arch_curr_cpu()->current)
#else
#define _current_cpu (&_kernel.cpus[0])
#define _current _kernel.current
#endif


@@ -33,6 +33,7 @@
#define _ASSERT_VALID_PRIO(prio, entry_point) __ASSERT((prio) == -1, "")
#endif
void _sched_init(void);
void _add_thread_to_ready_q(struct k_thread *thread);
void _move_thread_to_end_of_prio_q(struct k_thread *thread);
void _remove_thread_from_ready_q(struct k_thread *thread);
@@ -111,6 +112,11 @@ static inline int _is_thread_polling(struct k_thread *thread)
return _is_thread_state_set(thread, _THREAD_POLLING);
}
static inline int _is_thread_queued(struct k_thread *thread)
{
return _is_thread_state_set(thread, _THREAD_QUEUED);
}
static inline void _mark_thread_as_suspended(struct k_thread *thread)
{
thread->base.thread_state |= _THREAD_SUSPENDED;
@@ -126,6 +132,11 @@ static inline void _mark_thread_as_started(struct k_thread *thread)
thread->base.thread_state &= ~_THREAD_PRESTART;
}
static inline void _mark_thread_as_pending(struct k_thread *thread)
{
thread->base.thread_state |= _THREAD_PENDING;
}
static inline void _mark_thread_as_not_pending(struct k_thread *thread)
{
thread->base.thread_state &= ~_THREAD_PENDING;
@@ -152,6 +163,16 @@ static inline void _mark_thread_as_not_polling(struct k_thread *thread)
_reset_thread_states(thread, _THREAD_POLLING);
}
static inline void _mark_thread_as_queued(struct k_thread *thread)
{
_set_thread_states(thread, _THREAD_QUEUED);
}
static inline void _mark_thread_as_not_queued(struct k_thread *thread)
{
_reset_thread_states(thread, _THREAD_QUEUED);
}
static inline int _is_under_prio_ceiling(int prio)
{
return prio >= CONFIG_PRIORITY_CEILING;


@@ -58,12 +58,10 @@ static inline unsigned int _Swap(unsigned int key)
new_thread = _get_next_ready_thread();
if (new_thread != old_thread) {
old_thread->swap_retval = -EAGAIN;
#ifdef CONFIG_SMP
old_thread->base.active = 0;
new_thread->base.active = 1;
new_thread->base.cpu = _arch_curr_cpu()->id;
_smp_release_global_lock(new_thread);


@@ -11,7 +11,9 @@
#include <kernel_structs.h>
#include <misc/dlist.h>
#include <misc/rb.h>
#include <ksched.h>
#include <sched_priq.h>
#ifdef __cplusplus
extern "C" {
@@ -42,8 +44,30 @@ static ALWAYS_INLINE int _abort_thread_timeout(struct k_thread *thread)
#define _get_next_timeout_expiry() (K_FOREVER)
#endif
#ifdef CONFIG_WAITQ_FAST
#define _WAIT_Q_FOR_EACH(wq, thread_ptr) \
SYS_DLIST_FOR_EACH_CONTAINER(&((wq)->waitq), thread_ptr, base.k_q_node)
RB_FOR_EACH_CONTAINER(&(wq)->waitq.tree, thread_ptr, base.qnode_rb)
static inline void _waitq_init(_wait_q_t *w)
{
w->waitq = (struct _priq_rb) {
.tree = {
.lessthan_fn = _priq_rb_lessthan
}
};
}
static inline struct k_thread *_waitq_head(_wait_q_t *w)
{
return (void *)rb_get_min(&w->waitq.tree);
}
#else /* !CONFIG_WAITQ_FAST: */
#define _WAIT_Q_FOR_EACH(wq, thread_ptr) \
SYS_DLIST_FOR_EACH_CONTAINER(&((wq)->waitq), thread_ptr, \
base.qnode_dlist)
static inline void _waitq_init(_wait_q_t *w)
{
@@ -55,6 +79,8 @@ static inline struct k_thread *_waitq_head(_wait_q_t *w)
return (void *)sys_dlist_peek_head(&w->waitq);
}
#endif /* !CONFIG_WAITQ_FAST */
#ifdef __cplusplus
}
#endif


@@ -273,7 +273,6 @@ static void init_idle_thread(struct k_thread *thr, k_thread_stack_t *stack)
IDLE_STACK_SIZE, idle, NULL, NULL, NULL,
K_LOWEST_THREAD_PRIO, K_ESSENTIAL);
_mark_thread_as_started(thr);
_ready_thread(thr);
}
#endif
@@ -327,11 +326,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread)
_IntLibInit();
/* ready the init/main and idle threads */
for (int ii = 0; ii < K_NUM_PRIORITIES; ii++) {
sys_dlist_init(&_ready_q.q[ii]);
}
_sched_init();
#ifndef CONFIG_SMP
/*
@@ -355,10 +350,12 @@ static void prepare_multithreading(struct k_thread *dummy_thread)
#ifdef CONFIG_MULTITHREADING
init_idle_thread(_idle_thread, _idle_stack);
_kernel.cpus[0].idle_thread = _idle_thread;
#endif
#if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 1
init_idle_thread(_idle_thread1, _idle_stack1);
_kernel.cpus[1].idle_thread = _idle_thread1;
_kernel.cpus[1].id = 1;
_kernel.cpus[1].irq_stack = K_THREAD_STACK_BUFFER(_interrupt_stack1)
+ CONFIG_ISR_STACK_SIZE;
@@ -366,6 +363,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread)
#if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 2
init_idle_thread(_idle_thread2, _idle_stack2);
_kernel.cpus[2].idle_thread = _idle_thread2;
_kernel.cpus[2].id = 2;
_kernel.cpus[2].irq_stack = K_THREAD_STACK_BUFFER(_interrupt_stack2)
+ CONFIG_ISR_STACK_SIZE;
@@ -373,6 +371,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread)
#if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 3
init_idle_thread(_idle_thread3, _idle_stack3);
_kernel.cpus[3].idle_thread = _idle_thread3;
_kernel.cpus[3].id = 3;
_kernel.cpus[3].irq_stack = K_THREAD_STACK_BUFFER(_interrupt_stack3)
+ CONFIG_ISR_STACK_SIZE;


@@ -316,14 +316,12 @@ static bool pipe_xfer_prepare(sys_dlist_t *xfer_list,
size_t min_xfer,
s32_t timeout)
{
sys_dnode_t *node;
struct k_thread *thread;
struct k_pipe_desc *desc;
size_t num_bytes = 0;
if (timeout == K_NO_WAIT) {
SYS_DLIST_FOR_EACH_NODE(&wait_q->waitq, node) {
thread = (struct k_thread *)node;
_WAIT_Q_FOR_EACH(wait_q, thread) {
desc = (struct k_pipe_desc *)thread->base.swap_data;
num_bytes += desc->bytes_to_xfer;
@@ -367,7 +365,7 @@ static bool pipe_xfer_prepare(sys_dlist_t *xfer_list,
* Add it to the transfer list.
*/
_unpend_thread(thread);
sys_dlist_append(xfer_list, &thread->base.k_q_node);
sys_dlist_append(xfer_list, &thread->base.qnode_dlist);
}
*waiter = (num_bytes > bytes_to_xfer) ? thread : NULL;

File diff suppressed because it is too large.