From 1acd8c2996c3a1ae0691247deff8c32519307f17 Mon Sep 17 00:00:00 2001 From: Andy Ross Date: Thu, 3 May 2018 14:51:49 -0700 Subject: [PATCH] kernel: Scheduler rewrite This replaces the existing scheduler (but not priority handling) implementation with a somewhat simpler one. Behavior as to thread selection does not change. New features: + Unifies SMP and uniprocessing selection code (with the sole exception of the "cache" trick not being possible in SMP). + The old static multi-queue implementation is gone and has been replaced with a build-time choice of either a "dumb" list implementation (faster and significantly smaller for apps with only a few threads) or a balanced tree queue which scales well to arbitrary numbers of threads and priority levels. This is controlled via the CONFIG_SCHED_DUMB kconfig variable. + The balanced tree implementation is usable symmetrically for the wait_q abstraction, fixing a scalability glitch Zephyr had when many threads were waiting on a single object. This can be selected via CONFIG_WAITQ_FAST. Signed-off-by: Andy Ross --- include/kernel.h | 33 +- include/sched_priq.h | 47 ++ kernel/Kconfig | 28 + kernel/include/kernel_structs.h | 20 +- kernel/include/ksched.h | 21 + kernel/include/kswap.h | 4 +- kernel/include/wait_q.h | 28 +- kernel/init.c | 11 +- kernel/pipes.c | 6 +- kernel/sched.c | 886 +++++++++++++++----------------- 10 files changed, 600 insertions(+), 484 deletions(-) create mode 100644 include/sched_priq.h diff --git a/include/kernel.h b/include/kernel.h index 1419a1f2f4c..419a1b84106 100644 --- a/include/kernel.h +++ b/include/kernel.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -91,12 +93,26 @@ extern "C" { #define K_HIGHEST_APPLICATION_THREAD_PRIO (K_HIGHEST_THREAD_PRIO) #define K_LOWEST_APPLICATION_THREAD_PRIO (K_LOWEST_THREAD_PRIO - 1) +#ifdef CONFIG_WAITQ_FAST + +typedef struct { + struct _priq_rb waitq; +} _wait_q_t; + +extern int _priq_rb_lessthan(struct rbnode *a, struct rbnode *b); + +#define _WAIT_Q_INIT(wait_q) { { { .lessthan_fn = _priq_rb_lessthan } } } + +#else + typedef struct { sys_dlist_t waitq; } _wait_q_t; #define _WAIT_Q_INIT(wait_q) { SYS_DLIST_STATIC_INIT(&(wait_q)->waitq) } +#endif + #ifdef CONFIG_OBJECT_TRACING #define _OBJECT_TRACING_NEXT_PTR(type) struct type *__next #define _OBJECT_TRACING_INIT .__next = NULL, @@ -405,7 +421,17 @@ struct __thread_entry { struct _thread_base { /* this thread's entry in a ready/wait queue */ - sys_dnode_t k_q_node; + union { + sys_dlist_t qnode_dlist; + struct rbnode qnode_rb; + }; + +#ifdef CONFIG_WAITQ_FAST + /* wait queue on which the thread is pended (needed only for + * trees, not dumb lists) + */ + _wait_q_t *pended_on; +#endif /* user facing 'thread options'; values defined in include/kernel.h */ u8_t user_options; @@ -440,13 +466,12 @@ struct _thread_base { u16_t preempt; }; + u32_t order_key; + #ifdef CONFIG_SMP /* True for the per-CPU idle threads */ u8_t is_idle; - /* Non-zero when actively running on a CPU */ - u8_t active; - /* CPU index on which thread was last run */ u8_t cpu; diff --git a/include/sched_priq.h b/include/sched_priq.h new file mode 100644 index 00000000000..e83b5f685ad --- /dev/null +++ b/include/sched_priq.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _sched_priq__h_ +#define _sched_priq__h_ + +#include +#include +#include + +/* Two abstractions are 
defined here for "thread priority queues". + * + * One is a "dumb" list implementation appropriate for systems with + * small numbers of threads and sensitive to code size. It is stored + * in sorted order, taking an O(N) cost every time a thread is added + * to the list. This corresponds to the way the original _wait_q_t + * abstraction worked and is very fast as long as the number of + * threads is small. + * + * The other is a balanced tree "fast" implementation with rather + * larger code size (due to the data structure itself, the code here + * is just stubs) and higher constant-factor performance overhead, but + * much better O(logN) scaling in the presence of large number of + * threads. + * + * Each can be used for either the wait_q or system ready queue, + * configurable at build time. + */ + +struct k_thread; + +struct k_thread *_priq_dumb_best(sys_dlist_t *pq); +void _priq_dumb_remove(sys_dlist_t *pq, struct k_thread *thread); +void _priq_dumb_add(sys_dlist_t *pq, struct k_thread *thread); + +struct _priq_rb { + struct rbtree tree; + int next_order_key; +}; + +void _priq_rb_add(struct _priq_rb *pq, struct k_thread *thread); +void _priq_rb_remove(struct _priq_rb *pq, struct k_thread *thread); +struct k_thread *_priq_rb_best(struct _priq_rb *pq); + +#endif /* _sched_priq__h_ */ diff --git a/kernel/Kconfig b/kernel/Kconfig index 62e9652bf3d..90ce807aca3 100644 --- a/kernel/Kconfig +++ b/kernel/Kconfig @@ -169,6 +169,34 @@ config APPLICATION_MEMORY will have the project-level application objects and any libraries including the C library in it. +config WAITQ_FAST + bool + prompt "Use scalable wait_q implementation" + default n + help + When selected, the wait_q abstraction used in IPC primitives + to pend threads for wakeup later will be implemented with a + balanced tree instead of a linear list. Choose this if you + expect to have many threads waiting on individual + primitives, or if you have already included the red/black + tree code in the build for some other purpose (in which case + this results in less code size increase than the default + implementation). + +config SCHED_DUMB + bool + prompt "Use a simple linked list scheduler" + default y + help + When selected, the scheduler ready queue will be implemented + as a simple unordered list, with very fast constant time + performance for single threads and very low code size. + Choose this on systems with constrained code size that will + never see more than a small number (3, maybe) of runnable + threads in the queue at any given time. On most platforms + (that are not otherwise using the red/black tree) this + results in a savings of ~2k of code size. + menu "Kernel Debugging and Metrics" config INIT_STACKS diff --git a/kernel/include/kernel_structs.h b/kernel/include/kernel_structs.h index 083615e1075..5c3cca1e825 100644 --- a/kernel/include/kernel_structs.h +++ b/kernel/include/kernel_structs.h @@ -12,6 +12,7 @@ #if !defined(_ASMLANGUAGE) #include #include +#include #include #endif @@ -27,7 +28,6 @@ * defined. 
*/ - /* states: common uses low bits, arch-specific use high bits */ /* Not a real thread */ @@ -48,6 +48,9 @@ /* Thread is actively looking at events to see if they are ready */ #define _THREAD_POLLING (1 << 5) +/* Thread is present in the ready queue */ +#define _THREAD_QUEUED (1 << 6) + /* end - states */ #ifdef CONFIG_STACK_SENTINEL @@ -69,13 +72,13 @@ struct _ready_q { #ifndef CONFIG_SMP /* always contains next thread to run: cannot be NULL */ struct k_thread *cache; - - /* bitmap of priorities that contain at least one ready thread */ - u32_t prio_bmap[K_NUM_PRIO_BITMAPS]; #endif - /* ready queues, one per priority */ - sys_dlist_t q[K_NUM_PRIORITIES]; +#ifdef CONFIG_SCHED_DUMB + sys_dlist_t runq; +#else + struct _priq_rb runq; +#endif }; typedef struct _ready_q _ready_q_t; @@ -90,6 +93,9 @@ struct _cpu { /* currently scheduled thread */ struct k_thread *current; + /* one assigned idle thread per CPU */ + struct k_thread *idle_thread; + int id; }; @@ -158,8 +164,10 @@ typedef struct _kernel _kernel_t; extern struct _kernel _kernel; #ifdef CONFIG_SMP +#define _current_cpu (_arch_curr_cpu()) #define _current (_arch_curr_cpu()->current) #else +#define _current_cpu (&_kernel.cpus[0]) #define _current _kernel.current #endif diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h index 5d296649746..65d280450f2 100644 --- a/kernel/include/ksched.h +++ b/kernel/include/ksched.h @@ -33,6 +33,7 @@ #define _ASSERT_VALID_PRIO(prio, entry_point) __ASSERT((prio) == -1, "") #endif +void _sched_init(void); void _add_thread_to_ready_q(struct k_thread *thread); void _move_thread_to_end_of_prio_q(struct k_thread *thread); void _remove_thread_from_ready_q(struct k_thread *thread); @@ -111,6 +112,11 @@ static inline int _is_thread_polling(struct k_thread *thread) return _is_thread_state_set(thread, _THREAD_POLLING); } +static inline int _is_thread_queued(struct k_thread *thread) +{ + return _is_thread_state_set(thread, _THREAD_QUEUED); +} + static inline void _mark_thread_as_suspended(struct k_thread *thread) { thread->base.thread_state |= _THREAD_SUSPENDED; @@ -126,6 +132,11 @@ static inline void _mark_thread_as_started(struct k_thread *thread) thread->base.thread_state &= ~_THREAD_PRESTART; } +static inline void _mark_thread_as_pending(struct k_thread *thread) +{ + thread->base.thread_state |= _THREAD_PENDING; +} + static inline void _mark_thread_as_not_pending(struct k_thread *thread) { thread->base.thread_state &= ~_THREAD_PENDING; @@ -152,6 +163,16 @@ static inline void _mark_thread_as_not_polling(struct k_thread *thread) _reset_thread_states(thread, _THREAD_POLLING); } +static inline void _mark_thread_as_queued(struct k_thread *thread) +{ + _set_thread_states(thread, _THREAD_QUEUED); +} + +static inline void _mark_thread_as_not_queued(struct k_thread *thread) +{ + _reset_thread_states(thread, _THREAD_QUEUED); +} + static inline int _is_under_prio_ceiling(int prio) { return prio >= CONFIG_PRIORITY_CEILING; diff --git a/kernel/include/kswap.h b/kernel/include/kswap.h index 67bf94b4b2a..7a0f7d4c991 100644 --- a/kernel/include/kswap.h +++ b/kernel/include/kswap.h @@ -58,12 +58,10 @@ static inline unsigned int _Swap(unsigned int key) new_thread = _get_next_ready_thread(); if (new_thread != old_thread) { + old_thread->swap_retval = -EAGAIN; #ifdef CONFIG_SMP - old_thread->base.active = 0; - new_thread->base.active = 1; - new_thread->base.cpu = _arch_curr_cpu()->id; _smp_release_global_lock(new_thread); diff --git a/kernel/include/wait_q.h b/kernel/include/wait_q.h index e59bc9f26d2..e8c53850756 
100644 --- a/kernel/include/wait_q.h +++ b/kernel/include/wait_q.h @@ -11,7 +11,9 @@ #include #include +#include #include +#include #ifdef __cplusplus extern "C" { @@ -42,8 +44,30 @@ static ALWAYS_INLINE int _abort_thread_timeout(struct k_thread *thread) #define _get_next_timeout_expiry() (K_FOREVER) #endif +#ifdef CONFIG_WAITQ_FAST + #define _WAIT_Q_FOR_EACH(wq, thread_ptr) \ - SYS_DLIST_FOR_EACH_CONTAINER(&((wq)->waitq), thread_ptr, base.k_q_node) + RB_FOR_EACH_CONTAINER(&(wq)->waitq.tree, thread_ptr, base.qnode_rb) + +static inline void _waitq_init(_wait_q_t *w) +{ + w->waitq = (struct _priq_rb) { + .tree = { + .lessthan_fn = _priq_rb_lessthan + } + }; +} + +static inline struct k_thread *_waitq_head(_wait_q_t *w) +{ + return (void *)rb_get_min(&w->waitq.tree); +} + +#else /* !CONFIG_WAITQ_FAST: */ + +#define _WAIT_Q_FOR_EACH(wq, thread_ptr) \ + SYS_DLIST_FOR_EACH_CONTAINER(&((wq)->waitq), thread_ptr, \ + base.qnode_dlist) static inline void _waitq_init(_wait_q_t *w) { @@ -55,6 +79,8 @@ static inline struct k_thread *_waitq_head(_wait_q_t *w) return (void *)sys_dlist_peek_head(&w->waitq); } +#endif /* !CONFIG_WAITQ_FAST */ + #ifdef __cplusplus } #endif diff --git a/kernel/init.c b/kernel/init.c index 5f2bdb53df2..954fc10b63b 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -273,7 +273,6 @@ static void init_idle_thread(struct k_thread *thr, k_thread_stack_t *stack) IDLE_STACK_SIZE, idle, NULL, NULL, NULL, K_LOWEST_THREAD_PRIO, K_ESSENTIAL); _mark_thread_as_started(thr); - _ready_thread(thr); } #endif @@ -327,11 +326,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread) _IntLibInit(); - /* ready the init/main and idle threads */ - - for (int ii = 0; ii < K_NUM_PRIORITIES; ii++) { - sys_dlist_init(&_ready_q.q[ii]); - } + _sched_init(); #ifndef CONFIG_SMP /* @@ -355,10 +350,12 @@ static void prepare_multithreading(struct k_thread *dummy_thread) #ifdef CONFIG_MULTITHREADING init_idle_thread(_idle_thread, _idle_stack); + _kernel.cpus[0].idle_thread = _idle_thread; #endif #if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 1 init_idle_thread(_idle_thread1, _idle_stack1); + _kernel.cpus[1].idle_thread = _idle_thread1; _kernel.cpus[1].id = 1; _kernel.cpus[1].irq_stack = K_THREAD_STACK_BUFFER(_interrupt_stack1) + CONFIG_ISR_STACK_SIZE; @@ -366,6 +363,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread) #if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 2 init_idle_thread(_idle_thread2, _idle_stack2); + _kernel.cpus[2].idle_thread = _idle_thread2; _kernel.cpus[2].id = 2; _kernel.cpus[2].irq_stack = K_THREAD_STACK_BUFFER(_interrupt_stack2) + CONFIG_ISR_STACK_SIZE; @@ -373,6 +371,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread) #if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 3 init_idle_thread(_idle_thread3, _idle_stack3); + _kernel.cpus[3].idle_thread = _idle_thread3; _kernel.cpus[3].id = 3; _kernel.cpus[3].irq_stack = K_THREAD_STACK_BUFFER(_interrupt_stack3) + CONFIG_ISR_STACK_SIZE; diff --git a/kernel/pipes.c b/kernel/pipes.c index 9b3c7d433b1..61622cbad9b 100644 --- a/kernel/pipes.c +++ b/kernel/pipes.c @@ -316,14 +316,12 @@ static bool pipe_xfer_prepare(sys_dlist_t *xfer_list, size_t min_xfer, s32_t timeout) { - sys_dnode_t *node; struct k_thread *thread; struct k_pipe_desc *desc; size_t num_bytes = 0; if (timeout == K_NO_WAIT) { - SYS_DLIST_FOR_EACH_NODE(&wait_q->waitq, node) { - thread = (struct k_thread *)node; + _WAIT_Q_FOR_EACH(wait_q, thread) { desc = (struct k_pipe_desc *)thread->base.swap_data; num_bytes += desc->bytes_to_xfer; @@
-367,7 +365,7 @@ static bool pipe_xfer_prepare(sys_dlist_t *xfer_list, * Add it to the transfer list. */ _unpend_thread(thread); - sys_dlist_append(xfer_list, &thread->base.k_q_node); + sys_dlist_append(xfer_list, &thread->base.qnode_dlist); } *waiter = (num_bytes > bytes_to_xfer) ? thread : NULL; diff --git a/kernel/sched.c b/kernel/sched.c index 9bb2a8a0486..7003ea07ac5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1,29 +1,46 @@ /* - * Copyright (c) 2016-2017 Wind River Systems, Inc. + * Copyright (c) 2018 Intel Corporation * * SPDX-License-Identifier: Apache-2.0 */ - #include -#include -#include #include +#include +#include #include -#include -#include #include +#include +#include -/* the only struct _kernel instance */ -struct _kernel _kernel = {0}; - -#ifndef CONFIG_SMP -extern k_tid_t const _idle_thread; +#ifdef CONFIG_SCHED_DUMB +#define _priq_run_add _priq_dumb_add +#define _priq_run_remove _priq_dumb_remove +#define _priq_run_best _priq_dumb_best +#else +#define _priq_run_add _priq_rb_add +#define _priq_run_remove _priq_rb_remove +#define _priq_run_best _priq_rb_best #endif -static inline int _is_thread_dummy(struct k_thread *thread) -{ - return _is_thread_state_set(thread, _THREAD_DUMMY); -} +#ifdef CONFIG_WAITQ_FAST +#define _priq_wait_add _priq_rb_add +#define _priq_wait_remove _priq_rb_remove +#define _priq_wait_best _priq_rb_best +#else +#define _priq_wait_add _priq_dumb_add +#define _priq_wait_remove _priq_dumb_remove +#define _priq_wait_best _priq_dumb_best +#endif + +/* the only struct _kernel instance */ +struct _kernel _kernel; + +static struct k_spinlock sched_lock; + +#define LOCKED(lck) for (k_spinlock_key_t __i = {}, \ + __key = k_spin_lock(lck); \ + !__i.key; \ + k_spin_unlock(lck, __key), __i.key = 1) static inline int _is_preempt(struct k_thread *thread) { @@ -35,206 +52,252 @@ static inline int _is_preempt(struct k_thread *thread) #endif } -static inline void _mark_thread_as_pending(struct k_thread *thread) +static inline int _is_thread_dummy(struct k_thread *thread) { - thread->base.thread_state |= _THREAD_PENDING; + return !!(thread->base.thread_state & _THREAD_DUMMY); +} + +static inline int _is_idle(struct k_thread *thread) +{ +#ifdef CONFIG_SMP + return thread->base.is_idle; +#else + extern struct k_thread * const _idle_thread; + + return thread == _idle_thread; +#endif +} + +#ifdef _NON_OPTIMIZED_TICKS_PER_SEC +s32_t _ms_to_ticks(s32_t ms) +{ + s64_t ms_ticks_per_sec = (s64_t)ms * sys_clock_ticks_per_sec; + + return (s32_t)ceiling_fraction(ms_ticks_per_sec, MSEC_PER_SEC); +} +#endif + +static struct k_thread *next_up(void) +{ +#ifndef CONFIG_SMP + /* In uniprocessor mode, we can leave the current thread in + * the queue (actually we have to, otherwise the assembly + * context switch code for all architectures would be + * responsible for putting it back in _Swap and ISR return!), + * which makes this choice simple. + */ + struct k_thread *th = _priq_run_best(&_kernel.ready_q.runq); + + return th ? th : _current_cpu->idle_thread; +#else + /* Under SMP, the "cache" mechanism for selecting the next + * thread doesn't work, so we have more work to do to test + * _current against the best choice from the queue. 
+ */ + int active = !_is_thread_prevented_from_running(_current); + int queued = _is_thread_queued(_current); + + struct k_thread *th = _priq_run_best(&_kernel.ready_q.runq); + + /* Idle thread if nothing else */ + if (!th) { + th = _current_cpu->idle_thread; + } + + /* Stay with current unless it's already been put back in the + * queue and something better is available (c.f. timeslicing, + * yield) + */ + if (active && !queued && !_is_t1_higher_prio_than_t2(th, _current)) { + th = _current; + } + + /* Put _current back into the queue if necessary */ + if (th != _current && !queued) { + _priq_run_add(&_kernel.ready_q.runq, _current); + } + + /* Remove the thread we're about to run from the queue (which + * potentially might not be there, but that's OK) + */ + _priq_run_remove(&_kernel.ready_q.runq, th); + + return th; +#endif +} + +static void update_cache(void) +{ +#ifndef CONFIG_SMP + _kernel.ready_q.cache = next_up(); +#endif +} + +void _add_thread_to_ready_q(struct k_thread *thread) +{ + LOCKED(&sched_lock) { + _priq_run_add(&_kernel.ready_q.runq, thread); + _mark_thread_as_queued(thread); + update_cache(); + } +} + +void _move_thread_to_end_of_prio_q(struct k_thread *thread) +{ + LOCKED(&sched_lock) { + _priq_run_remove(&_kernel.ready_q.runq, thread); + _priq_run_add(&_kernel.ready_q.runq, thread); + _mark_thread_as_queued(thread); + update_cache(); + } +} + +void _remove_thread_from_ready_q(struct k_thread *thread) +{ + LOCKED(&sched_lock) { + if (_is_thread_queued(thread)) { + _priq_run_remove(&_kernel.ready_q.runq, thread); + _mark_thread_as_not_queued(thread); + update_cache(); + } + } +} + +static void pend(struct k_thread *thread, _wait_q_t *wait_q, s32_t timeout) +{ + _remove_thread_from_ready_q(thread); + _mark_thread_as_pending(thread); + + /* The timeout handling is currently synchronized external to + * the scheduler using the legacy global lock. Should fix + * that. 
+ */ + if (timeout != K_FOREVER) { + s32_t ticks = _TICK_ALIGN + _ms_to_ticks(timeout); + int key = irq_lock(); + + _add_thread_timeout(thread, wait_q, ticks); + irq_unlock(key); + } + + if (wait_q) { +#ifdef CONFIG_WAITQ_FAST + thread->base.pended_on = wait_q; +#endif + _priq_wait_add(&wait_q->waitq, thread); + } #ifdef CONFIG_KERNEL_EVENT_LOGGER_THREAD _sys_k_event_logger_thread_pend(thread); #endif } -static inline int _is_idle_thread_ptr(k_tid_t thread) +void _pend_thread(struct k_thread *thread, _wait_q_t *wait_q, s32_t timeout) { -#ifdef CONFIG_SMP - return thread->base.is_idle; + __ASSERT_NO_MSG(thread == _current || _is_thread_dummy(thread)); + pend(thread, wait_q, timeout); +} + +static _wait_q_t *pended_on(struct k_thread *thread) +{ +#ifdef CONFIG_WAITQ_FAST + __ASSERT_NO_MSG(thread->base.pended_on); + + return thread->base.pended_on; #else - return thread == _idle_thread; + ARG_UNUSED(thread); + return NULL; #endif } -static inline int _get_ready_q_q_index(int prio) +struct k_thread *_find_first_thread_to_unpend(_wait_q_t *wait_q, + struct k_thread *from) { - return prio + _NUM_COOP_PRIO; + ARG_UNUSED(from); + + struct k_thread *ret = NULL; + + LOCKED(&sched_lock) { + ret = _priq_wait_best(&wait_q->waitq); + } + + return ret; } -static inline int _get_ready_q_prio_bmap_index(int prio) +void _unpend_thread_no_timeout(struct k_thread *thread) { - return (prio + _NUM_COOP_PRIO) >> 5; + LOCKED(&sched_lock) { + _priq_wait_remove(&pended_on(thread)->waitq, thread); + _mark_thread_as_not_pending(thread); + } + +#if defined(CONFIG_ASSERT) && defined(CONFIG_WAITQ_FAST) + thread->base.pended_on = NULL; +#endif } -static inline int _get_ready_q_prio_bit(int prio) +int _pend_current_thread(int key, _wait_q_t *wait_q, s32_t timeout) { - return (1u << ((prio + _NUM_COOP_PRIO) & 0x1f)); + pend(_current, wait_q, timeout); + return _Swap(key); } -#ifdef CONFIG_SMP -int _get_highest_ready_prio(void); -#else -static inline int _get_highest_ready_prio(void) +struct k_thread *_unpend_first_thread(_wait_q_t *wait_q) { - int bitmap = 0; - u32_t ready_range; + struct k_thread *t = _unpend1_no_timeout(wait_q); -#if (K_NUM_PRIORITIES <= 32) - ready_range = _ready_q.prio_bmap[0]; -#else - for (;; bitmap++) { + if (t) { + _abort_thread_timeout(t); + } - __ASSERT(bitmap < K_NUM_PRIO_BITMAPS, "prio out-of-range\n"); + return t; +} - if (_ready_q.prio_bmap[bitmap]) { - ready_range = _ready_q.prio_bmap[bitmap]; - break; +void _unpend_thread(struct k_thread *thread) +{ + _unpend_thread_no_timeout(thread); + _abort_thread_timeout(thread); +} + +/* FIXME: this API is glitchy when used in SMP. If the thread is + * currently scheduled on the other CPU, it will silently set it's + * priority but nothing will cause a reschedule until the next + * interrupt. An audit seems to show that all current usage is to set + * priorities on either _current or a pended thread, though, so it's + * fine for now. 
+ */ +void _thread_priority_set(struct k_thread *thread, int prio) +{ + int need_sched = 0; + + LOCKED(&sched_lock) { + need_sched = _is_thread_ready(thread); + + if (need_sched) { + _priq_run_remove(&_kernel.ready_q.runq, thread); + thread->base.prio = prio; + _priq_run_add(&_kernel.ready_q.runq, thread); + update_cache(); + } else { + thread->base.prio = prio; } } -#endif - int abs_prio = (find_lsb_set(ready_range) - 1) + (bitmap << 5); - - __ASSERT(abs_prio < K_NUM_PRIORITIES, "prio out-of-range\n"); - - return abs_prio - _NUM_COOP_PRIO; -} -#endif - -/* set the bit corresponding to prio in ready q bitmap */ -#if defined(CONFIG_MULTITHREADING) && !defined(CONFIG_SMP) -static void set_ready_q_prio_bit(int prio) -{ - int bmap_index = _get_ready_q_prio_bmap_index(prio); - u32_t *bmap = &_ready_q.prio_bmap[bmap_index]; - - *bmap |= _get_ready_q_prio_bit(prio); -} - -/* clear the bit corresponding to prio in ready q bitmap */ -static void clear_ready_q_prio_bit(int prio) -{ - int bmap_index = _get_ready_q_prio_bmap_index(prio); - u32_t *bmap = &_ready_q.prio_bmap[bmap_index]; - - *bmap &= ~_get_ready_q_prio_bit(prio); -} -#endif - -#if !defined(CONFIG_SMP) && defined(CONFIG_MULTITHREADING) -/* - * Find the next thread to run when there is no thread in the cache and update - * the cache. - */ -static struct k_thread *get_ready_q_head(void) -{ - int prio = _get_highest_ready_prio(); - int q_index = _get_ready_q_q_index(prio); - sys_dlist_t *list = &_ready_q.q[q_index]; - - __ASSERT(!sys_dlist_is_empty(list), - "no thread to run (prio: %d, queue index: %u)!\n", - prio, q_index); - - struct k_thread *thread = - (struct k_thread *)sys_dlist_peek_head_not_empty(list); - - return thread; -} -#endif - -/* - * Add thread to the ready queue, in the slot for its priority; the thread - * must not be on a wait queue. - * - * This function, along with _move_thread_to_end_of_prio_q(), are the _only_ - * places where a thread is put on the ready queue. - * - * Interrupts must be locked when calling this function. - */ - -void _add_thread_to_ready_q(struct k_thread *thread) -{ - __ASSERT(_is_prio_higher(thread->base.prio, K_LOWEST_THREAD_PRIO) || - ((thread->base.prio == K_LOWEST_THREAD_PRIO) && - (thread == _idle_thread)), - "thread %p prio too low (is %d, cannot be lower than %d)", - thread, thread->base.prio, - thread == _idle_thread ? K_LOWEST_THREAD_PRIO : - K_LOWEST_APPLICATION_THREAD_PRIO); - - __ASSERT(!_is_prio_higher(thread->base.prio, K_HIGHEST_THREAD_PRIO), - "thread %p prio too high (id %d, cannot be higher than %d)", - thread, thread->base.prio, K_HIGHEST_THREAD_PRIO); - -#ifdef CONFIG_MULTITHREADING - int q_index = _get_ready_q_q_index(thread->base.prio); - sys_dlist_t *q = &_ready_q.q[q_index]; - -# ifndef CONFIG_SMP - set_ready_q_prio_bit(thread->base.prio); -# endif - sys_dlist_append(q, &thread->base.k_q_node); - -# ifndef CONFIG_SMP - struct k_thread **cache = &_ready_q.cache; - - *cache = _is_t1_higher_prio_than_t2(thread, *cache) ? thread : *cache; -# endif -#else - sys_dlist_append(&_ready_q.q[0], &thread->base.k_q_node); - _ready_q.prio_bmap[0] = 1; -# ifndef CONFIG_SMP - _ready_q.cache = thread; -# endif -#endif -} - -/* - * This function, along with _move_thread_to_end_of_prio_q(), are the _only_ - * places where a thread is taken off the ready queue. - * - * Interrupts must be locked when calling this function. 
- */ - -void _remove_thread_from_ready_q(struct k_thread *thread) -{ -#if defined(CONFIG_MULTITHREADING) && !defined(CONFIG_SMP) - int q_index = _get_ready_q_q_index(thread->base.prio); - sys_dlist_t *q = &_ready_q.q[q_index]; - - sys_dlist_remove(&thread->base.k_q_node); - if (sys_dlist_is_empty(q)) { - clear_ready_q_prio_bit(thread->base.prio); + if (need_sched) { + _reschedule(irq_lock()); } - - struct k_thread **cache = &_ready_q.cache; - - *cache = *cache == thread ? get_ready_q_head() : *cache; -#else -# if !defined(CONFIG_SMP) - _ready_q.prio_bmap[0] = 0; - _ready_q.cache = NULL; -# endif - sys_dlist_remove(&thread->base.k_q_node); -#endif } -/* Releases the irq_lock and swaps to a higher priority thread if one - * is available, returning the _Swap() return value, otherwise zero. - * Does not swap away from a thread at a cooperative (unpreemptible) - * priority unless "yield" is true. - */ int _reschedule(int key) { - K_DEBUG("rescheduling threads\n"); - if (!_is_in_isr() && _is_preempt(_current) && - _is_prio_higher(_get_highest_ready_prio(), _current->base.prio)) { - K_DEBUG("context-switching out %p\n", _current); + _get_next_ready_thread() != _current) { return _Swap(key); - } else { - irq_unlock(key); - return 0; } + + irq_unlock(key); + return 0; } void k_sched_lock(void) @@ -261,82 +324,197 @@ void k_sched_unlock(void) #endif } -/* convert milliseconds to ticks */ - -#ifdef _NON_OPTIMIZED_TICKS_PER_SEC -s32_t _ms_to_ticks(s32_t ms) +#ifdef CONFIG_SMP +struct k_thread *_get_next_ready_thread(void) { - s64_t ms_ticks_per_sec = (s64_t)ms * sys_clock_ticks_per_sec; + struct k_thread *ret = 0; - return (s32_t)ceiling_fraction(ms_ticks_per_sec, MSEC_PER_SEC); + LOCKED(&sched_lock) { + ret = next_up(); + } + + return ret; } #endif -/* Pend the specified thread: it must *not* be in the ready queue. It - * must be either _current or a DUMMY thread (i.e. this is NOT an API - * for pending another thread that might be running!). 
It must be - * called with interrupts locked - */ -void _pend_thread(struct k_thread *thread, _wait_q_t *wait_q, s32_t timeout) +#ifdef CONFIG_USE_SWITCH +void *_get_next_switch_handle(void *interrupted) { - __ASSERT(thread == _current || _is_thread_dummy(thread), - "Can only pend _current or DUMMY"); - -#ifdef CONFIG_MULTITHREADING - sys_dlist_t *wait_q_list = (sys_dlist_t *)wait_q; - struct k_thread *pending; - - if (!wait_q_list) { - goto inserted; + if (!_is_preempt(_current) && + !(_current->base.thread_state & _THREAD_DEAD)) { + return interrupted; } - SYS_DLIST_FOR_EACH_CONTAINER(wait_q_list, pending, base.k_q_node) { - if (_is_t1_higher_prio_than_t2(thread, pending)) { - sys_dlist_insert_before(wait_q_list, - &pending->base.k_q_node, - &thread->base.k_q_node); - goto inserted; + _current->switch_handle = interrupted; + + LOCKED(&sched_lock) { + struct k_thread *next = next_up(); + + if (next != _current) { + _current = next; } } - sys_dlist_append(wait_q_list, &thread->base.k_q_node); + _check_stack_sentinel(); -inserted: - _mark_thread_as_pending(thread); - - if (timeout != K_FOREVER) { - s32_t ticks = _TICK_ALIGN + _ms_to_ticks(timeout); - - _add_thread_timeout(thread, wait_q, ticks); - } + return _current->switch_handle; +} #endif -} -void _unpend_thread_no_timeout(struct k_thread *thread) +void _priq_dumb_add(sys_dlist_t *pq, struct k_thread *thread) { - __ASSERT(thread->base.thread_state & _THREAD_PENDING, ""); + struct k_thread *t; - sys_dlist_remove(&thread->base.k_q_node); - _mark_thread_as_not_pending(thread); -} + __ASSERT_NO_MSG(!_is_idle(thread)); -void _unpend_thread(struct k_thread *thread) -{ - _unpend_thread_no_timeout(thread); - _abort_thread_timeout(thread); -} - -struct k_thread *_unpend_first_thread(_wait_q_t *wait_q) -{ - struct k_thread *t = _unpend1_no_timeout(wait_q); - - if (t) { - _abort_thread_timeout(t); + SYS_DLIST_FOR_EACH_CONTAINER(pq, t, base.qnode_dlist) { + if (_is_t1_higher_prio_than_t2(thread, t)) { + sys_dlist_insert_before(pq, &t->base.qnode_dlist, + &thread->base.qnode_dlist); + return; + } } - return t; + sys_dlist_append(pq, &thread->base.qnode_dlist); } +void _priq_dumb_remove(sys_dlist_t *pq, struct k_thread *thread) +{ + __ASSERT_NO_MSG(!_is_idle(thread)); + + sys_dlist_remove(&thread->base.qnode_dlist); +} + +struct k_thread *_priq_dumb_best(sys_dlist_t *pq) +{ + return CONTAINER_OF(sys_dlist_peek_head(pq), + struct k_thread, base.qnode_dlist); +} + +int _priq_rb_lessthan(struct rbnode *a, struct rbnode *b) +{ + struct k_thread *ta, *tb; + + ta = CONTAINER_OF(a, struct k_thread, base.qnode_rb); + tb = CONTAINER_OF(b, struct k_thread, base.qnode_rb); + + if (_is_t1_higher_prio_than_t2(ta, tb)) { + return 1; + } else if (_is_t1_higher_prio_than_t2(tb, ta)) { + return 0; + } else { + return ta->base.order_key < tb->base.order_key ? 1 : 0; + } +} + +void _priq_rb_add(struct _priq_rb *pq, struct k_thread *thread) +{ + struct k_thread *t; + + __ASSERT_NO_MSG(!_is_idle(thread)); + + thread->base.order_key = pq->next_order_key++; + + /* Renumber at wraparound. This is tiny code, and in practice + * will almost never be hit on real systems. BUT on very + * long-running systems where a priq never completely empties + * AND that contains very large numbers of threads, it can be + * a latency glitch to loop over all the threads like this. 
+ */ + if (!pq->next_order_key) { + RB_FOR_EACH_CONTAINER(&pq->tree, t, base.qnode_rb) { + t->base.order_key = pq->next_order_key++; + } + } + + rb_insert(&pq->tree, &thread->base.qnode_rb); +} + +void _priq_rb_remove(struct _priq_rb *pq, struct k_thread *thread) +{ + __ASSERT_NO_MSG(!_is_idle(thread)); + + rb_remove(&pq->tree, &thread->base.qnode_rb); + + if (!pq->tree.root) { + pq->next_order_key = 0; + } +} + +struct k_thread *_priq_rb_best(struct _priq_rb *pq) +{ + struct rbnode *n = rb_get_min(&pq->tree); + + return CONTAINER_OF(n, struct k_thread, base.qnode_rb); +} + +#ifdef CONFIG_TIMESLICING +extern s32_t _time_slice_duration; /* Measured in ms */ +extern s32_t _time_slice_elapsed; /* Measured in ms */ +extern int _time_slice_prio_ceiling; + +void k_sched_time_slice_set(s32_t duration_in_ms, int prio) +{ + __ASSERT(duration_in_ms >= 0, ""); + __ASSERT((prio >= 0) && (prio < CONFIG_NUM_PREEMPT_PRIORITIES), ""); + + _time_slice_duration = duration_in_ms; + _time_slice_elapsed = 0; + _time_slice_prio_ceiling = prio; +} + +int _is_thread_time_slicing(struct k_thread *thread) +{ + int ret = 0; + + /* Should fix API. Doesn't make sense for non-running threads + * to call this + */ + __ASSERT_NO_MSG(thread == _current); + + if (_time_slice_duration <= 0 || !_is_preempt(thread) || + _is_prio_higher(thread->base.prio, _time_slice_prio_ceiling)) { + return 0; + } + + + LOCKED(&sched_lock) { + struct k_thread *next = _priq_run_best(&_kernel.ready_q.runq); + + if (next) { + ret = thread->base.prio == next->base.prio; + } + } + + return ret; +} + +/* Must be called with interrupts locked */ +/* Should be called only immediately before a thread switch */ +void _update_time_slice_before_swap(void) +{ +#ifdef CONFIG_TICKLESS_KERNEL + if (!_is_thread_time_slicing(_get_next_ready_thread())) { + return; + } + + u32_t remaining = _get_remaining_program_time(); + + if (!remaining || (_time_slice_duration < remaining)) { + _set_time(_time_slice_duration); + } else { + /* Account previous elapsed time and reprogram + * timer with remaining time + */ + _set_time(remaining); + } + +#endif + /* Restart time slice count at new thread switch */ + _time_slice_elapsed = 0; +} +#endif /* CONFIG_TIMESLICING */ + int _unpend_all(_wait_q_t *waitq) { int need_sched = 0; @@ -351,16 +529,17 @@ int _unpend_all(_wait_q_t *waitq) return need_sched; } - -/* Block the current thread and swap to the next. Releases the - * irq_lock, does a _Swap and returns the return value set at wakeup - * time - */ -int _pend_current_thread(int key, _wait_q_t *wait_q, s32_t timeout) +void _sched_init(void) { - _remove_thread_from_ready_q(_current); - _pend_thread(_current, wait_q, timeout); - return _Swap(key); +#ifdef CONFIG_SCHED_DUMB + sys_dlist_init(&_kernel.ready_q.runq); +#else + _kernel.ready_q.runq = (struct _priq_rb) { + .tree = { + .lessthan_fn = _priq_rb_lessthan, + } + }; +#endif } int _impl_k_thread_priority_get(k_tid_t thread) @@ -383,10 +562,8 @@ void _impl_k_thread_priority_set(k_tid_t tid, int prio) __ASSERT(!_is_in_isr(), ""); struct k_thread *thread = (struct k_thread *)tid; - int key = irq_lock(); _thread_priority_set(thread, prio); - _reschedule(key); } #ifdef CONFIG_USERSPACE @@ -406,49 +583,16 @@ Z_SYSCALL_HANDLER(k_thread_priority_set, thread_p, prio) } #endif -/* - * Interrupts must be locked when calling this function. - * - * This function, along with _add_thread_to_ready_q() and - * _remove_thread_from_ready_q(), are the _only_ places where a thread is - * taken off or put on the ready queue. 
- */ -void _move_thread_to_end_of_prio_q(struct k_thread *thread) -{ -#ifdef CONFIG_MULTITHREADING - int q_index = _get_ready_q_q_index(thread->base.prio); - sys_dlist_t *q = &_ready_q.q[q_index]; - - if (sys_dlist_is_tail(q, &thread->base.k_q_node)) { - return; - } - - sys_dlist_remove(&thread->base.k_q_node); - sys_dlist_append(q, &thread->base.k_q_node); - -# ifndef CONFIG_SMP - struct k_thread **cache = &_ready_q.cache; - - *cache = *cache == thread ? get_ready_q_head() : *cache; -# endif -#endif -} - void _impl_k_yield(void) { __ASSERT(!_is_in_isr(), ""); - int key = irq_lock(); + if (!_is_idle(_current)) { + _move_thread_to_end_of_prio_q(_current); + } - _move_thread_to_end_of_prio_q(_current); - - if (_current == _get_next_ready_thread()) { - irq_unlock(key); -#ifdef CONFIG_STACK_SENTINEL - _check_stack_sentinel(); -#endif - } else { - _Swap(key); + if (_get_next_ready_thread() != _current) { + _Swap(irq_lock()); } } @@ -537,69 +681,6 @@ k_tid_t _impl_k_current_get(void) Z_SYSCALL_HANDLER0_SIMPLE(k_current_get); #endif -#ifdef CONFIG_TIMESLICING -extern s32_t _time_slice_duration; /* Measured in ms */ -extern s32_t _time_slice_elapsed; /* Measured in ms */ -extern int _time_slice_prio_ceiling; - -void k_sched_time_slice_set(s32_t duration_in_ms, int prio) -{ - __ASSERT(duration_in_ms >= 0, ""); - __ASSERT((prio >= 0) && (prio < CONFIG_NUM_PREEMPT_PRIORITIES), ""); - - _time_slice_duration = duration_in_ms; - _time_slice_elapsed = 0; - _time_slice_prio_ceiling = prio; -} - -int _is_thread_time_slicing(struct k_thread *thread) -{ - /* - * Time slicing is done on the thread if following conditions are met - * - * Time slice duration should be set > 0 - * Should not be the idle thread - * Priority should be higher than time slice priority ceiling - * There should be multiple threads active with same priority - */ - - if (!(_time_slice_duration > 0) || (_is_idle_thread_ptr(thread)) - || _is_prio_higher(thread->base.prio, _time_slice_prio_ceiling)) { - return 0; - } - - int q_index = _get_ready_q_q_index(thread->base.prio); - sys_dlist_t *q = &_ready_q.q[q_index]; - - return sys_dlist_has_multiple_nodes(q); -} - -/* Must be called with interrupts locked */ -/* Should be called only immediately before a thread switch */ -void _update_time_slice_before_swap(void) -{ -#ifdef CONFIG_TICKLESS_KERNEL - if (!_is_thread_time_slicing(_get_next_ready_thread())) { - return; - } - - u32_t remaining = _get_remaining_program_time(); - - if (!remaining || (_time_slice_duration < remaining)) { - _set_time(_time_slice_duration); - } else { - /* Account previous elapsed time and reprogram - * timer with remaining time - */ - _set_time(remaining); - } - -#endif - /* Restart time slice count at new thread switch */ - _time_slice_elapsed = 0; -} -#endif /* CONFIG_TIMESLICING */ - int _impl_k_is_preempt_thread(void) { return !_is_in_isr() && _is_preempt(_current); @@ -608,118 +689,3 @@ int _impl_k_is_preempt_thread(void) #ifdef CONFIG_USERSPACE Z_SYSCALL_HANDLER0_SIMPLE(k_is_preempt_thread); #endif - -#ifdef CONFIG_SMP -int _get_highest_ready_prio(void) -{ - int p; - - for (p = 0; p < ARRAY_SIZE(_kernel.ready_q.q); p++) { - if (!sys_dlist_is_empty(&_kernel.ready_q.q[p])) { - break; - } - } - - __ASSERT(p < K_NUM_PRIORITIES, "No ready prio"); - - return p - _NUM_COOP_PRIO; -} - -struct k_thread *_get_next_ready_thread(void) -{ - int p, mycpu = _arch_curr_cpu()->id; - - for (p = 0; p < ARRAY_SIZE(_ready_q.q); p++) { - sys_dlist_t *list = &_ready_q.q[p]; - sys_dnode_t *node; - - for (node = list->tail; node != 
list; node = node->prev) { - struct k_thread *th = (struct k_thread *)node; - - /* Skip threads that are already running elsewhere! */ - if (th->base.active && th->base.cpu != mycpu) { - continue; - } - - return th; - } - } - - __ASSERT(0, "No ready thread found for cpu %d\n", mycpu); - return NULL; -} -#endif - -#ifdef CONFIG_USE_SWITCH -void *_get_next_switch_handle(void *interrupted) -{ - if (!_is_preempt(_current) && - !(_current->base.thread_state & _THREAD_DEAD)) { - return interrupted; - } - - int key = irq_lock(); - struct k_thread *new_thread = _get_next_ready_thread(); - -#ifdef CONFIG_SMP - _current->base.active = 0; - new_thread->base.active = 1; -#endif - - irq_unlock(key); - - _current->switch_handle = interrupted; - _current = new_thread; - - void *ret = new_thread->switch_handle; - -#ifdef CONFIG_SMP - _smp_reacquire_global_lock(_current); -#endif - - _check_stack_sentinel(); - - return ret; -} -#endif - -void _thread_priority_set(struct k_thread *thread, int prio) -{ - if (_is_thread_ready(thread)) { - _remove_thread_from_ready_q(thread); - thread->base.prio = prio; - _add_thread_to_ready_q(thread); - } else { - thread->base.prio = prio; - } -} - -struct k_thread *_find_first_thread_to_unpend(_wait_q_t *wait_q, - struct k_thread *from) -{ -#ifdef CONFIG_SYS_CLOCK_EXISTS - extern volatile int _handling_timeouts; - - if (_handling_timeouts) { - sys_dlist_t *q = (sys_dlist_t *)wait_q; - sys_dnode_t *cur = from ? &from->base.k_q_node : NULL; - - /* skip threads that have an expired timeout */ - SYS_DLIST_ITERATE_FROM_NODE(q, cur) { - struct k_thread *thread = (struct k_thread *)cur; - - if (_is_thread_timeout_expired(thread)) { - continue; - } - - return thread; - } - return NULL; - } -#else - ARG_UNUSED(from); -#endif - - return _waitq_head(wait_q); - -}
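
The patch introduces a new locking idiom plus two interchangeable queue backends; the short standalone sketches below are not part of the patch itself, they only illustrate the ideas. First, the LOCKED() macro added in kernel/sched.c: a one-pass for loop whose initializer takes the spinlock and whose final clause releases it. The macro body below is the one from the patch; the k_spinlock type and the lock/unlock functions are simplified stand-ins (assumptions for illustration, not the kernel's implementation, which masks interrupts and spins on SMP).

#include <stdio.h>

/* Stand-in types and functions so this sketch builds on its own; they are
 * placeholders for the kernel's real spinlock API.
 */
typedef struct { int key; } k_spinlock_key_t;
struct k_spinlock { int locked; };

static k_spinlock_key_t k_spin_lock(struct k_spinlock *l)
{
	l->locked = 1;
	return (k_spinlock_key_t){ .key = 1 };
}

static void k_spin_unlock(struct k_spinlock *l, k_spinlock_key_t key)
{
	(void)key;
	l->locked = 0;
}

/* Macro body as in the patch: __i.key starts at 0, so the attached block runs
 * exactly once; the last clause drops the lock and sets __i.key = 1, which
 * ends the loop.  (The empty initializer is a GNU C / C23 construct.)
 */
#define LOCKED(lck) for (k_spinlock_key_t __i = {}, \
			 __key = k_spin_lock(lck); \
			 !__i.key; \
			 k_spin_unlock(lck, __key), __i.key = 1)

static struct k_spinlock sched_lock;

int main(void)
{
	LOCKED(&sched_lock) {
		printf("inside:  locked=%d\n", sched_lock.locked);  /* prints 1 */
	}
	printf("outside: locked=%d\n", sched_lock.locked);          /* prints 0 */
	return 0;
}

Every ready-queue operation in the rewritten sched.c (_add_thread_to_ready_q(), _move_thread_to_end_of_prio_q(), _remove_thread_from_ready_q(), and friends) is wrapped this way, so the queue update and the uniprocessor cache refresh happen under a single lock without any goto or helper function.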
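
Second, the red/black tree backend orders threads with _priq_rb_lessthan(): priority first, then the order_key counter stamped by _priq_rb_add(), so equal-priority threads stay FIFO. The self-contained model below shows just that comparison; struct fake_thread and the helper names are illustrative stand-ins, and the real code additionally renumbers order_key values when the counter wraps, as the patch comment explains.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_thread {
	int prio;           /* Zephyr convention: numerically lower = higher priority */
	uint32_t order_key; /* assigned from a running counter at insertion time */
};

static bool t1_higher_prio_than_t2(const struct fake_thread *a,
				   const struct fake_thread *b)
{
	return a->prio < b->prio;
}

/* Mirrors the comparator logic: true when 'a' should sort before 'b' */
static bool lessthan(const struct fake_thread *a, const struct fake_thread *b)
{
	if (t1_higher_prio_than_t2(a, b)) {
		return true;
	} else if (t1_higher_prio_than_t2(b, a)) {
		return false;
	} else {
		return a->order_key < b->order_key;
	}
}

int main(void)
{
	struct fake_thread first  = { .prio = 5, .order_key = 10 };
	struct fake_thread second = { .prio = 5, .order_key = 11 };
	struct fake_thread urgent = { .prio = 1, .order_key = 12 };

	printf("first before second: %d\n", lessthan(&first, &second)); /* 1: FIFO tiebreak */
	printf("urgent before first: %d\n", lessthan(&urgent, &first)); /* 1: higher priority */
	return 0;
}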
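
Third, the "dumb" backend described in include/sched_priq.h keeps the queue as a priority-sorted list: insertion walks the list (O(N)) and the best thread is simply the head (O(1)). The toy singly-linked list below models that behavior under those assumptions; it is not the kernel's sys_dlist code.

#include <stdio.h>

struct node {
	int prio;          /* numerically lower = higher priority */
	struct node *next;
};

static void dumb_add(struct node **head, struct node *n)
{
	struct node **link = head;

	/* Walk past entries of higher or equal priority so equal-priority
	 * threads keep FIFO order, then splice in before the first entry of
	 * strictly lower priority, matching _priq_dumb_add()'s policy.
	 */
	while (*link && (*link)->prio <= n->prio) {
		link = &(*link)->next;
	}
	n->next = *link;
	*link = n;
}

static struct node *dumb_best(struct node *head)
{
	return head;	/* list is kept sorted, so the head is always best */
}

int main(void)
{
	struct node a = { .prio = 7 }, b = { .prio = 3 }, c = { .prio = 7 };
	struct node *runq = NULL;

	dumb_add(&runq, &a);
	dumb_add(&runq, &b);
	dumb_add(&runq, &c);

	for (struct node *n = dumb_best(runq); n != NULL; n = n->next) {
		printf("prio %d\n", n->prio);	/* prints 3, 7, 7 (a before c) */
	}
	return 0;
}

Which backend serves which queue is a build-time choice in the new Kconfig options: CONFIG_SCHED_DUMB keeps the sorted list for the ready queue, while CONFIG_WAITQ_FAST switches wait queues over to the balanced tree.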