From 2724fd11cbef8498ea1c4577a15a3aa8a40ed9b2 Mon Sep 17 00:00:00 2001
From: Andy Ross
Date: Mon, 29 Jan 2018 14:55:20 -0800
Subject: [PATCH] kernel: SMP-aware scheduler

The scheduler needs a few tweaks to work in SMP mode:

1. The "cache" field just doesn't work.  With more than one CPU,
   caching the highest priority thread isn't useful as you may need
   N of them at any given time before another thread is returned to
   the scheduler.  You could recalculate it at every change, but that
   provides no performance benefit.  Remove.

2. The "bitmask" designed to prevent the need to individually check
   priorities is likewise dropped.  This could work, but in fact on
   our only current SMP system and with current K_NUM_PRIORITIES
   values it provides no real benefit.

3. The individual threads now have a "current cpu" and "active" flag
   so that the choice of the next thread to run can correctly skip
   threads that are active on other CPUs.

The upshot is that a decent amount of code gets #if'd out, and the
new SMP implementations for _get_highest_ready_prio() and
_get_next_ready_thread() are simpler and smaller, at the expense of
having to drop older optimizations.

Note that scheduler synchronization is unchanged: all scheduler APIs
used to require that an irq_lock() be held, which means that they
now require the global spinlock via the same API.  This should be a
very early candidate for lock granularity attention!

Signed-off-by: Andy Ross
---
 include/kernel.h                | 11 ++++++
 kernel/include/kernel_offsets.h |  2 ++
 kernel/include/kernel_structs.h |  5 ++-
 kernel/include/ksched.h         | 15 +++++++++
 kernel/include/kswap.h          |  7 ++++
 kernel/init.c                   |  6 ++++
 kernel/sched.c                  | 59 ++++++++++++++++++++++++++++++---
 7 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/include/kernel.h b/include/kernel.h
index 1f6bd43d0e0..43addc98841 100644
--- a/include/kernel.h
+++ b/include/kernel.h
@@ -392,6 +392,17 @@ struct _thread_base {
 		u16_t preempt;
 	};

+#ifdef CONFIG_SMP
+	/* True for the per-CPU idle threads */
+	u8_t is_idle;
+
+	/* Non-zero when actively running on a CPU */
+	u8_t active;
+
+	/* CPU index on which thread was last run */
+	u8_t cpu;
+#endif
+
 	/* data returned by APIs */
 	void *swap_data;

diff --git a/kernel/include/kernel_offsets.h b/kernel/include/kernel_offsets.h
index 9db64b1a249..bced0fc16f2 100644
--- a/kernel/include/kernel_offsets.h
+++ b/kernel/include/kernel_offsets.h
@@ -38,7 +38,9 @@ GEN_OFFSET_SYM(_kernel_t, idle);

 GEN_OFFSET_SYM(_kernel_t, ready_q);
 GEN_OFFSET_SYM(_kernel_t, arch);
+#ifndef CONFIG_SMP
 GEN_OFFSET_SYM(_ready_q_t, cache);
+#endif

 #ifdef CONFIG_FP_SHARING
 GEN_OFFSET_SYM(_kernel_t, current_fp);
diff --git a/kernel/include/kernel_structs.h b/kernel/include/kernel_structs.h
index 5716822bf93..083615e1075 100644
--- a/kernel/include/kernel_structs.h
+++ b/kernel/include/kernel_structs.h
@@ -66,12 +66,13 @@
 #if !defined(_ASMLANGUAGE)

 struct _ready_q {
-
+#ifndef CONFIG_SMP
 	/* always contains next thread to run: cannot be NULL */
 	struct k_thread *cache;

 	/* bitmap of priorities that contain at least one ready thread */
 	u32_t prio_bmap[K_NUM_PRIO_BITMAPS];
+#endif

 	/* ready queues, one per priority */
 	sys_dlist_t q[K_NUM_PRIORITIES];
@@ -88,6 +89,8 @@ struct _cpu {

 	/* currently scheduled thread */
 	struct k_thread *current;
+
+	int id;
 };

 typedef struct _cpu _cpu_t;
diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h
index b0f1ad770da..70574026d8a 100644
--- a/kernel/include/ksched.h
+++ b/kernel/include/ksched.h
@@ -14,7 +14,10 @@
 #endif /* CONFIG_KERNEL_EVENT_LOGGER */

 extern k_tid_t const _main_thread;
+
+#ifndef CONFIG_SMP
 extern k_tid_t const _idle_thread;
+#endif

 extern void _add_thread_to_ready_q(struct k_thread *thread);
 extern void _remove_thread_from_ready_q(struct k_thread *thread);
@@ -34,10 +37,14 @@ extern void idle(void *, void *, void *);

 /* find which one is the next thread to run */
 /* must be called with interrupts locked */
+#ifdef CONFIG_SMP
+extern struct k_thread *_get_next_ready_thread(void);
+#else
 static ALWAYS_INLINE struct k_thread *_get_next_ready_thread(void)
 {
 	return _ready_q.cache;
 }
+#endif

 static inline int _is_idle_thread(void *entry_point)
 {
@@ -46,7 +53,11 @@ static inline int _is_idle_thread(void *entry_point)

 static inline int _is_idle_thread_ptr(k_tid_t thread)
 {
+#ifdef CONFIG_SMP
+	return thread->base.is_idle;
+#else
 	return thread == _idle_thread;
+#endif
 }

 #ifdef CONFIG_MULTITHREADING
@@ -203,6 +214,9 @@ static inline int _get_ready_q_q_index(int prio)

 /* find out the currently highest priority where a thread is ready to run */
 /* interrupts must be locked */
+#ifdef CONFIG_SMP
+int _get_highest_ready_prio(void);
+#else
 static inline int _get_highest_ready_prio(void)
 {
 	int bitmap = 0;
@@ -228,6 +242,7 @@ static inline int _get_highest_ready_prio(void)

 	return abs_prio - _NUM_COOP_PRIO;
 }
+#endif

 /*
  * Checks if current thread must be context-switched out. The caller must
diff --git a/kernel/include/kswap.h b/kernel/include/kswap.h
index 05baf631fc4..9d42ffb5fc4 100644
--- a/kernel/include/kswap.h
+++ b/kernel/include/kswap.h
@@ -43,6 +43,13 @@ static inline unsigned int _Swap(unsigned int key)

 	old_thread->swap_retval = -EAGAIN;

+#ifdef CONFIG_SMP
+	old_thread->base.active = 0;
+	new_thread->base.active = 1;
+
+	new_thread->base.cpu = _arch_curr_cpu()->id;
+#endif
+
 	_current = new_thread;
 	_arch_switch(new_thread->switch_handle,
 		     &old_thread->switch_handle);
diff --git a/kernel/init.c b/kernel/init.c
index 9b8c132d438..4d2f02996f5 100644
--- a/kernel/init.c
+++ b/kernel/init.c
@@ -266,6 +266,10 @@ void __weak main(void)
 #if defined(CONFIG_MULTITHREADING)
 static void init_idle_thread(struct k_thread *thr, k_thread_stack_t *stack)
 {
+#ifdef CONFIG_SMP
+	thr->base.is_idle = 1;
+#endif
+
 	_setup_new_thread(thr, stack,
 			  IDLE_STACK_SIZE, idle, NULL, NULL, NULL,
 			  K_LOWEST_THREAD_PRIO, K_ESSENTIAL);
@@ -330,6 +334,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread)
 		sys_dlist_init(&_ready_q.q[ii]);
 	}

+#ifndef CONFIG_SMP
 	/*
 	 * prime the cache with the main thread since:
 	 *
@@ -340,6 +345,7 @@ static void prepare_multithreading(struct k_thread *dummy_thread)
 	 * to work as intended
 	 */
 	_ready_q.cache = _main_thread;
+#endif

 	_setup_new_thread(_main_thread, _main_stack,
 			  MAIN_STACK_SIZE, _main, NULL, NULL, NULL,
diff --git a/kernel/sched.c b/kernel/sched.c
index c93e2d0ed84..f448a7fbc3f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -17,7 +17,7 @@
 struct _kernel _kernel = {0};

 /* set the bit corresponding to prio in ready q bitmap */
-#ifdef CONFIG_MULTITHREADING
+#if defined(CONFIG_MULTITHREADING) && !defined(CONFIG_SMP)
 static void _set_ready_q_prio_bit(int prio)
 {
 	int bmap_index = _get_ready_q_prio_bmap_index(prio);
@@ -25,10 +25,8 @@ static void _set_ready_q_prio_bit(int prio)

 	*bmap |= _get_ready_q_prio_bit(prio);
 }
-#endif

 /* clear the bit corresponding to prio in ready q bitmap */
-#ifdef CONFIG_MULTITHREADING
 static void _clear_ready_q_prio_bit(int prio)
 {
 	int bmap_index = _get_ready_q_prio_bmap_index(prio);
@@ -38,7 +36,7 @@ static void _clear_ready_q_prio_bit(int prio)
 }
 #endif

-#ifdef CONFIG_MULTITHREADING
+#if !defined(CONFIG_SMP) && defined(CONFIG_MULTITHREADING)
 /*
  * Find the next thread to run when there is no thread in the cache and update
  * the cache.
@@ -76,16 +74,22 @@ void _add_thread_to_ready_q(struct k_thread *thread)
 	int q_index = _get_ready_q_q_index(thread->base.prio);
 	sys_dlist_t *q = &_ready_q.q[q_index];

+# ifndef CONFIG_SMP
 	_set_ready_q_prio_bit(thread->base.prio);
+# endif
 	sys_dlist_append(q, &thread->base.k_q_node);

+# ifndef CONFIG_SMP
 	struct k_thread **cache = &_ready_q.cache;

 	*cache = _is_t1_higher_prio_than_t2(thread, *cache) ? thread : *cache;
+# endif
 #else
 	sys_dlist_append(&_ready_q.q[0], &thread->base.k_q_node);
 	_ready_q.prio_bmap[0] = 1;
+# ifndef CONFIG_SMP
 	_ready_q.cache = thread;
+# endif
 #endif
 }

@@ -98,7 +102,7 @@ void _add_thread_to_ready_q(struct k_thread *thread)

 void _remove_thread_from_ready_q(struct k_thread *thread)
 {
-#ifdef CONFIG_MULTITHREADING
+#if defined(CONFIG_MULTITHREADING) && !defined(CONFIG_SMP)
 	int q_index = _get_ready_q_q_index(thread->base.prio);
 	sys_dlist_t *q = &_ready_q.q[q_index];

@@ -111,8 +115,10 @@ void _remove_thread_from_ready_q(struct k_thread *thread)

 	*cache = *cache == thread ? _get_ready_q_head() : *cache;
 #else
+# if !defined(CONFIG_SMP)
 	_ready_q.prio_bmap[0] = 0;
 	_ready_q.cache = NULL;
+# endif
 	sys_dlist_remove(&thread->base.k_q_node);
 #endif
 }
@@ -309,9 +315,11 @@ void _move_thread_to_end_of_prio_q(struct k_thread *thread)

 	sys_dlist_remove(&thread->base.k_q_node);
 	sys_dlist_append(q, &thread->base.k_q_node);

+# ifndef CONFIG_SMP
 	struct k_thread **cache = &_ready_q.cache;

 	*cache = *cache == thread ? _get_ready_q_head() : *cache;
+# endif
 #endif
 }
@@ -489,3 +497,44 @@ int _impl_k_is_preempt_thread(void)
 #ifdef CONFIG_USERSPACE
 _SYSCALL_HANDLER0_SIMPLE(k_is_preempt_thread);
 #endif
+
+#ifdef CONFIG_SMP
+int _get_highest_ready_prio(void)
+{
+	int p;
+
+	for (p = 0; p < ARRAY_SIZE(_kernel.ready_q.q); p++) {
+		if (!sys_dlist_is_empty(&_kernel.ready_q.q[p])) {
+			break;
+		}
+	}
+
+	__ASSERT(p < K_NUM_PRIORITIES, "No ready prio");
+
+	return p - _NUM_COOP_PRIO;
+}
+
+struct k_thread *_get_next_ready_thread(void)
+{
+	int p, mycpu = _arch_curr_cpu()->id;
+
+	for (p = 0; p < ARRAY_SIZE(_ready_q.q); p++) {
+		sys_dlist_t *list = &_ready_q.q[p];
+		sys_dnode_t *node;
+
+		for (node = list->tail; node != list; node = node->prev) {
+			struct k_thread *th = (struct k_thread *)node;
+
+			/* Skip threads that are already running elsewhere! */
+			if (th->base.active && th->base.cpu != mycpu) {
+				continue;
+			}
+
+			return th;
+		}
+	}
+
+	__ASSERT(0, "No ready thread found for cpu %d\n", mycpu);
+	return NULL;
+}
+#endif
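
For readers who want to experiment with the selection rule described in point 3 of the commit message without building an SMP kernel, here is a small standalone model. It is not Zephyr code: model_thread, pick_for_cpu() and the sample data are invented for illustration, and it reproduces only the behaviour of the new _get_next_ready_thread(), namely scanning the ready queues from highest to lowest priority and skipping any thread already marked active on a different CPU.

/* Standalone model of the SMP pick rule from this patch; builds with
 * any C99 compiler.  Queue index 0 is the highest priority, and a
 * thread marked active on another CPU must not be picked again.
 */
#include <stdio.h>
#include <stddef.h>

#define NUM_PRIORITIES 4

struct model_thread {
	const char *name;
	int prio;	/* queue index: 0 is highest priority */
	int active;	/* non-zero while running on some CPU */
	int cpu;	/* CPU it last ran on */
};

/* The per-priority ready queues collapsed into one array for brevity */
static struct model_thread threads[] = {
	{ "A", 0, 1, 0 },	/* highest priority, already running on CPU 0 */
	{ "B", 1, 0, 0 },	/* ready, not running anywhere */
	{ "idle1", 3, 0, 1 },	/* low-priority idle thread for CPU 1 */
};

static struct model_thread *pick_for_cpu(int mycpu)
{
	for (int p = 0; p < NUM_PRIORITIES; p++) {
		for (size_t i = 0; i < sizeof(threads) / sizeof(threads[0]); i++) {
			struct model_thread *th = &threads[i];

			if (th->prio != p) {
				continue;
			}

			/* Skip threads that are already running elsewhere */
			if (th->active && th->cpu != mycpu) {
				continue;
			}

			return th;
		}
	}

	return NULL;
}

int main(void)
{
	/* CPU 1 must not steal "A" from CPU 0, so it gets "B" instead */
	printf("cpu 1 picks: %s\n", pick_for_cpu(1)->name);

	/* CPU 0 sees "A" as its own active thread and may pick it again */
	printf("cpu 0 picks: %s\n", pick_for_cpu(0)->name);

	return 0;
}

The real function walks each per-priority sys_dlist_t rather than a flat array and asserts if nothing runnable is found; the model keeps only the priority order and the skip rule.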
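
On the synchronization note in the commit message: the calling convention for these scheduler APIs is unchanged, so callers still wrap them in irq_lock()/irq_unlock(), and on SMP builds that same call now takes the single global spinlock. A minimal sketch of such a caller follows; it assumes a kernel-internal source file where ksched.h is reachable, and the wrapper make_ready_locked() is hypothetical, while irq_lock(), irq_unlock() and _add_thread_to_ready_q() are the existing APIs referenced by this patch.

/* Sketch only: a hypothetical kernel-internal helper showing the
 * locking contract the commit message describes.
 */
#include <kernel.h>
#include <ksched.h>

static void make_ready_locked(struct k_thread *thread)
{
	/* Same API as before SMP: on CONFIG_SMP builds irq_lock()
	 * acquires the global spinlock rather than only masking
	 * interrupts on the local CPU.
	 */
	unsigned int key = irq_lock();

	_add_thread_to_ready_q(thread);

	irq_unlock(key);
}

Because the lock is global, every such caller serializes against every other scheduler operation in the system, which is exactly the coarse granularity the commit message flags as an early target for finer-grained locking.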