From 3e696896bfd350eebe1ee73c5286a8d0e63b5918 Mon Sep 17 00:00:00 2001 From: Andy Ross Date: Tue, 30 Nov 2021 18:26:26 -0800 Subject: [PATCH] kernel: Add "per thread" timeslice mechanism Zephyr's timeslice implementation has always been somewhat primitive. You get a global timeslice that applies broadly to the whole bottom of the priority space, with no ability (beyond that one priority threshold) to tune it to work on certain threads, etc... This adds an (optionally configurable) API that allows timeslicing to be controlled on a per-thread basis: any thread at any priority can be set to timeslice, for a configurable per-thread slice time, and at the end of its slice a callback can be provided that can take action. This allows the application to implement things like responsiveness heuristics, "fair" scheduling algorithms, etc... without requiring that facility in the core kernel. Signed-off-by: Andy Ross --- include/kernel.h | 41 ++++++++++++++++++ include/kernel/thread.h | 6 +++ include/kernel_structs.h | 2 + kernel/Kconfig | 8 ++++ kernel/include/ksched.h | 2 +- kernel/include/kswap.h | 7 ++-- kernel/sched.c | 91 ++++++++++++++++++++++++++++++++-------- kernel/thread.c | 5 +++ 8 files changed, 140 insertions(+), 22 deletions(-) diff --git a/include/kernel.h b/include/kernel.h index 30de64c7791..d6da2eb23ba 100644 --- a/include/kernel.h +++ b/include/kernel.h @@ -864,6 +864,47 @@ __syscall void k_thread_resume(k_tid_t thread); */ extern void k_sched_time_slice_set(int32_t slice, int prio); +/** + * @brief Set thread time slice + * + * As for k_sched_time_slice_set, but (when + * CONFIG_TIMESLICE_PER_THREAD=y) sets the timeslice for a specific + * thread. When non-zero, this timeslice will take precedence over + * the global value. + * + * When such a thread's timeslice expires, the configured callback + * will be called before the thread is removed/re-added to the run + * queue. This callback will occur in interrupt context, and the + * specified thread is guaranteed to have been preempted by the + * currently-executing ISR. Such a callback is free to, for example, + * modify the thread priority or slice time for future execution, + * suspend the thread, etc... + * + * @note Unlike the older API, the time slice parameter here is + * specified in ticks, not milliseconds. Ticks have always been the + * internal unit, and not all platforms have integer conversions + * between the two. + * + * @note Threads with a non-zero slice time set will be timesliced + * always, even if they are higher priority than the maximum timeslice + * priority set via k_sched_time_slice_set(). + * + * @note The callback notification for slice expiration happens, as it + * must, while the thread is still "current", and thus it happens + * before any registered timeouts at this tick. This has the somewhat + * confusing side effect that the tick time (c.f. k_uptime_get()) does + * not yet reflect the expired ticks. Applications wishing to make + * fine-grained timing decisions within this callback should use the + * cycle API, or derived facilities like k_thread_runtime_stats_get(). 
+ * + * @param th A valid, initialized thread + * @param slice_ticks Maximum timeslice, in ticks + * @param expired Callback function called on slice expiration + * @param data Parameter for the expiration handler + */ +void k_thread_time_slice_set(struct k_thread *th, int32_t slice_ticks, + k_thread_timeslice_fn_t expired, void *data); + /** @} */ /** diff --git a/include/kernel/thread.h b/include/kernel/thread.h index 66ed151a7ba..bd8c97d1906 100644 --- a/include/kernel/thread.h +++ b/include/kernel/thread.h @@ -117,6 +117,12 @@ struct _thread_base { struct _timeout timeout; #endif +#ifdef CONFIG_TIMESLICE_PER_THREAD + int32_t slice_ticks; + k_thread_timeslice_fn_t slice_expired; + void *slice_data; +#endif + #ifdef CONFIG_SCHED_THREAD_USAGE struct k_cycle_stats usage; /* Track thread usage statistics */ #endif diff --git a/include/kernel_structs.h b/include/kernel_structs.h index f88b46f43b4..c8460719636 100644 --- a/include/kernel_structs.h +++ b/include/kernel_structs.h @@ -243,6 +243,8 @@ struct _timeout { #endif }; +typedef void (*k_thread_timeslice_fn_t)(struct k_thread *thread, void *data); + #ifdef __cplusplus } #endif diff --git a/kernel/Kconfig b/kernel/Kconfig index 887ac30c07b..3035b3ec6b1 100644 --- a/kernel/Kconfig +++ b/kernel/Kconfig @@ -500,6 +500,14 @@ config TIMESLICE_PRIORITY takes effect; threads having a higher priority than this ceiling are not subject to time slicing. +config TIMESLICE_PER_THREAD + bool "Support per-thread timeslice values" + depends on TIMESLICING + help + When set, this enables an API for setting timeslice values on + a per-thread basis, with an application callback invoked when + a thread reaches the end of its timeslice. + config POLL bool "Async I/O Framework" help diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h index 0ad72901561..530ddfd1d46 100644 --- a/kernel/include/ksched.h +++ b/kernel/include/ksched.h @@ -56,7 +56,7 @@ bool z_set_prio(struct k_thread *thread, int prio); void *z_get_next_switch_handle(void *interrupted); void idle(void *unused1, void *unused2, void *unused3); void z_time_slice(int ticks); -void z_reset_time_slice(void); +void z_reset_time_slice(struct k_thread *curr); void z_sched_abort(struct k_thread *thread); void z_sched_ipi(void); void z_sched_start(struct k_thread *thread); diff --git a/kernel/include/kswap.h b/kernel/include/kswap.h index 702f3fe5401..9f8b2a2533b 100644 --- a/kernel/include/kswap.h +++ b/kernel/include/kswap.h @@ -106,9 +106,6 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key, new_thread = z_swap_next_thread(); if (new_thread != old_thread) { -#ifdef CONFIG_TIMESLICING - z_reset_time_slice(); -#endif z_sched_usage_switch(new_thread); #ifdef CONFIG_SMP @@ -123,6 +120,10 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key, wait_for_switch(new_thread); _current_cpu->current = new_thread; +#ifdef CONFIG_TIMESLICING + z_reset_time_slice(new_thread); +#endif + #ifdef CONFIG_SPIN_VALIDATE z_spin_lock_set_owner(&sched_spinlock); #endif diff --git a/kernel/sched.c b/kernel/sched.c index adeffd7ef5c..b94ae6b349c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -381,9 +381,21 @@ static void move_thread_to_end_of_prio_q(struct k_thread *thread) #ifdef CONFIG_TIMESLICING -static int slice_time; +static int slice_ticks; static int slice_max_prio; +static inline int slice_time(struct k_thread *curr) +{ + int ret = slice_ticks; + +#ifdef CONFIG_TIMESLICE_PER_THREAD + if (curr->base.slice_ticks != 0) { + ret = curr->base.slice_ticks; + } +#endif + return ret; +} + #ifdef 
CONFIG_SWAP_NONATOMIC /* If z_swap() isn't atomic, then it's possible for a timer interrupt * to try to timeslice away _current after it has already pended @@ -393,15 +405,15 @@ static int slice_max_prio; static struct k_thread *pending_current; #endif -void z_reset_time_slice(void) +void z_reset_time_slice(struct k_thread *curr) { /* Add the elapsed time since the last announced tick to the * slice count, as we'll see those "expired" ticks arrive in a * FUTURE z_time_slice() call. */ - if (slice_time != 0) { - _current_cpu->slice_ticks = slice_time + sys_clock_elapsed(); - z_set_timeout_expiry(slice_time, false); + if (slice_time(curr) != 0) { + _current_cpu->slice_ticks = slice_time(curr) + sys_clock_elapsed(); + z_set_timeout_expiry(slice_time(curr), false); } } @@ -409,24 +421,61 @@ void k_sched_time_slice_set(int32_t slice, int prio) { LOCKED(&sched_spinlock) { _current_cpu->slice_ticks = 0; - slice_time = k_ms_to_ticks_ceil32(slice); + slice_ticks = k_ms_to_ticks_ceil32(slice); if (IS_ENABLED(CONFIG_TICKLESS_KERNEL) && slice > 0) { /* It's not possible to reliably set a 1-tick * timeout if ticks aren't regular. */ - slice_time = MAX(2, slice_time); + slice_ticks = MAX(2, slice_ticks); } slice_max_prio = prio; - z_reset_time_slice(); + z_reset_time_slice(_current); } } -static inline int sliceable(struct k_thread *thread) +#ifdef CONFIG_TIMESLICE_PER_THREAD +void k_thread_time_slice_set(struct k_thread *th, int32_t slice_ticks, + k_thread_timeslice_fn_t expired, void *data) { - return is_preempt(thread) + LOCKED(&sched_spinlock) { + th->base.slice_ticks = slice_ticks; + th->base.slice_expired = expired; + th->base.slice_data = data; + } +} +#endif + +static inline bool sliceable(struct k_thread *thread) +{ + bool ret = is_preempt(thread) && !z_is_thread_prevented_from_running(thread) && !z_is_prio_higher(thread->base.prio, slice_max_prio) && !z_is_idle_thread_object(thread); + +#ifdef CONFIG_TIMESLICE_PER_THREAD + ret |= thread->base.slice_ticks != 0; +#endif + + return ret; +} + +static k_spinlock_key_t slice_expired_locked(k_spinlock_key_t sched_lock_key) +{ + struct k_thread *curr = _current; + +#ifdef CONFIG_TIMESLICE_PER_THREAD + if (curr->base.slice_expired) { + k_spin_unlock(&sched_spinlock, sched_lock_key); + curr->base.slice_expired(curr, curr->base.slice_data); + sched_lock_key = k_spin_lock(&sched_spinlock); + } +#endif + if (!z_is_thread_prevented_from_running(curr)) { + move_thread_to_end_of_prio_q(curr); + } + z_reset_time_slice(curr); + + return sched_lock_key; } /* Called out of each timer interrupt */ @@ -443,17 +492,22 @@ void z_time_slice(int ticks) #ifdef CONFIG_SWAP_NONATOMIC if (pending_current == _current) { - z_reset_time_slice(); + z_reset_time_slice(_current); k_spin_unlock(&sched_spinlock, key); return; } pending_current = NULL; #endif - if (slice_time && sliceable(_current)) { + if (slice_time(_current) && sliceable(_current)) { if (ticks >= _current_cpu->slice_ticks) { - move_thread_to_end_of_prio_q(_current); - z_reset_time_slice(); + /* Note: this will (if so enabled) internally + * drop and reacquire the scheduler lock + * around the callback! Don't put anything + * after this line that requires + * synchronization. 
+ */ + key = slice_expired_locked(key); } else { _current_cpu->slice_ticks -= ticks; } @@ -490,7 +544,7 @@ static void update_cache(int preempt_ok) if (should_preempt(thread, preempt_ok)) { #ifdef CONFIG_TIMESLICING if (thread != _current) { - z_reset_time_slice(); + z_reset_time_slice(thread); } #endif update_metairq_preempt(thread); @@ -952,12 +1006,13 @@ void *z_get_next_switch_handle(void *interrupted) wait_for_switch(new_thread); arch_cohere_stacks(old_thread, interrupted, new_thread); -#ifdef CONFIG_TIMESLICING - z_reset_time_slice(); -#endif _current_cpu->swap_ok = 0; set_current(new_thread); +#ifdef CONFIG_TIMESLICING + z_reset_time_slice(new_thread); +#endif + #ifdef CONFIG_SPIN_VALIDATE /* Changed _current! Update the spinlock * bookkeeping so the validation doesn't get diff --git a/kernel/thread.c b/kernel/thread.c index 9109b60f143..b84ce56a83b 100644 --- a/kernel/thread.c +++ b/kernel/thread.c @@ -779,6 +779,11 @@ void z_init_thread_base(struct _thread_base *thread_base, int priority, thread_base->is_idle = 0; #endif +#ifdef CONFIG_TIMESLICE_PER_THREAD + thread_base->slice_ticks = 0; + thread_base->slice_expired = NULL; +#endif + /* swap_data does not need to be initialized */ z_init_thread_timeout(thread_base);
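
Usage sketch (reviewer note, not part of the patch): assuming CONFIG_TIMESLICE_PER_THREAD=y, the API added in include/kernel.h above could be exercised roughly as below. The worker thread, the 10-tick slice, the "demote after five expirations" heuristic, and the helper names (struct slice_info, demote_worker, start_slicing) are all hypothetical, chosen only to illustrate the callback flow described in the k_thread_time_slice_set() documentation. The doc comment permits the callback to change the thread's priority directly from interrupt context; this sketch instead defers that to the system workqueue, which is a common pattern for keeping ISR-context work short.

#include <zephyr.h>

/* Hypothetical bookkeeping for one sliced thread. */
struct slice_info {
	struct k_work work;
	struct k_thread *thread;
	int expirations;
};

static struct slice_info worker_info;

/* Runs in the system workqueue (thread context): demote the thread
 * after it has burned through several full slices.  In Zephyr a
 * numerically larger priority value is a lower priority.
 */
static void demote_worker(struct k_work *work)
{
	struct slice_info *info = CONTAINER_OF(work, struct slice_info, work);

	k_thread_priority_set(info->thread,
			      k_thread_priority_get(info->thread) + 1);
}

/* Slice-expiration callback: runs in interrupt context, with the
 * expiring thread already preempted by the ISR.  Keep it short and
 * push heavier work to the workqueue.
 */
static void slice_expired(struct k_thread *thread, void *data)
{
	struct slice_info *info = data;

	if (++info->expirations % 5 == 0) {
		k_work_submit(&info->work);
	}
}

/* Hypothetical setup, e.g. called from main() after the worker
 * thread has been created; requires CONFIG_TIMESLICE_PER_THREAD=y.
 */
void start_slicing(struct k_thread *worker)
{
	worker_info.thread = worker;
	k_work_init(&worker_info.work, demote_worker);

	/* 10-tick slice for this thread, taking precedence over the
	 * global k_sched_time_slice_set() value and priority ceiling.
	 */
	k_thread_time_slice_set(worker, 10, slice_expired, &worker_info);
}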