From 40d12c142d967f162713c096e0584e56a675f360 Mon Sep 17 00:00:00 2001
From: Andy Ross
Date: Mon, 27 Sep 2021 08:22:43 -0700
Subject: [PATCH] kernel/sched: Add "thread_usage" API for thread runtime
 cycle monitoring

This is an alternate backend that does what THREAD_RUNTIME_STATS is
doing currently, but with a few advantages:

* Correctly synchronized: you can't race against a running thread
  (potentially on another CPU!) while querying its usage.

* Realtime results: you get the right answer always, up to timer
  precision, even if a thread has been running for a while
  uninterrupted and hasn't updated its total.

* Portable, no need for per-architecture code at all for the simple
  case.  (It leverages the USE_SWITCH layer to do this, so it won't
  work on older architectures.)

* Faster/smaller: minimizes use of 64 bit math; lower overhead in the
  thread struct (keeps the scratch "started" time in the CPU struct
  instead).  One 64 bit counter per thread and a 32 bit scratch
  register in the CPU struct.

* Standalone.  It's a core (but optional) scheduler feature, no
  dependence on para-kernel configuration like the tracing
  infrastructure.

* More precise: allows architectures to optionally call a trivial
  zero-argument/no-result cdecl function out of interrupt entry to
  avoid accounting for ISR runtime in thread totals.  No configuration
  is needed here: if it's called you get proper ISR accounting, and if
  not you don't.

For right now, pending unification, it's added side-by-side with the
older API and left as a z_*() internal symbol.

Signed-off-by: Andy Ross
---
 include/kernel/thread.h  |  4 +++
 include/kernel_structs.h |  4 +++
 kernel/Kconfig           |  6 +++++
 kernel/include/ksched.h  | 31 ++++++++++++++++++++++
 kernel/include/kswap.h   |  1 +
 kernel/sched.c           | 55 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 101 insertions(+)

diff --git a/include/kernel/thread.h b/include/kernel/thread.h
index ee6dc8f50ec..ae0930b70e0 100644
--- a/include/kernel/thread.h
+++ b/include/kernel/thread.h
@@ -116,6 +116,10 @@ struct _thread_base {
 	/* this thread's entry in a timeout queue */
 	struct _timeout timeout;
 #endif
+
+#ifdef CONFIG_SCHED_THREAD_USAGE
+	uint64_t usage;
+#endif
 };
 
 typedef struct _thread_base _thread_base_t;
diff --git a/include/kernel_structs.h b/include/kernel_structs.h
index 5199f82b92b..05d2ec309d5 100644
--- a/include/kernel_structs.h
+++ b/include/kernel_structs.h
@@ -130,6 +130,10 @@ struct _cpu {
 	uint8_t swap_ok;
 #endif
 
+#ifdef CONFIG_SCHED_THREAD_USAGE
+	uint32_t usage0;
+#endif
+
 	/* Per CPU architecture specifics */
 	struct _cpu_arch arch;
 };
diff --git a/kernel/Kconfig b/kernel/Kconfig
index 26a078e873e..7c09d4392f3 100644
--- a/kernel/Kconfig
+++ b/kernel/Kconfig
@@ -377,6 +377,12 @@ config THREAD_MAX_NAME_LEN
 config INSTRUMENT_THREAD_SWITCHING
 	bool
 
+config SCHED_THREAD_USAGE
+	bool "Collect thread runtime usage"
+	depends on USE_SWITCH
+	help
+	  Alternate implementation of thread runtime cycle usage
+
 menuconfig THREAD_RUNTIME_STATS
 	bool "Thread runtime statistics"
 	select INSTRUMENT_THREAD_SWITCHING
diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h
index 7d39825c52a..f29e6c41669 100644
--- a/kernel/include/ksched.h
+++ b/kernel/include/ksched.h
@@ -363,4 +363,35 @@ static inline bool z_sched_wake_all(_wait_q_t *wait_q, int swap_retval,
 int z_sched_wait(struct k_spinlock *lock, k_spinlock_key_t key,
 		 _wait_q_t *wait_q, k_timeout_t timeout, void **data);
 
+
+/** @brief Halt thread cycle usage accounting.
+ *
+ * Halts the accumulation of thread cycle usage and adds the current
+ * total to the thread's counter.  Called on context switch.
+ *
+ * Note that this function is idempotent.  The core kernel code calls
+ * it at the end of interrupt handlers (because that is where we have
+ * a portable hook) where we are context switching, which will include
+ * any cycles spent in the ISR in the per-thread accounting.  But
+ * architecture code can also call it earlier out of interrupt entry
+ * to improve measurement fidelity.
+ *
+ * This function assumes local interrupts are masked (so that the
+ * current CPU pointer and current thread are safe to modify), but
+ * requires no other synchronization.  Architecture layers don't need
+ * to do anything more.
+ */
+void z_sched_usage_stop(void);
+
+void z_sched_usage_start(struct k_thread *thread);
+
+static inline void z_sched_usage_switch(struct k_thread *thread)
+{
+	ARG_UNUSED(thread);
+#ifdef CONFIG_SCHED_THREAD_USAGE
+	z_sched_usage_stop();
+	z_sched_usage_start(thread);
+#endif
+}
+
 #endif /* ZEPHYR_KERNEL_INCLUDE_KSCHED_H_ */
diff --git a/kernel/include/kswap.h b/kernel/include/kswap.h
index 917315d5899..f43f1941177 100644
--- a/kernel/include/kswap.h
+++ b/kernel/include/kswap.h
@@ -109,6 +109,7 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
 #ifdef CONFIG_TIMESLICING
 	z_reset_time_slice();
 #endif
+	z_sched_usage_switch(new_thread);
 
 #ifdef CONFIG_SMP
 	_current_cpu->swap_ok = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 5eeb192268d..32fb2341ac7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -937,6 +937,8 @@ void *z_get_next_switch_handle(void *interrupted)
 		}
 		new_thread = next_up();
 
+		z_sched_usage_switch(new_thread);
+
 		if (old_thread != new_thread) {
 			update_metairq_preempt(new_thread);
 			wait_for_switch(new_thread);
@@ -976,6 +978,7 @@ void *z_get_next_switch_handle(void *interrupted)
 	}
 	return ret;
 #else
+	z_sched_usage_switch(_kernel.ready_q.cache);
 	_current->switch_handle = interrupted;
 	set_current(_kernel.ready_q.cache);
 	return _current->switch_handle;
@@ -1731,3 +1734,55 @@ int z_sched_wait(struct k_spinlock *lock, k_spinlock_key_t key,
 	}
 	return ret;
 }
+
+#ifdef CONFIG_SCHED_THREAD_USAGE
+
+static struct k_spinlock usage_lock;
+
+static uint32_t usage_now(void)
+{
+	uint32_t now = k_cycle_get_32();
+
+	/* Edge case: we use a zero as a null ("stop() already called") */
+	return (now == 0) ? 1 : now;
+}
+
+void z_sched_usage_start(struct k_thread *thread)
+{
+	/* One write through a volatile pointer doesn't require
+	 * synchronization as long as _usage() treats it as volatile
+	 * (we can't race with _stop() by design).
+	 */
+	_current_cpu->usage0 = usage_now();
+}
+
+void z_sched_usage_stop(void)
+{
+	k_spinlock_key_t k = k_spin_lock(&usage_lock);
+	uint32_t u0 = _current_cpu->usage0;
+
+	if (u0 != 0) {
+		_current->base.usage += usage_now() - u0;
+	}
+
+	_current_cpu->usage0 = 0;
+	k_spin_unlock(&usage_lock, k);
+}
+
+uint64_t z_sched_thread_usage(struct k_thread *thread)
+{
+	k_spinlock_key_t k = k_spin_lock(&usage_lock);
+	uint32_t u0 = _current_cpu->usage0, now = usage_now();
+	uint64_t ret = thread->base.usage;
+
+	if (u0 != 0) {
+		ret += now - u0;
+		thread->base.usage = ret;
+		_current_cpu->usage0 = now;
+	}
+
+	k_spin_unlock(&usage_lock, k);
+	return ret;
+}
+
+#endif /* CONFIG_SCHED_THREAD_USAGE */
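
Illustrative sketch, not part of the patch: one way kernel-side code could
consume the accumulated counter through the new z_sched_thread_usage() query.
The report_thread_cycles() wrapper is hypothetical, the extern declaration is
supplied here only because the patch does not add a prototype for the query to
a header, and k_cyc_to_us_floor64() is Zephyr's existing cycle-to-microsecond
conversion helper. Since these are internal z_*() symbols, only kernel or
subsystem code would call this pending the unification mentioned above.

#include <kernel.h>

/* Defined by the patch in kernel/sched.c; no public prototype yet. */
extern uint64_t z_sched_thread_usage(struct k_thread *thread);

/* Hypothetical helper: report how long a thread has run so far.  The
 * result includes the currently-executing slice and is synchronized
 * against the thread running on another CPU.
 */
static void report_thread_cycles(struct k_thread *thread)
{
	uint64_t cycles = z_sched_thread_usage(thread);

	printk("thread %p: %llu cycles (~%llu us)\n",
	       thread, cycles, k_cyc_to_us_floor64(cycles));
}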
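
Also illustrative: the "more precise" bullet in the commit message says an
architecture may call z_sched_usage_stop() from its interrupt entry path so
that ISR cycles are not charged to the interrupted thread. A minimal sketch of
such a hook follows, assuming an architecture whose interrupt entry runs a C
stub with interrupts still masked; the arch_irq_entry_accounting() name and
its call site are hypothetical, not something this patch defines.

#include <ksched.h>	/* declares z_sched_usage_stop(); kernel-internal header */

#ifdef CONFIG_SCHED_THREAD_USAGE
/* Hypothetical arch-level hook, called with interrupts masked before the
 * ISR body runs.  Stopping accounting here keeps the ISR's cycles out of
 * the interrupted thread's total; accounting restarts when the scheduler
 * calls z_sched_usage_switch() on the way back out of the interrupt.
 */
void arch_irq_entry_accounting(void)
{
	z_sched_usage_stop();
}
#endif /* CONFIG_SCHED_THREAD_USAGE */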