From 40d12c142d967f162713c096e0584e56a675f360 Mon Sep 17 00:00:00 2001
From: Andy Ross
Date: Mon, 27 Sep 2021 08:22:43 -0700
Subject: [PATCH] kernel/sched: Add "thread_usage" API for thread runtime
 cycle monitoring

This is an alternate backend that does what THREAD_RUNTIME_STATS is
doing currently, but with a few advantages:

* Correctly synchronized: you can't race against a running thread
  (potentially on another CPU!) while querying its usage.

* Realtime results: you get the right answer always, up to timer
  precision, even if a thread has been running for a while
  uninterrupted and hasn't updated its total.

* Portable, no need for per-architecture code at all for the simple
  case.  (It leverages the USE_SWITCH layer to do this, so it won't
  work on older architectures.)

* Faster/smaller: minimizes use of 64 bit math; lower overhead in the
  thread struct (keeps the scratch "started" time in the CPU struct
  instead).  One 64 bit counter per thread and a 32 bit scratch
  register in the CPU struct.

* Standalone.  It's a core (but optional) scheduler feature, no
  dependence on para-kernel configuration like the tracing
  infrastructure.

* More precise: allows architectures to optionally call a trivial
  zero-argument/no-result cdecl function out of interrupt entry to
  avoid accounting for ISR runtime in thread totals.  No configuration
  is needed here: if it's called you get proper ISR accounting, and if
  not you don't.

For right now, pending unification, it's added side-by-side with the
older API and left as a z_*() internal symbol.

Signed-off-by: Andy Ross
---
 include/kernel/thread.h  |  4 +++
 include/kernel_structs.h |  4 +++
 kernel/Kconfig           |  6 +++++
 kernel/include/ksched.h  | 31 ++++++++++++++++++++++
 kernel/include/kswap.h   |  1 +
 kernel/sched.c           | 55 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 101 insertions(+)

diff --git a/include/kernel/thread.h b/include/kernel/thread.h
index ee6dc8f50ec..ae0930b70e0 100644
--- a/include/kernel/thread.h
+++ b/include/kernel/thread.h
@@ -116,6 +116,10 @@ struct _thread_base {
 	/* this thread's entry in a timeout queue */
 	struct _timeout timeout;
 #endif
+
+#ifdef CONFIG_SCHED_THREAD_USAGE
+	uint64_t usage;
+#endif
 };
 
 typedef struct _thread_base _thread_base_t;
diff --git a/include/kernel_structs.h b/include/kernel_structs.h
index 5199f82b92b..05d2ec309d5 100644
--- a/include/kernel_structs.h
+++ b/include/kernel_structs.h
@@ -130,6 +130,10 @@ struct _cpu {
 	uint8_t swap_ok;
 #endif
 
+#ifdef CONFIG_SCHED_THREAD_USAGE
+	uint32_t usage0;
+#endif
+
 	/* Per CPU architecture specifics */
 	struct _cpu_arch arch;
 };
diff --git a/kernel/Kconfig b/kernel/Kconfig
index 26a078e873e..7c09d4392f3 100644
--- a/kernel/Kconfig
+++ b/kernel/Kconfig
@@ -377,6 +377,12 @@ config THREAD_MAX_NAME_LEN
 config INSTRUMENT_THREAD_SWITCHING
 	bool
 
+config SCHED_THREAD_USAGE
+	bool "Collect thread runtime usage"
+	depends on USE_SWITCH
+	help
+	  Alternate implementation of thread runtime cycle usage
+
 menuconfig THREAD_RUNTIME_STATS
 	bool "Thread runtime statistics"
 	select INSTRUMENT_THREAD_SWITCHING
diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h
index 7d39825c52a..f29e6c41669 100644
--- a/kernel/include/ksched.h
+++ b/kernel/include/ksched.h
@@ -363,4 +363,35 @@ static inline bool z_sched_wake_all(_wait_q_t *wait_q, int swap_retval,
 int z_sched_wait(struct k_spinlock *lock, k_spinlock_key_t key,
 		 _wait_q_t *wait_q, k_timeout_t timeout, void **data);
 
+
+/** @brief Halt thread cycle usage accounting.
+ *
+ * Halts the accumulation of thread cycle usage and adds the current
+ * total to the thread's counter.  Called on context switch.
+ *
+ * Note that this function is idempotent.  The core kernel code calls
+ * it at the end of interrupt handlers (because that is where we have
+ * a portable hook) where we are context switching, which will include
+ * any cycles spent in the ISR in the per-thread accounting.  But
+ * architecture code can also call it earlier out of interrupt entry
+ * to improve measurement fidelity.
+ *
+ * This function assumes local interrupts are masked (so that the
+ * current CPU pointer and current thread are safe to modify), but
+ * requires no other synchronization.  Architecture layers don't need
+ * to do anything more.
+ */
+void z_sched_usage_stop(void);
+
+void z_sched_usage_start(struct k_thread *thread);
+
+static inline void z_sched_usage_switch(struct k_thread *thread)
+{
+	ARG_UNUSED(thread);
+#ifdef CONFIG_SCHED_THREAD_USAGE
+	z_sched_usage_stop();
+	z_sched_usage_start(thread);
+#endif
+}
+
 #endif /* ZEPHYR_KERNEL_INCLUDE_KSCHED_H_ */
diff --git a/kernel/include/kswap.h b/kernel/include/kswap.h
index 917315d5899..f43f1941177 100644
--- a/kernel/include/kswap.h
+++ b/kernel/include/kswap.h
@@ -109,6 +109,7 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
 #ifdef CONFIG_TIMESLICING
 	z_reset_time_slice();
 #endif
+	z_sched_usage_switch(new_thread);
 
 #ifdef CONFIG_SMP
 	_current_cpu->swap_ok = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 5eeb192268d..32fb2341ac7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -937,6 +937,8 @@ void *z_get_next_switch_handle(void *interrupted)
 		}
 		new_thread = next_up();
 
+		z_sched_usage_switch(new_thread);
+
 		if (old_thread != new_thread) {
 			update_metairq_preempt(new_thread);
 			wait_for_switch(new_thread);
@@ -976,6 +978,7 @@ void *z_get_next_switch_handle(void *interrupted)
 	}
 	return ret;
 #else
+	z_sched_usage_switch(_kernel.ready_q.cache);
 	_current->switch_handle = interrupted;
 	set_current(_kernel.ready_q.cache);
 	return _current->switch_handle;
@@ -1731,3 +1734,55 @@ int z_sched_wait(struct k_spinlock *lock, k_spinlock_key_t key,
 	}
 	return ret;
 }
+
+#ifdef CONFIG_SCHED_THREAD_USAGE
+
+static struct k_spinlock usage_lock;
+
+static uint32_t usage_now(void)
+{
+	uint32_t now = k_cycle_get_32();
+
+	/* Edge case: we use a zero as a null ("stop() already called") */
+	return (now == 0) ? 1 : now;
+}
+
+void z_sched_usage_start(struct k_thread *thread)
+{
+	/* One write through a volatile pointer doesn't require
+	 * synchronization as long as _usage() treats it as volatile
+	 * (we can't race with _stop() by design).
+	 */
+	_current_cpu->usage0 = usage_now();
+}
+
+void z_sched_usage_stop(void)
+{
+	k_spinlock_key_t k = k_spin_lock(&usage_lock);
+	uint32_t u0 = _current_cpu->usage0;
+
+	if (u0 != 0) {
+		_current->base.usage += usage_now() - u0;
+	}
+
+	_current_cpu->usage0 = 0;
+	k_spin_unlock(&usage_lock, k);
+}
+
+uint64_t z_sched_thread_usage(struct k_thread *thread)
+{
+	k_spinlock_key_t k = k_spin_lock(&usage_lock);
+	uint32_t u0 = _current_cpu->usage0, now = usage_now();
+	uint64_t ret = thread->base.usage;
+
+	if (u0 != 0) {
+		ret += now - u0;
+		thread->base.usage = ret;
+		_current_cpu->usage0 = now;
+	}
+
+	k_spin_unlock(&usage_lock, k);
+	return ret;
+}
+
+#endif /* CONFIG_SCHED_THREAD_USAGE */
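
Illustrative sketch, not part of the patch: one way kernel-side code could
consume the accumulated counter through the new z_sched_thread_usage() query.
The report_thread_cycles() wrapper is hypothetical, the extern declaration is
supplied here only because the patch does not add a prototype for the query to
a header, and k_cyc_to_us_floor64() is Zephyr's existing cycle-to-microsecond
conversion helper. Since these are internal z_*() symbols, only kernel or
subsystem code would call this pending the unification mentioned above.

#include <kernel.h>

/* Defined by the patch in kernel/sched.c; no public prototype yet. */
extern uint64_t z_sched_thread_usage(struct k_thread *thread);

/* Hypothetical helper: report how long a thread has run so far.  The
 * result includes the currently-executing slice and is synchronized
 * against the thread running on another CPU.
 */
static void report_thread_cycles(struct k_thread *thread)
{
	uint64_t cycles = z_sched_thread_usage(thread);

	printk("thread %p: %llu cycles (~%llu us)\n",
	       thread, cycles, k_cyc_to_us_floor64(cycles));
}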
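
Also illustrative: the "more precise" bullet in the commit message says an
architecture may call z_sched_usage_stop() from its interrupt entry path so
that ISR cycles are not charged to the interrupted thread. A minimal sketch of
such a hook follows, assuming an architecture whose interrupt entry runs a C
stub with interrupts still masked; the arch_irq_entry_accounting() name and
its call site are hypothetical, not something this patch defines.

#include <ksched.h>	/* declares z_sched_usage_stop(); kernel-internal header */

#ifdef CONFIG_SCHED_THREAD_USAGE
/* Hypothetical arch-level hook, called with interrupts masked before the
 * ISR body runs.  Stopping accounting here keeps the ISR's cycles out of
 * the interrupted thread's total; accounting restarts when the scheduler
 * calls z_sched_usage_switch() on the way back out of the interrupt.
 */
void arch_irq_entry_accounting(void)
{
	z_sched_usage_stop();
}
#endif /* CONFIG_SCHED_THREAD_USAGE */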