arch: add new interfaces to set/get the current thread of current CPU

Add the following arch-specific APIs:
- arch_current_thread()
- arch_current_thread_set()

which allow SMP architectures to implement a faster "get current
thread pointer" than the default provided by the kernel. The 'set'
function is required for the 'get' to work, more on that later.

When `CONFIG_ARCH_HAS_CUSTOM_CURRENT_IMPL` is selected, calls to
`_current` & `k_sched_current_thread_query()` will be redirected to
`arch_current_thread()`, which ideally should translate into a single
instruction read, avoiding the current
"lock > read CPU > read current thread > unlock" path in SMP
architectures and thus greatly improving the read performance.

However, since the kernel relies on a copy of the "current thread"
pointer on every CPU for certain operations (i.e. to compare the
priority of the currently scheduled thread on another CPU to determine
if an IPI should be sent), we can't eliminate the copy of the current
thread (`current`) from the `struct _cpu` and therefore the kernel now
has to invoke `arch_current_thread_set()` in addition to what it has
been doing. This means that it will take slightly longer (most likely
one instruction write) to change the current thread pointer on the
current CPU.

Signed-off-by: Yong Cong Sin <ycsin@meta.com>
Signed-off-by: Yong Cong Sin <yongcong.sin@gmail.com>
This commit is contained in:
Yong Cong Sin 2024-11-01 12:12:29 +08:00 committed by Anas Nashif
commit d26c712258
9 changed files with 70 additions and 22 deletions

View file

@ -1148,3 +1148,9 @@ config ARCH_HAS_CUSTOM_BUSY_WAIT
It's possible that an architecture port cannot or does not want to use
the provided k_busy_wait(), but instead must do something custom. It must
enable this option in that case.
config ARCH_HAS_CUSTOM_CURRENT_IMPL
bool
help
Select when architecture implements arch_current_thread() &
arch_current_thread_set().

View file

@ -36,6 +36,14 @@ Deprecated in this release
Architectures
*************
* Common
* Introduced :kconfig:option:`CONFIG_ARCH_HAS_CUSTOM_CURRENT_IMPL`, which can be selected when
an architecture implements and enables its own :c:func:`arch_current_thread` and
:c:func:`arch_current_thread_set` functions for faster retrieval of the current CPU's thread
pointer. When enabled, the ``_current`` variable will be routed to
:c:func:`arch_current_thread` (:github:`80716`).
* ARC
* ARM

View file

@ -34,4 +34,6 @@
#include <zephyr/arch/sparc/arch_inlines.h>
#endif
#include <zephyr/arch/common/arch_inlines.h>
#endif /* ZEPHYR_INCLUDE_ARCH_INLINES_H_ */

View file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2024 Meta Platforms.
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef ZEPHYR_INCLUDE_ZEPHYR_ARCH_COMMON_ARCH_INLINES_H_
#define ZEPHYR_INCLUDE_ZEPHYR_ARCH_COMMON_ARCH_INLINES_H_
#ifndef ZEPHYR_INCLUDE_ARCH_INLINES_H_
#error "This header shouldn't be included directly"
#endif /* ZEPHYR_INCLUDE_ARCH_INLINES_H_ */
#ifndef _ASMLANGUAGE
#include <zephyr/kernel_structs.h>
#ifndef CONFIG_ARCH_HAS_CUSTOM_CURRENT_IMPL
/* Default implementation: return the thread currently scheduled on
 * this CPU. Architectures selecting ARCH_HAS_CUSTOM_CURRENT_IMPL
 * provide their own (typically single-instruction) version instead.
 */
static ALWAYS_INLINE struct k_thread *arch_current_thread(void)
{
#ifdef CONFIG_SMP
	/* In SMP, _current is a field read from _current_cpu, which
	 * can race with preemption before it is read. We must lock
	 * local interrupts when reading it.
	 */
	unsigned int key = arch_irq_lock();
	struct k_thread *thread = _current_cpu->current;

	arch_irq_unlock(key);

	return thread;
#else
	/* Uniprocessor: no preemption race, read CPU 0 directly */
	return _kernel.cpus[0].current;
#endif /* CONFIG_SMP */
}
/* Record @p thread as the current thread of the executing CPU.
 * The kernel must call this on every context switch so that the
 * per-CPU `current` copy stays valid (other CPUs read it, e.g. for
 * IPI priority decisions), even when an architecture overrides
 * arch_current_thread() with a faster custom implementation.
 */
static ALWAYS_INLINE void arch_current_thread_set(struct k_thread *thread)
{
_current_cpu->current = thread;
}
#endif /* CONFIG_ARCH_HAS_CUSTOM_CURRENT_IMPL */
#endif /* _ASMLANGUAGE */
#endif /* ZEPHYR_INCLUDE_ZEPHYR_ARCH_COMMON_ARCH_INLINES_H_ */

View file

@ -260,7 +260,7 @@ bool z_smp_cpu_mobile(void);
#define _current_cpu ({ __ASSERT_NO_MSG(!z_smp_cpu_mobile()); \
arch_curr_cpu(); })
#define _current k_sched_current_thread_query()
#define _current arch_current_thread()
#else
#define _current_cpu (&_kernel.cpus[0])

View file

@ -134,7 +134,7 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
#endif /* CONFIG_SMP */
z_thread_mark_switched_out();
z_sched_switch_spin(new_thread);
_current_cpu->current = new_thread;
arch_current_thread_set(new_thread);
#ifdef CONFIG_TIMESLICING
z_reset_time_slice(new_thread);
@ -260,6 +260,6 @@ static inline void z_dummy_thread_init(struct k_thread *dummy_thread)
dummy_thread->base.slice_ticks = 0;
#endif /* CONFIG_TIMESLICE_PER_THREAD */
_current_cpu->current = dummy_thread;
arch_current_thread_set(dummy_thread);
}
#endif /* ZEPHYR_KERNEL_INCLUDE_KSWAP_H_ */

View file

@ -1674,7 +1674,7 @@ static bool do_page_fault(void *addr, bool pin)
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
key = k_spin_lock(&z_mm_lock);
faulting_thread = _current_cpu->current;
faulting_thread = _current;
status = arch_page_location_get(addr, &page_in_location);
if (status == ARCH_PAGE_LOCATION_BAD) {

View file

@ -838,11 +838,11 @@ struct k_thread *z_swap_next_thread(void)
}
#ifdef CONFIG_USE_SWITCH
/* Just a wrapper around _current = xxx with tracing */
/* Just a wrapper around arch_current_thread_set(xxx) with tracing */
static inline void set_current(struct k_thread *new_thread)
{
z_thread_mark_switched_out();
_current_cpu->current = new_thread;
arch_current_thread_set(new_thread);
}
/**
@ -1230,20 +1230,7 @@ static inline void z_vrfy_k_wakeup(k_tid_t thread)
k_tid_t z_impl_k_sched_current_thread_query(void)
{
#ifdef CONFIG_SMP
/* In SMP, _current is a field read from _current_cpu, which
* can race with preemption before it is read. We must lock
* local interrupts when reading it.
*/
unsigned int k = arch_irq_lock();
#endif /* CONFIG_SMP */
k_tid_t ret = _current_cpu->current;
#ifdef CONFIG_SMP
arch_irq_unlock(k);
#endif /* CONFIG_SMP */
return ret;
return arch_current_thread();
}
#ifdef CONFIG_USERSPACE

View file

@ -946,8 +946,8 @@ void z_thread_mark_switched_out(void)
#ifdef CONFIG_TRACING
#ifdef CONFIG_THREAD_LOCAL_STORAGE
/* Dummy thread won't have TLS set up to run arbitrary code */
if (!_current_cpu->current ||
(_current_cpu->current->base.thread_state & _THREAD_DUMMY) != 0)
if (!_current ||
(_current->base.thread_state & _THREAD_DUMMY) != 0)
return;
#endif /* CONFIG_THREAD_LOCAL_STORAGE */
SYS_PORT_TRACING_FUNC(k_thread, switched_out);