kernel: Add CONFIG_IPI_OPTIMIZE

The CONFIG_IPI_OPTIMIZE configuration option allows the flagging and
subsequent signaling of IPIs to be optimized.

It does this by making each bit in the kernel's pending_ipi field
a flag that indicates whether the corresponding CPU might need an IPI
to trigger the scheduling of a new thread on that CPU.

When a new thread is made ready, we compare that thread against each
of the threads currently executing on the other CPUs. If there is a
chance that the new thread should preempt the thread on another CPU,
we flag that an IPI is needed for that CPU. That is, a clear bit
indicates that the CPU absolutely will not need to reschedule, while a
set bit indicates that the target CPU must make that determination for
itself.

Signed-off-by: Peter Mitsis <peter.mitsis@intel.com>
Peter Mitsis 2024-02-16 13:54:47 -05:00 committed by Anas Nashif
commit d8a4c8a90c
6 changed files with 90 additions and 15 deletions
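The mechanism described in the commit message boils down to a shared bitmap that wakeup paths OR bits into and that the next scheduling point atomically drains. Below is a minimal standalone sketch of that idea, using invented names and plain C11 atomics rather than the kernel's own APIs; it only illustrates the protocol, it is not the Zephyr implementation.

/* Standalone sketch (not Zephyr code): each bit stands for one CPU. A set
 * bit means "this CPU must decide for itself whether to reschedule"; a
 * clear bit means "this CPU definitely does not need to".
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint_fast32_t pending_ipi;   /* stand-in for _kernel.pending_ipi */

static void flag_ipi_sketch(uint32_t ipi_mask)
{
        /* Producer side: accumulate target CPUs; several wakeups may OR in
         * bits before the next scheduling point consumes them.
         */
        atomic_fetch_or(&pending_ipi, ipi_mask);
}

static void signal_pending_ipi_sketch(void)
{
        /* Consumer side: atomically take and clear the whole bitmap, then
         * send IPIs only if at least one bit was set.
         */
        uint32_t cpu_bitmap = (uint32_t)atomic_exchange(&pending_ipi, 0);

        if (cpu_bitmap != 0) {
                printf("send IPI(s) for CPU bitmap 0x%x\n", (unsigned int)cpu_bitmap);
        }
}

int main(void)
{
        flag_ipi_sketch(1u << 2);      /* CPU 2 might need to reschedule */
        flag_ipi_sketch(1u << 3);      /* CPU 3 might, too */
        signal_pending_ipi_sketch();   /* prints: send IPI(s) for CPU bitmap 0xc */
        return 0;
}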


@@ -240,8 +240,8 @@ struct z_kernel {
 #endif
 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
-        /* Need to signal an IPI at the next scheduling point */
-        bool pending_ipi;
+        /* Identify CPUs to send IPIs to at the next scheduling point */
+        atomic_t pending_ipi;
 #endif
 };


@@ -73,6 +73,24 @@ config TRACE_SCHED_IPI
         depends on SCHED_IPI_SUPPORTED
         depends on MP_MAX_NUM_CPUS>1
 
+config IPI_OPTIMIZE
+        bool "Optimize IPI delivery"
+        default n
+        depends on SCHED_IPI_SUPPORTED && MP_MAX_NUM_CPUS>1
+        help
+          When selected, the kernel will attempt to determine the minimum
+          set of CPUs that need an IPI to trigger a reschedule in response to
+          a thread newly made ready for execution. This increases the
+          computation required at every scheduler operation by a value that is
+          O(N) in the number of CPUs, and in exchange reduces the number of
+          interrupts delivered. Which to choose is going to depend on
+          application behavior. If the architecture also supports directing
+          IPIs to specific CPUs then this has the potential to significantly
+          reduce the number of IPIs (and consequently ISRs) processed by the
+          system as the number of CPUs increases. If not, the only benefit
+          would be to not issue any IPIs if the newly readied thread is of
+          lower priority than all the threads currently executing on other CPUs.
+
 config KERNEL_COHERENCE
         bool "Place all shared data into coherent memory"
         depends on ARCH_HAS_COHERENCE
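To make the tradeoff in the help text concrete, here is a back-of-the-envelope sketch in standalone C. The per-CPU priority snapshot and the new thread's priority are invented numbers; it simply counts the interrupts one wakeup would generate under a broadcast policy versus a targeted one.

/* Sketch only (not Zephyr code). Priorities follow the "lower value =
 * higher priority" convention.
 */
#include <stdio.h>

int main(void)
{
        const unsigned int num_cpus = 8;
        /* Priority of the thread currently running on each CPU; CPU 0 is local */
        const int cpu_prio[8] = {0, 1, 1, 9, 1, 1, 1, 1};
        const int new_thread_prio = 5;

        unsigned int broadcast_ipis = num_cpus - 1; /* interrupt everyone else */
        unsigned int targeted_ipis = 0;             /* cost: one compare per other CPU */

        for (unsigned int i = 1; i < num_cpus; i++) {
                if (new_thread_prio < cpu_prio[i]) {
                        targeted_ipis++;    /* only this CPU might need to reschedule */
                }
        }

        /* Prints "broadcast: 7 IPIs, optimized: 1 IPI(s)" for this snapshot */
        printf("broadcast: %u IPIs, optimized: %u IPI(s)\n",
               broadcast_ipis, targeted_ipis);
        return 0;
}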


@@ -7,13 +7,25 @@
 #ifndef ZEPHYR_KERNEL_INCLUDE_IPI_H_
 #define ZEPHYR_KERNEL_INCLUDE_IPI_H_
 
+#include <zephyr/kernel.h>
+#include <stdint.h>
+#include <zephyr/sys/atomic.h>
+
+#define IPI_ALL_CPUS_MASK  ((1 << CONFIG_MP_MAX_NUM_CPUS) - 1)
+
+#define IPI_CPU_MASK(cpu_id) \
+        (IS_ENABLED(CONFIG_IPI_OPTIMIZE) ? BIT(cpu_id) : IPI_ALL_CPUS_MASK)
+
 /* defined in ipi.c when CONFIG_SMP=y */
 #ifdef CONFIG_SMP
-void flag_ipi(void);
+void flag_ipi(uint32_t ipi_mask);
 void signal_pending_ipi(void);
+atomic_val_t ipi_mask_create(struct k_thread *thread);
 #else
-#define flag_ipi() do { } while (false)
+#define flag_ipi(ipi_mask) do { } while (false)
 #define signal_pending_ipi() do { } while (false)
 #endif /* CONFIG_SMP */
 
 #endif /* ZEPHYR_KERNEL_INCLUDE_IPI_H_ */
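A standalone sketch of how a call site can pass a mask without caring whether the optimization is enabled: with it, the per-CPU macro names a single CPU; without it, the macro degenerates to the all-CPUs mask. The macros below are local stand-ins for a hypothetical 4-CPU build that mirror, but do not reuse, the header definitions above.

/* Sketch only (not Zephyr code). */
#include <stdint.h>
#include <stdio.h>

#define MP_MAX_NUM_CPUS    4
#define IPI_OPTIMIZE       1          /* flip to 0 to model the option disabled */

#define BIT(n)             (1u << (n))
#define ALL_CPUS_MASK      ((1u << MP_MAX_NUM_CPUS) - 1)
#define CPU_MASK(id)       (IPI_OPTIMIZE ? BIT(id) : ALL_CPUS_MASK)

int main(void)
{
        /* Targeting CPU 2: 0x4 when optimized, 0xf (all four CPUs) otherwise */
        printf("mask for CPU 2: 0x%x\n", (unsigned int)CPU_MASK(2));
        return 0;
}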


@@ -13,15 +13,58 @@ extern void z_trace_sched_ipi(void);
 #endif
 
-void flag_ipi(void)
+void flag_ipi(uint32_t ipi_mask)
 {
 #if defined(CONFIG_SCHED_IPI_SUPPORTED)
         if (arch_num_cpus() > 1) {
-                _kernel.pending_ipi = true;
+                atomic_or(&_kernel.pending_ipi, (atomic_val_t)ipi_mask);
         }
 #endif /* CONFIG_SCHED_IPI_SUPPORTED */
 }
 
+/* Create a bitmask of CPUs that need an IPI. Note: sched_spinlock is held. */
+atomic_val_t ipi_mask_create(struct k_thread *thread)
+{
+        if (!IS_ENABLED(CONFIG_IPI_OPTIMIZE)) {
+                return (CONFIG_MP_MAX_NUM_CPUS > 1) ? IPI_ALL_CPUS_MASK : 0;
+        }
+
+        uint32_t ipi_mask = 0;
+        uint32_t num_cpus = (uint32_t)arch_num_cpus();
+        uint32_t id = _current_cpu->id;
+        struct k_thread *cpu_thread;
+        bool executable_on_cpu = true;
+
+        for (uint32_t i = 0; i < num_cpus; i++) {
+                if (id == i) {
+                        continue;
+                }
+
+                /*
+                 * An IPI absolutely does not need to be sent if ...
+                 *   1. the CPU is not active, or
+                 *   2. <thread> can not execute on the target CPU
+                 * ... and might not need to be sent if ...
+                 *   3. the target CPU's active thread is not preemptible, or
+                 *   4. the target CPU's active thread has a higher priority
+                 *      (Items 3 & 4 may be overridden by a metaIRQ thread)
+                 */
+
+#if defined(CONFIG_SCHED_CPU_MASK)
+                executable_on_cpu = ((thread->base.cpu_mask & BIT(i)) != 0);
+#endif
+
+                cpu_thread = _kernel.cpus[i].current;
+                if ((cpu_thread != NULL) &&
+                    (((z_sched_prio_cmp(cpu_thread, thread) < 0) &&
+                      (thread_is_preemptible(cpu_thread))) ||
+                     thread_is_metairq(thread)) && executable_on_cpu) {
+                        ipi_mask |= BIT(i);
+                }
+        }
+
+        return (atomic_val_t)ipi_mask;
+}
+
 void signal_pending_ipi(void)
 {
@@ -34,8 +77,10 @@ void signal_pending_ipi(void)
          */
 #if defined(CONFIG_SCHED_IPI_SUPPORTED)
         if (arch_num_cpus() > 1) {
-                if (_kernel.pending_ipi) {
-                        _kernel.pending_ipi = false;
+                uint32_t cpu_bitmap;
+
+                cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
+                if (cpu_bitmap != 0) {
                         arch_sched_ipi();
                 }
         }
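The following is a simplified standalone model of the decision rules listed in the comment inside ipi_mask_create(), applied to an invented 4-CPU snapshot. It keeps only the active/preemptible/priority checks, omits the metaIRQ and CPU-affinity cases, and uses made-up types and names; priorities follow the usual "lower value = higher priority" convention.

/* Sketch only (not Zephyr code). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_cpu {
        bool active;
        bool preemptible;   /* is the running thread preemptible? */
        int  prio;          /* running thread's priority, lower value = higher */
};

int main(void)
{
        const struct fake_cpu cpus[4] = {
                { .active = true,  .preemptible = true,  .prio = 5  },  /* local CPU */
                { .active = true,  .preemptible = true,  .prio = 10 },  /* lower prio -> IPI */
                { .active = true,  .preemptible = false, .prio = 10 },  /* not preemptible -> no IPI */
                { .active = false, .preemptible = true,  .prio = 10 },  /* inactive -> no IPI */
        };
        const int new_thread_prio = 7;
        const unsigned int local_cpu = 0;
        uint32_t ipi_mask = 0;

        for (unsigned int i = 0; i < 4; i++) {
                if (i == local_cpu || !cpus[i].active) {
                        continue;       /* skip ourselves and inactive CPUs */
                }
                if (cpus[i].preemptible && (new_thread_prio < cpus[i].prio)) {
                        ipi_mask |= 1u << i;   /* this CPU might have to reschedule */
                }
        }

        printf("IPI mask: 0x%x\n", (unsigned int)ipi_mask);   /* prints 0x2: only CPU 1 */
        return 0;
}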


@@ -384,7 +384,8 @@ static void ready_thread(struct k_thread *thread)
                 queue_thread(thread);
                 update_cache(0);
-                flag_ipi();
+
+                flag_ipi(ipi_mask_create(thread));
         }
 }
@@ -746,7 +747,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)
                 queue_thread(thread);
 
                 if (old_prio > prio) {
-                        flag_ipi();
+                        flag_ipi(ipi_mask_create(thread));
                 }
         } else {
                 /*
@@ -762,7 +763,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)
                 cpu = thread_active_elsewhere(thread);
 
                 if ((cpu != NULL) && (old_prio < prio)) {
-                        flag_ipi();
+                        flag_ipi(IPI_CPU_MASK(cpu->id));
                 }
         }


@@ -58,11 +58,10 @@ static void slice_timeout(struct _timeout *timeout)
         slice_expired[cpu] = true;
 
         /* We need an IPI if we just handled a timeslice expiration
-         * for a different CPU. Ideally this would be able to target
-         * the specific core, but that's not part of the API yet.
+         * for a different CPU.
          */
-        if (IS_ENABLED(CONFIG_SMP) && cpu != _current_cpu->id) {
-                flag_ipi();
+        if (cpu != _current_cpu->id) {
+                flag_ipi(IPI_CPU_MASK(cpu));
         }
 }
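Finally, a hypothetical standalone sketch of the pattern slice_timeout() now follows: when a time slice expires for a different CPU, only that CPU's pending bit is flagged, so only that CPU takes an interrupt and re-evaluates its run state. All names and types below are invented for illustration.

/* Sketch only (not Zephyr code). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_CPUS 4

static bool slice_expired_sketch[NUM_CPUS];
static atomic_uint_fast32_t pending_ipi_sketch;

static void slice_timeout_sketch(unsigned int expired_cpu, unsigned int current_cpu)
{
        slice_expired_sketch[expired_cpu] = true;

        /* Only flag an IPI when the expiration belongs to a different CPU;
         * the local CPU will notice its own expired slice without one.
         */
        if (expired_cpu != current_cpu) {
                atomic_fetch_or(&pending_ipi_sketch, 1u << expired_cpu);
        }
}

int main(void)
{
        slice_timeout_sketch(3, 0);   /* CPU 0 handles CPU 3's expiration */
        printf("pending IPI bitmap: 0x%x\n",
               (unsigned int)atomic_load(&pending_ipi_sketch));   /* prints 0x8 */
        return 0;
}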