kernel: Add CONFIG_IPI_OPTIMIZE
The CONFIG_IPI_OPTIMIZE configuration option allows the flagging and subsequent signaling of IPIs to be optimized. It does this by making each bit in the kernel's pending_ipi field a flag that indicates whether the corresponding CPU might need an IPI to trigger the scheduling of a new thread on that CPU.

When a new thread is made ready, it is compared against each of the threads currently executing on the other CPUs. If there is a chance that the new thread should preempt the thread on another CPU, an IPI is flagged for that CPU. That is, a clear bit indicates that the CPU absolutely will not need to reschedule, while a set bit indicates that the target CPU must make that determination for itself.

Signed-off-by: Peter Mitsis <peter.mitsis@intel.com>
parent 9ff5221d23
commit d8a4c8a90c
6 changed files with 90 additions and 15 deletions
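To see the scheme in isolation, here is a minimal standalone C sketch of the same flag/consume pattern (illustrative only, not kernel code: NUM_CPUS, pending_mask, flag_candidates() and consume_and_signal() are invented names). Producers OR the bits of candidate CPUs into an atomic mask; the next scheduling point swaps the whole mask out atomically and signals only the flagged CPUs.

/* Standalone illustration of the pending-IPI bitmask pattern (not kernel code). */
#include <stdatomic.h>
#include <stdio.h>

#define NUM_CPUS 4U

static atomic_uint pending_mask;

/* Producer side: flag the CPUs that might have to preempt their current thread. */
static void flag_candidates(unsigned int mask)
{
	atomic_fetch_or(&pending_mask, mask);
}

/* Consumer side: at the next "scheduling point", take the whole mask in one
 * atomic swap and signal only the flagged CPUs.
 */
static void consume_and_signal(void)
{
	unsigned int mask = atomic_exchange(&pending_mask, 0U);

	for (unsigned int cpu = 0; cpu < NUM_CPUS; cpu++) {
		if (mask & (1U << cpu)) {
			printf("IPI -> CPU %u\n", cpu); /* stand-in for the arch IPI */
		}
	}
}

int main(void)
{
	flag_candidates(1U << 2);               /* CPU 2 is a candidate       */
	flag_candidates((1U << 1) | (1U << 3)); /* so are CPUs 1 and 3        */
	consume_and_signal();                   /* signals CPUs 1, 2 and 3    */
	return 0;
}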
@@ -240,8 +240,8 @@ struct z_kernel {
 #endif

 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
-	/* Need to signal an IPI at the next scheduling point */
-	bool pending_ipi;
+	/* Identify CPUs to send IPIs to at the next scheduling point */
+	atomic_t pending_ipi;
 #endif
 };

@@ -73,6 +73,24 @@ config TRACE_SCHED_IPI
 	depends on SCHED_IPI_SUPPORTED
 	depends on MP_MAX_NUM_CPUS>1

+config IPI_OPTIMIZE
+	bool "Optimize IPI delivery"
+	default n
+	depends on SCHED_IPI_SUPPORTED && MP_MAX_NUM_CPUS>1
+	help
+	  When selected, the kernel will attempt to determine the minimum
+	  set of CPUs that need an IPI to trigger a reschedule in response to
+	  a thread newly made ready for execution. This increases the
+	  computation required at every scheduler operation by a value that is
+	  O(N) in the number of CPUs, and in exchange reduces the number of
+	  interrupts delivered. Which to choose is going to depend on
+	  application behavior. If the architecture also supports directing
+	  IPIs to specific CPUs then this has the potential to significantly
+	  reduce the number of IPIs (and consequently ISRs) processed by the
+	  system as the number of CPUs increases. If not, the only benefit
+	  would be to not issue any IPIs if the newly readied thread is of
+	  lower priority than all the threads currently executing on other CPUs.
+
 config KERNEL_COHERENCE
 	bool "Place all shared data into coherent memory"
 	depends on ARCH_HAS_COHERENCE
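As a usage note (values illustrative, not part of this commit): on an SMP target whose architecture already selects SCHED_IPI_SUPPORTED, an application would opt in through its project configuration, for example:

CONFIG_SMP=y
CONFIG_MP_MAX_NUM_CPUS=4
CONFIG_IPI_OPTIMIZE=y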
@@ -7,13 +7,25 @@
 #ifndef ZEPHYR_KERNEL_INCLUDE_IPI_H_
 #define ZEPHYR_KERNEL_INCLUDE_IPI_H_

+#include <zephyr/kernel.h>
+#include <stdint.h>
+#include <zephyr/sys/atomic.h>
+
+#define IPI_ALL_CPUS_MASK ((1 << CONFIG_MP_MAX_NUM_CPUS) - 1)
+
+#define IPI_CPU_MASK(cpu_id) \
+	(IS_ENABLED(CONFIG_IPI_OPTIMIZE) ? BIT(cpu_id) : IPI_ALL_CPUS_MASK)
+
+
 /* defined in ipi.c when CONFIG_SMP=y */
 #ifdef CONFIG_SMP
-void flag_ipi(void);
+void flag_ipi(uint32_t ipi_mask);
 void signal_pending_ipi(void);
+atomic_val_t ipi_mask_create(struct k_thread *thread);
 #else
-#define flag_ipi() do { } while (false)
+#define flag_ipi(ipi_mask) do { } while (false)
 #define signal_pending_ipi() do { } while (false)
 #endif /* CONFIG_SMP */

+
 #endif /* ZEPHYR_KERNEL_INCLUDE_IPI_H_ */
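For reference, the fallback behavior is visible in the macro itself: with CONFIG_IPI_OPTIMIZE disabled, IPI_CPU_MASK(2) expands to IPI_ALL_CPUS_MASK (for example 0x0f on a 4-CPU build, the CPU count being chosen here only for illustration), whereas with the option enabled it expands to BIT(2), so call sites that already know the target CPU narrow the IPI to that single core without further changes.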
kernel/ipi.c (53 changes)

@@ -13,15 +13,58 @@ extern void z_trace_sched_ipi(void);
 #endif


-void flag_ipi(void)
+void flag_ipi(uint32_t ipi_mask)
 {
 #if defined(CONFIG_SCHED_IPI_SUPPORTED)
 	if (arch_num_cpus() > 1) {
-		_kernel.pending_ipi = true;
+		atomic_or(&_kernel.pending_ipi, (atomic_val_t)ipi_mask);
 	}
 #endif /* CONFIG_SCHED_IPI_SUPPORTED */
 }

+/* Create a bitmask of CPUs that need an IPI. Note: sched_spinlock is held. */
+atomic_val_t ipi_mask_create(struct k_thread *thread)
+{
+	if (!IS_ENABLED(CONFIG_IPI_OPTIMIZE)) {
+		return (CONFIG_MP_MAX_NUM_CPUS > 1) ? IPI_ALL_CPUS_MASK : 0;
+	}
+
+	uint32_t ipi_mask = 0;
+	uint32_t num_cpus = (uint32_t)arch_num_cpus();
+	uint32_t id = _current_cpu->id;
+	struct k_thread *cpu_thread;
+	bool executable_on_cpu = true;
+
+	for (uint32_t i = 0; i < num_cpus; i++) {
+		if (id == i) {
+			continue;
+		}
+
+		/*
+		 * An IPI absolutely does not need to be sent if ...
+		 * 1. the CPU is not active, or
+		 * 2. <thread> can not execute on the target CPU
+		 * ... and might not need to be sent if ...
+		 * 3. the target CPU's active thread is not preemptible, or
+		 * 4. the target CPU's active thread has a higher priority
+		 *    (Items 3 & 4 may be overridden by a metaIRQ thread)
+		 */
+
+#if defined(CONFIG_SCHED_CPU_MASK)
+		executable_on_cpu = ((thread->base.cpu_mask & BIT(i)) != 0);
+#endif
+
+		cpu_thread = _kernel.cpus[i].current;
+		if ((cpu_thread != NULL) &&
+		    (((z_sched_prio_cmp(cpu_thread, thread) < 0) &&
+		      (thread_is_preemptible(cpu_thread))) ||
+		     thread_is_metairq(thread)) && executable_on_cpu) {
+			ipi_mask |= BIT(i);
+		}
+	}
+
+	return (atomic_val_t)ipi_mask;
+}

 void signal_pending_ipi(void)
 {
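A worked example of the new helper (CPU count and priorities invented for illustration; recall that in Zephyr a numerically lower priority value is the higher priority, and assume the new thread may run on every CPU): CPU 0 readies a preemptible thread at priority 5 while CPU 1 is running a priority 3 thread, CPU 2 a preemptible priority 7 thread, and CPU 3 a cooperative thread. ipi_mask_create() skips CPU 0 (the CPU doing the readying), leaves CPU 1 unflagged (its thread already outranks the new one), flags CPU 2 (lower priority and preemptible), and leaves the cooperative thread on CPU 3 alone, so the returned mask is BIT(2). Had the new thread been a metaIRQ, CPUs 1 and 3 would have been flagged as well.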
@@ -34,8 +77,10 @@ void signal_pending_ipi(void)
 	 */
 #if defined(CONFIG_SCHED_IPI_SUPPORTED)
 	if (arch_num_cpus() > 1) {
-		if (_kernel.pending_ipi) {
-			_kernel.pending_ipi = false;
+		uint32_t cpu_bitmap;
+
+		cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
+		if (cpu_bitmap != 0) {
 			arch_sched_ipi();
 		}
 	}
@@ -384,7 +384,8 @@ static void ready_thread(struct k_thread *thread)

 		queue_thread(thread);
 		update_cache(0);
-		flag_ipi();
+
+		flag_ipi(ipi_mask_create(thread));
 	}
 }

@@ -746,7 +747,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)
 		queue_thread(thread);

 		if (old_prio > prio) {
-			flag_ipi();
+			flag_ipi(ipi_mask_create(thread));
 		}
 	} else {
 		/*
@@ -762,7 +763,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)

 		cpu = thread_active_elsewhere(thread);
 		if ((cpu != NULL) && (old_prio < prio)) {
-			flag_ipi();
+			flag_ipi(IPI_CPU_MASK(cpu->id));
 		}
 	}

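The two z_thread_prio_set() call sites above differ deliberately: raising a thread's priority can make it a candidate for any CPU, so the full candidate mask is recomputed with ipi_mask_create(), whereas lowering the priority of a thread that is already running elsewhere only ever affects the CPU it occupies, so IPI_CPU_MASK(cpu->id) flags just that core (for example only BIT(3) when the demoted thread is running on CPU 3, or every core when CONFIG_IPI_OPTIMIZE is disabled).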
@@ -58,11 +58,10 @@ static void slice_timeout(struct _timeout *timeout)
 	slice_expired[cpu] = true;

 	/* We need an IPI if we just handled a timeslice expiration
-	 * for a different CPU. Ideally this would be able to target
-	 * the specific core, but that's not part of the API yet.
+	 * for a different CPU.
 	 */
-	if (IS_ENABLED(CONFIG_SMP) && cpu != _current_cpu->id) {
-		flag_ipi();
+	if (cpu != _current_cpu->id) {
+		flag_ipi(IPI_CPU_MASK(cpu));
 	}
 }
