diff --git a/include/zephyr/kernel_structs.h b/include/zephyr/kernel_structs.h
index baa2046f07c..cf7daff9a6c 100644
--- a/include/zephyr/kernel_structs.h
+++ b/include/zephyr/kernel_structs.h
@@ -240,8 +240,8 @@ struct z_kernel {
 #endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
-	/* Need to signal an IPI at the next scheduling point */
-	bool pending_ipi;
+	/* Identify CPUs to send IPIs to at the next scheduling point */
+	atomic_t pending_ipi;
 #endif
 };
 
diff --git a/kernel/Kconfig.smp b/kernel/Kconfig.smp
index 22279270b19..04fc01801b3 100644
--- a/kernel/Kconfig.smp
+++ b/kernel/Kconfig.smp
@@ -73,6 +73,24 @@ config TRACE_SCHED_IPI
 	depends on SCHED_IPI_SUPPORTED
 	depends on MP_MAX_NUM_CPUS>1
 
+config IPI_OPTIMIZE
+	bool "Optimize IPI delivery"
+	default n
+	depends on SCHED_IPI_SUPPORTED && MP_MAX_NUM_CPUS>1
+	help
+	  When selected, the kernel will attempt to determine the minimum
+	  set of CPUs that need an IPI to trigger a reschedule in response to
+	  a thread newly made ready for execution. This increases the
+	  computation required at every scheduler operation by a value that is
+	  O(N) in the number of CPUs, and in exchange reduces the number of
+	  interrupts delivered. Which to choose is going to depend on
+	  application behavior. If the architecture also supports directing
+	  IPIs to specific CPUs then this has the potential to significantly
+	  reduce the number of IPIs (and consequently ISRs) processed by the
+	  system as the number of CPUs increases. If not, the only benefit
+	  would be to not issue any IPIs if the newly readied thread is of
+	  lower priority than all the threads currently executing on other CPUs.
+
 config KERNEL_COHERENCE
 	bool "Place all shared data into coherent memory"
 	depends on ARCH_HAS_COHERENCE
diff --git a/kernel/include/ipi.h b/kernel/include/ipi.h
index 77105cac168..b353a676d46 100644
--- a/kernel/include/ipi.h
+++ b/kernel/include/ipi.h
@@ -7,13 +7,25 @@
 #ifndef ZEPHYR_KERNEL_INCLUDE_IPI_H_
 #define ZEPHYR_KERNEL_INCLUDE_IPI_H_
 
+#include <zephyr/kernel.h>
+#include <zephyr/sys/atomic.h>
+#include <stdint.h>
+
+#define IPI_ALL_CPUS_MASK ((1 << CONFIG_MP_MAX_NUM_CPUS) - 1)
+
+#define IPI_CPU_MASK(cpu_id) \
+	(IS_ENABLED(CONFIG_IPI_OPTIMIZE) ? BIT(cpu_id) : IPI_ALL_CPUS_MASK)
+
+
 /* defined in ipi.c when CONFIG_SMP=y */
 #ifdef CONFIG_SMP
-void flag_ipi(void);
+void flag_ipi(uint32_t ipi_mask);
 void signal_pending_ipi(void);
+atomic_val_t ipi_mask_create(struct k_thread *thread);
 #else
-#define flag_ipi(ipi_mask) do { } while (false)
+#define flag_ipi(ipi_mask) do { } while (false)
 #define signal_pending_ipi() do { } while (false)
 #endif /* CONFIG_SMP */
 
+
 #endif /* ZEPHYR_KERNEL_INCLUDE_IPI_H_ */
diff --git a/kernel/ipi.c b/kernel/ipi.c
index 99693c0ecbf..9985c9485c2 100644
--- a/kernel/ipi.c
+++ b/kernel/ipi.c
@@ -13,15 +13,58 @@
 extern void z_trace_sched_ipi(void);
 #endif
 
-void flag_ipi(void)
+void flag_ipi(uint32_t ipi_mask)
 {
 #if defined(CONFIG_SCHED_IPI_SUPPORTED)
 	if (arch_num_cpus() > 1) {
-		_kernel.pending_ipi = true;
+		atomic_or(&_kernel.pending_ipi, (atomic_val_t)ipi_mask);
 	}
 #endif /* CONFIG_SCHED_IPI_SUPPORTED */
 }
 
+/* Create a bitmask of CPUs that need an IPI. Note: sched_spinlock is held. */
+atomic_val_t ipi_mask_create(struct k_thread *thread)
+{
+	if (!IS_ENABLED(CONFIG_IPI_OPTIMIZE)) {
+		return (CONFIG_MP_MAX_NUM_CPUS > 1) ? IPI_ALL_CPUS_MASK : 0;
+	}
+
+	uint32_t ipi_mask = 0;
+	uint32_t num_cpus = (uint32_t)arch_num_cpus();
+	uint32_t id = _current_cpu->id;
+	struct k_thread *cpu_thread;
+	bool executable_on_cpu = true;
+
+	for (uint32_t i = 0; i < num_cpus; i++) {
+		if (id == i) {
+			continue;
+		}
+
+		/*
+		 * An IPI absolutely does not need to be sent if ...
+		 * 1. the CPU is not active, or
+		 * 2. <thread> can not execute on the target CPU
+		 * ... and might not need to be sent if ...
+		 * 3. the target CPU's active thread is not preemptible, or
+		 * 4. the target CPU's active thread has a higher priority
+		 *    (Items 3 & 4 may be overridden by a metaIRQ thread)
+		 */
+
+#if defined(CONFIG_SCHED_CPU_MASK)
+		executable_on_cpu = ((thread->base.cpu_mask & BIT(i)) != 0);
+#endif
+
+		cpu_thread = _kernel.cpus[i].current;
+		if ((cpu_thread != NULL) &&
+		    (((z_sched_prio_cmp(cpu_thread, thread) < 0) &&
+		      (thread_is_preemptible(cpu_thread))) ||
+		     thread_is_metairq(thread)) && executable_on_cpu) {
+			ipi_mask |= BIT(i);
+		}
+	}
+
+	return (atomic_val_t)ipi_mask;
+}
 
 void signal_pending_ipi(void)
 {
@@ -34,8 +77,10 @@ void signal_pending_ipi(void)
 	 */
#if defined(CONFIG_SCHED_IPI_SUPPORTED)
 	if (arch_num_cpus() > 1) {
-		if (_kernel.pending_ipi) {
-			_kernel.pending_ipi = false;
+		uint32_t cpu_bitmap;
+
+		cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
+		if (cpu_bitmap != 0) {
 			arch_sched_ipi();
 		}
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index c17efeaf6ef..34b256c22a8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -384,7 +384,8 @@ static void ready_thread(struct k_thread *thread)
 
 		queue_thread(thread);
 		update_cache(0);
-		flag_ipi();
+
+		flag_ipi(ipi_mask_create(thread));
 	}
 }
 
@@ -746,7 +747,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)
 			queue_thread(thread);
 
 			if (old_prio > prio) {
-				flag_ipi();
+				flag_ipi(ipi_mask_create(thread));
 			}
 		} else {
 			/*
@@ -762,7 +763,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)
 			cpu = thread_active_elsewhere(thread);
 
 			if ((cpu != NULL) && (old_prio < prio)) {
-				flag_ipi();
+				flag_ipi(IPI_CPU_MASK(cpu->id));
 			}
 		}
 
diff --git a/kernel/timeslicing.c b/kernel/timeslicing.c
index 07ae497c7f9..be91d9606f5 100644
--- a/kernel/timeslicing.c
+++ b/kernel/timeslicing.c
@@ -58,11 +58,10 @@ static void slice_timeout(struct _timeout *timeout)
 	slice_expired[cpu] = true;
 
 	/* We need an IPI if we just handled a timeslice expiration
-	 * for a different CPU. Ideally this would be able to target
-	 * the specific core, but that's not part of the API yet.
+	 * for a different CPU.
 	 */
-	if (IS_ENABLED(CONFIG_SMP) && cpu != _current_cpu->id) {
-		flag_ipi();
+	if (cpu != _current_cpu->id) {
+		flag_ipi(IPI_CPU_MASK(cpu));
 	}
 }
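
For readers outside the kernel tree, the accumulate-and-drain pattern introduced above (flag_ipi() ORs target-CPU bits into a shared pending mask, signal_pending_ipi() atomically takes and clears the mask before raising the interrupt) can be modeled in a few lines of plain C. The sketch below is illustrative only and is not Zephyr code: model_flag_ipi(), model_signal_pending_ipi(), fake_arch_sched_ipi() and main() are invented for the example, and the real arch_sched_ipi() in this patch still broadcasts, so the drained bitmap only decides whether to signal at all.

/* Illustrative model only -- not Zephyr code. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Shared pending mask, analogous to _kernel.pending_ipi in the patch. */
static atomic_uint pending_ipi;

/* Stand-in for the architecture hook; the patched kernel still broadcasts. */
static void fake_arch_sched_ipi(void)
{
	printf("broadcast reschedule IPI\n");
}

/* Accumulate: OR the target CPUs into the pending mask (cf. flag_ipi()). */
static void model_flag_ipi(uint32_t ipi_mask)
{
	atomic_fetch_or(&pending_ipi, ipi_mask);
}

/* Drain: atomically take-and-clear the mask at the next scheduling point
 * and signal only if at least one CPU was flagged (cf. signal_pending_ipi()).
 */
static void model_signal_pending_ipi(void)
{
	uint32_t cpu_bitmap = atomic_exchange(&pending_ipi, 0u);

	if (cpu_bitmap != 0u) {
		printf("flagged CPUs: 0x%x -> ", (unsigned int)cpu_bitmap);
		fake_arch_sched_ipi();
	}
}

int main(void)
{
	model_flag_ipi(1u << 2);    /* thread readied; only CPU 2 should preempt */
	model_flag_ipi(1u << 3);    /* second wakeup targets CPU 3 */
	model_signal_pending_ipi(); /* drains 0xc, raises a single IPI */
	model_signal_pending_ipi(); /* nothing pending, no IPI */
	return 0;
}

Built with any C11 compiler (e.g. cc -std=c11 model.c), the first drain reports the combined mask 0xc and raises one IPI, while the second drain is silent; skipping IPIs entirely when no other CPU needs to preempt is exactly the saving CONFIG_IPI_OPTIMIZE is aiming for.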