kernel: Add CONFIG_IPI_OPTIMIZE

The CONFIG_IPI_OPTIMIZE configuration option allows the flagging and
subsequent signaling of IPIs to be optimized.

It does this by making each bit in the kernel's pending_ipi field
a flag that indicates whether the corresponding CPU might need an IPI
to trigger the scheduling of a new thread on that CPU.

When a new thread is made ready, we compare that thread against each
of the threads currently executing on the other CPUs. If there is a
chance that the new thread should preempt the thread on another CPU,
we flag that an IPI is needed for that CPU. That is, a clear bit
indicates that the CPU absolutely will not need to reschedule, while a
set bit indicates that the target CPU must make that determination for
itself.

Signed-off-by: Peter Mitsis <peter.mitsis@intel.com>
Peter Mitsis 2024-02-16 13:54:47 -05:00 committed by Anas Nashif
commit d8a4c8a90c
6 changed files with 90 additions and 15 deletions
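The mechanism described in the commit message boils down to a shared bitmap that wakeup paths OR bits into and that the next scheduling point atomically drains. Below is a minimal standalone sketch of that idea, using invented names and plain C11 atomics rather than the kernel's own APIs; it only illustrates the protocol, it is not the Zephyr implementation.

/* Standalone sketch (not Zephyr code): each bit stands for one CPU. A set
 * bit means "this CPU must decide for itself whether to reschedule"; a
 * clear bit means "this CPU definitely does not need to".
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint_fast32_t pending_ipi;   /* stand-in for _kernel.pending_ipi */

static void flag_ipi_sketch(uint32_t ipi_mask)
{
        /* Producer side: accumulate target CPUs; several wakeups may OR in
         * bits before the next scheduling point consumes them.
         */
        atomic_fetch_or(&pending_ipi, ipi_mask);
}

static void signal_pending_ipi_sketch(void)
{
        /* Consumer side: atomically take and clear the whole bitmap, then
         * send IPIs only if at least one bit was set.
         */
        uint32_t cpu_bitmap = (uint32_t)atomic_exchange(&pending_ipi, 0);

        if (cpu_bitmap != 0) {
                printf("send IPI(s) for CPU bitmap 0x%x\n", (unsigned int)cpu_bitmap);
        }
}

int main(void)
{
        flag_ipi_sketch(1u << 2);      /* CPU 2 might need to reschedule */
        flag_ipi_sketch(1u << 3);      /* CPU 3 might, too */
        signal_pending_ipi_sketch();   /* prints: send IPI(s) for CPU bitmap 0xc */
        return 0;
}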


@@ -240,8 +240,8 @@ struct z_kernel {
 #endif
 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
-        /* Need to signal an IPI at the next scheduling point */
-        bool pending_ipi;
+        /* Identify CPUs to send IPIs to at the next scheduling point */
+        atomic_t pending_ipi;
 #endif
 };


@@ -73,6 +73,24 @@ config TRACE_SCHED_IPI
         depends on SCHED_IPI_SUPPORTED
         depends on MP_MAX_NUM_CPUS>1
 
+config IPI_OPTIMIZE
+        bool "Optimize IPI delivery"
+        default n
+        depends on SCHED_IPI_SUPPORTED && MP_MAX_NUM_CPUS>1
+        help
+          When selected, the kernel will attempt to determine the minimum
+          set of CPUs that need an IPI to trigger a reschedule in response to
+          a thread newly made ready for execution. This increases the
+          computation required at every scheduler operation by a value that is
+          O(N) in the number of CPUs, and in exchange reduces the number of
+          interrupts delivered. Which to choose is going to depend on
+          application behavior. If the architecture also supports directing
+          IPIs to specific CPUs then this has the potential to significantly
+          reduce the number of IPIs (and consequently ISRs) processed by the
+          system as the number of CPUs increases. If not, the only benefit
+          would be to not issue any IPIs if the newly readied thread is of
+          lower priority than all the threads currently executing on other CPUs.
+
 config KERNEL_COHERENCE
         bool "Place all shared data into coherent memory"
         depends on ARCH_HAS_COHERENCE
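To make the tradeoff in the help text concrete, here is a back-of-the-envelope sketch in standalone C. The per-CPU priority snapshot and the new thread's priority are invented numbers; it simply counts the interrupts one wakeup would generate under a broadcast policy versus a targeted one.

/* Sketch only (not Zephyr code). Priorities follow the "lower value =
 * higher priority" convention.
 */
#include <stdio.h>

int main(void)
{
        const unsigned int num_cpus = 8;
        /* Priority of the thread currently running on each CPU; CPU 0 is local */
        const int cpu_prio[8] = {0, 1, 1, 9, 1, 1, 1, 1};
        const int new_thread_prio = 5;

        unsigned int broadcast_ipis = num_cpus - 1; /* interrupt everyone else */
        unsigned int targeted_ipis = 0;             /* cost: one compare per other CPU */

        for (unsigned int i = 1; i < num_cpus; i++) {
                if (new_thread_prio < cpu_prio[i]) {
                        targeted_ipis++;    /* only this CPU might need to reschedule */
                }
        }

        /* Prints "broadcast: 7 IPIs, optimized: 1 IPI(s)" for this snapshot */
        printf("broadcast: %u IPIs, optimized: %u IPI(s)\n",
               broadcast_ipis, targeted_ipis);
        return 0;
}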


@@ -7,13 +7,25 @@
 #ifndef ZEPHYR_KERNEL_INCLUDE_IPI_H_
 #define ZEPHYR_KERNEL_INCLUDE_IPI_H_
 
+#include <zephyr/kernel.h>
+#include <stdint.h>
+#include <zephyr/sys/atomic.h>
+
+#define IPI_ALL_CPUS_MASK  ((1 << CONFIG_MP_MAX_NUM_CPUS) - 1)
+
+#define IPI_CPU_MASK(cpu_id) \
+        (IS_ENABLED(CONFIG_IPI_OPTIMIZE) ? BIT(cpu_id) : IPI_ALL_CPUS_MASK)
+
 /* defined in ipi.c when CONFIG_SMP=y */
 #ifdef CONFIG_SMP
-void flag_ipi(void);
+void flag_ipi(uint32_t ipi_mask);
 void signal_pending_ipi(void);
+atomic_val_t ipi_mask_create(struct k_thread *thread);
 #else
-#define flag_ipi() do { } while (false)
+#define flag_ipi(ipi_mask) do { } while (false)
 #define signal_pending_ipi() do { } while (false)
 #endif /* CONFIG_SMP */
 
 #endif /* ZEPHYR_KERNEL_INCLUDE_IPI_H_ */
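A standalone sketch of how a call site can pass a mask without caring whether the optimization is enabled: with it, the per-CPU macro names a single CPU; without it, the macro degenerates to the all-CPUs mask. The macros below are local stand-ins for a hypothetical 4-CPU build that mirror, but do not reuse, the header definitions above.

/* Sketch only (not Zephyr code). */
#include <stdint.h>
#include <stdio.h>

#define MP_MAX_NUM_CPUS    4
#define IPI_OPTIMIZE       1          /* flip to 0 to model the option disabled */

#define BIT(n)             (1u << (n))
#define ALL_CPUS_MASK      ((1u << MP_MAX_NUM_CPUS) - 1)
#define CPU_MASK(id)       (IPI_OPTIMIZE ? BIT(id) : ALL_CPUS_MASK)

int main(void)
{
        /* Targeting CPU 2: 0x4 when optimized, 0xf (all four CPUs) otherwise */
        printf("mask for CPU 2: 0x%x\n", (unsigned int)CPU_MASK(2));
        return 0;
}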


@@ -13,15 +13,58 @@ extern void z_trace_sched_ipi(void);
 #endif
 
-void flag_ipi(void)
+void flag_ipi(uint32_t ipi_mask)
 {
 #if defined(CONFIG_SCHED_IPI_SUPPORTED)
         if (arch_num_cpus() > 1) {
-                _kernel.pending_ipi = true;
+                atomic_or(&_kernel.pending_ipi, (atomic_val_t)ipi_mask);
         }
 #endif /* CONFIG_SCHED_IPI_SUPPORTED */
 }
 
+/* Create a bitmask of CPUs that need an IPI. Note: sched_spinlock is held. */
+atomic_val_t ipi_mask_create(struct k_thread *thread)
+{
+        if (!IS_ENABLED(CONFIG_IPI_OPTIMIZE)) {
+                return (CONFIG_MP_MAX_NUM_CPUS > 1) ? IPI_ALL_CPUS_MASK : 0;
+        }
+
+        uint32_t ipi_mask = 0;
+        uint32_t num_cpus = (uint32_t)arch_num_cpus();
+        uint32_t id = _current_cpu->id;
+        struct k_thread *cpu_thread;
+        bool executable_on_cpu = true;
+
+        for (uint32_t i = 0; i < num_cpus; i++) {
+                if (id == i) {
+                        continue;
+                }
+
+                /*
+                 * An IPI absolutely does not need to be sent if ...
+                 *   1. the CPU is not active, or
+                 *   2. <thread> can not execute on the target CPU
+                 * ... and might not need to be sent if ...
+                 *   3. the target CPU's active thread is not preemptible, or
+                 *   4. the target CPU's active thread has a higher priority
+                 *      (Items 3 & 4 may be overridden by a metaIRQ thread)
+                 */
+
+#if defined(CONFIG_SCHED_CPU_MASK)
+                executable_on_cpu = ((thread->base.cpu_mask & BIT(i)) != 0);
+#endif
+
+                cpu_thread = _kernel.cpus[i].current;
+                if ((cpu_thread != NULL) &&
+                    (((z_sched_prio_cmp(cpu_thread, thread) < 0) &&
+                      (thread_is_preemptible(cpu_thread))) ||
+                     thread_is_metairq(thread)) && executable_on_cpu) {
+                        ipi_mask |= BIT(i);
+                }
+        }
+
+        return (atomic_val_t)ipi_mask;
+}
+
 void signal_pending_ipi(void)
 {
@@ -34,8 +77,10 @@ void signal_pending_ipi(void)
          */
 #if defined(CONFIG_SCHED_IPI_SUPPORTED)
         if (arch_num_cpus() > 1) {
-                if (_kernel.pending_ipi) {
-                        _kernel.pending_ipi = false;
+                uint32_t cpu_bitmap;
+
+                cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
+                if (cpu_bitmap != 0) {
                         arch_sched_ipi();
                 }
         }
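The following is a simplified standalone model of the decision rules listed in the comment inside ipi_mask_create(), applied to an invented 4-CPU snapshot. It keeps only the active/preemptible/priority checks, omits the metaIRQ and CPU-affinity cases, and uses made-up types and names; priorities follow the usual "lower value = higher priority" convention.

/* Sketch only (not Zephyr code). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_cpu {
        bool active;
        bool preemptible;   /* is the running thread preemptible? */
        int  prio;          /* running thread's priority, lower value = higher */
};

int main(void)
{
        const struct fake_cpu cpus[4] = {
                { .active = true,  .preemptible = true,  .prio = 5  },  /* local CPU */
                { .active = true,  .preemptible = true,  .prio = 10 },  /* lower prio -> IPI */
                { .active = true,  .preemptible = false, .prio = 10 },  /* not preemptible -> no IPI */
                { .active = false, .preemptible = true,  .prio = 10 },  /* inactive -> no IPI */
        };
        const int new_thread_prio = 7;
        const unsigned int local_cpu = 0;
        uint32_t ipi_mask = 0;

        for (unsigned int i = 0; i < 4; i++) {
                if (i == local_cpu || !cpus[i].active) {
                        continue;       /* skip ourselves and inactive CPUs */
                }
                if (cpus[i].preemptible && (new_thread_prio < cpus[i].prio)) {
                        ipi_mask |= 1u << i;   /* this CPU might have to reschedule */
                }
        }

        printf("IPI mask: 0x%x\n", (unsigned int)ipi_mask);   /* prints 0x2: only CPU 1 */
        return 0;
}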


@@ -384,7 +384,8 @@ static void ready_thread(struct k_thread *thread)
                 queue_thread(thread);
                 update_cache(0);
-                flag_ipi();
+
+                flag_ipi(ipi_mask_create(thread));
         }
 }
@@ -746,7 +747,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)
                 queue_thread(thread);
 
                 if (old_prio > prio) {
-                        flag_ipi();
+                        flag_ipi(ipi_mask_create(thread));
                 }
         } else {
                 /*
@@ -762,7 +763,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)
                 cpu = thread_active_elsewhere(thread);
 
                 if ((cpu != NULL) && (old_prio < prio)) {
-                        flag_ipi();
+                        flag_ipi(IPI_CPU_MASK(cpu->id));
                 }
         }


@@ -58,11 +58,10 @@ static void slice_timeout(struct _timeout *timeout)
         slice_expired[cpu] = true;
 
         /* We need an IPI if we just handled a timeslice expiration
-         * for a different CPU. Ideally this would be able to target
-         * the specific core, but that's not part of the API yet.
+         * for a different CPU.
          */
-        if (IS_ENABLED(CONFIG_SMP) && cpu != _current_cpu->id) {
-                flag_ipi();
+        if (cpu != _current_cpu->id) {
+                flag_ipi(IPI_CPU_MASK(cpu));
         }
 }
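Finally, a hypothetical standalone sketch of the pattern slice_timeout() now follows: when a time slice expires for a different CPU, only that CPU's pending bit is flagged, so only that CPU takes an interrupt and re-evaluates its run state. All names and types below are invented for illustration.

/* Sketch only (not Zephyr code). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_CPUS 4

static bool slice_expired_sketch[NUM_CPUS];
static atomic_uint_fast32_t pending_ipi_sketch;

static void slice_timeout_sketch(unsigned int expired_cpu, unsigned int current_cpu)
{
        slice_expired_sketch[expired_cpu] = true;

        /* Only flag an IPI when the expiration belongs to a different CPU;
         * the local CPU will notice its own expired slice without one.
         */
        if (expired_cpu != current_cpu) {
                atomic_fetch_or(&pending_ipi_sketch, 1u << expired_cpu);
        }
}

int main(void)
{
        slice_timeout_sketch(3, 0);   /* CPU 0 handles CPU 3's expiration */
        printf("pending IPI bitmap: 0x%x\n",
               (unsigned int)atomic_load(&pending_ipi_sketch));   /* prints 0x8 */
        return 0;
}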