diff --git a/include/zephyr/kernel_structs.h b/include/zephyr/kernel_structs.h
index baa2046f07c..cf7daff9a6c 100644
--- a/include/zephyr/kernel_structs.h
+++ b/include/zephyr/kernel_structs.h
@@ -240,8 +240,8 @@ struct z_kernel {
 #endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
-	/* Need to signal an IPI at the next scheduling point */
-	bool pending_ipi;
+	/* Identify CPUs to send IPIs to at the next scheduling point */
+	atomic_t pending_ipi;
 #endif
 };
 
diff --git a/kernel/Kconfig.smp b/kernel/Kconfig.smp
index 22279270b19..04fc01801b3 100644
--- a/kernel/Kconfig.smp
+++ b/kernel/Kconfig.smp
@@ -73,6 +73,24 @@ config TRACE_SCHED_IPI
 	depends on SCHED_IPI_SUPPORTED
 	depends on MP_MAX_NUM_CPUS>1
 
+config IPI_OPTIMIZE
+	bool "Optimize IPI delivery"
+	default n
+	depends on SCHED_IPI_SUPPORTED && MP_MAX_NUM_CPUS>1
+	help
+	  When selected, the kernel will attempt to determine the minimum
+	  set of CPUs that need an IPI to trigger a reschedule in response to
+	  a thread newly made ready for execution. This increases the
+	  computation required at every scheduler operation by a value that is
+	  O(N) in the number of CPUs, and in exchange reduces the number of
+	  interrupts delivered. Which to choose is going to depend on
+	  application behavior. If the architecture also supports directing
+	  IPIs to specific CPUs then this has the potential to significantly
+	  reduce the number of IPIs (and consequently ISRs) processed by the
+	  system as the number of CPUs increases. If not, the only benefit
+	  would be to not issue any IPIs if the newly readied thread is of
+	  lower priority than all the threads currently executing on other CPUs.
+
 config KERNEL_COHERENCE
 	bool "Place all shared data into coherent memory"
 	depends on ARCH_HAS_COHERENCE
diff --git a/kernel/include/ipi.h b/kernel/include/ipi.h
index 77105cac168..b353a676d46 100644
--- a/kernel/include/ipi.h
+++ b/kernel/include/ipi.h
@@ -7,13 +7,25 @@
 #ifndef ZEPHYR_KERNEL_INCLUDE_IPI_H_
 #define ZEPHYR_KERNEL_INCLUDE_IPI_H_
 
+#include <zephyr/kernel.h>
+#include <zephyr/sys/atomic.h>
+#include <stdint.h>
+
+#define IPI_ALL_CPUS_MASK ((1 << CONFIG_MP_MAX_NUM_CPUS) - 1)
+
+#define IPI_CPU_MASK(cpu_id) \
+	(IS_ENABLED(CONFIG_IPI_OPTIMIZE) ? BIT(cpu_id) : IPI_ALL_CPUS_MASK)
+
+
 /* defined in ipi.c when CONFIG_SMP=y */
 #ifdef CONFIG_SMP
-void flag_ipi(void);
+void flag_ipi(uint32_t ipi_mask);
 void signal_pending_ipi(void);
+atomic_val_t ipi_mask_create(struct k_thread *thread);
 #else
-#define flag_ipi(ipi_mask) do { } while (false)
+#define flag_ipi(ipi_mask) do { } while (false)
 #define signal_pending_ipi() do { } while (false)
 #endif /* CONFIG_SMP */
 
+
 #endif /* ZEPHYR_KERNEL_INCLUDE_IPI_H_ */
diff --git a/kernel/ipi.c b/kernel/ipi.c
index 99693c0ecbf..9985c9485c2 100644
--- a/kernel/ipi.c
+++ b/kernel/ipi.c
@@ -13,15 +13,58 @@
 extern void z_trace_sched_ipi(void);
 #endif
 
-void flag_ipi(void)
+void flag_ipi(uint32_t ipi_mask)
 {
 #if defined(CONFIG_SCHED_IPI_SUPPORTED)
 	if (arch_num_cpus() > 1) {
-		_kernel.pending_ipi = true;
+		atomic_or(&_kernel.pending_ipi, (atomic_val_t)ipi_mask);
 	}
 #endif /* CONFIG_SCHED_IPI_SUPPORTED */
 }
 
+/* Create a bitmask of CPUs that need an IPI. Note: sched_spinlock is held. */
+atomic_val_t ipi_mask_create(struct k_thread *thread)
+{
+	if (!IS_ENABLED(CONFIG_IPI_OPTIMIZE)) {
+		return (CONFIG_MP_MAX_NUM_CPUS > 1) ? IPI_ALL_CPUS_MASK : 0;
+	}
+
+	uint32_t ipi_mask = 0;
+	uint32_t num_cpus = (uint32_t)arch_num_cpus();
+	uint32_t id = _current_cpu->id;
+	struct k_thread *cpu_thread;
+	bool executable_on_cpu = true;
+
+	for (uint32_t i = 0; i < num_cpus; i++) {
+		if (id == i) {
+			continue;
+		}
+
+		/*
+		 * An IPI absolutely does not need to be sent if ...
+		 * 1. the CPU is not active, or
+		 * 2. <thread> can not execute on the target CPU
+		 * ... and might not need to be sent if ...
+		 * 3. the target CPU's active thread is not preemptible, or
+		 * 4. the target CPU's active thread has a higher priority
+		 *    (Items 3 & 4 may be overridden by a metaIRQ thread)
+		 */
+
+#if defined(CONFIG_SCHED_CPU_MASK)
+		executable_on_cpu = ((thread->base.cpu_mask & BIT(i)) != 0);
+#endif
+
+		cpu_thread = _kernel.cpus[i].current;
+		if ((cpu_thread != NULL) &&
+		    (((z_sched_prio_cmp(cpu_thread, thread) < 0) &&
+		      (thread_is_preemptible(cpu_thread))) ||
+		     thread_is_metairq(thread)) && executable_on_cpu) {
+			ipi_mask |= BIT(i);
+		}
+	}
+
+	return (atomic_val_t)ipi_mask;
+}
 
 void signal_pending_ipi(void)
 {
@@ -34,8 +77,10 @@ void signal_pending_ipi(void)
 	 */
#if defined(CONFIG_SCHED_IPI_SUPPORTED)
 	if (arch_num_cpus() > 1) {
-		if (_kernel.pending_ipi) {
-			_kernel.pending_ipi = false;
+		uint32_t cpu_bitmap;
+
+		cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
+		if (cpu_bitmap != 0) {
 			arch_sched_ipi();
 		}
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index c17efeaf6ef..34b256c22a8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -384,7 +384,8 @@ static void ready_thread(struct k_thread *thread)
 
 		queue_thread(thread);
 		update_cache(0);
-		flag_ipi();
+
+		flag_ipi(ipi_mask_create(thread));
 	}
 }
 
@@ -746,7 +747,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)
 			queue_thread(thread);
 
 			if (old_prio > prio) {
-				flag_ipi();
+				flag_ipi(ipi_mask_create(thread));
 			}
 		} else {
 			/*
@@ -762,7 +763,7 @@ bool z_thread_prio_set(struct k_thread *thread, int prio)
 			cpu = thread_active_elsewhere(thread);
 
 			if ((cpu != NULL) && (old_prio < prio)) {
-				flag_ipi();
+				flag_ipi(IPI_CPU_MASK(cpu->id));
 			}
 		}
 
diff --git a/kernel/timeslicing.c b/kernel/timeslicing.c
index 07ae497c7f9..be91d9606f5 100644
--- a/kernel/timeslicing.c
+++ b/kernel/timeslicing.c
@@ -58,11 +58,10 @@ static void slice_timeout(struct _timeout *timeout)
 	slice_expired[cpu] = true;
 
 	/* We need an IPI if we just handled a timeslice expiration
-	 * for a different CPU. Ideally this would be able to target
-	 * the specific core, but that's not part of the API yet.
+	 * for a different CPU.
 	 */
-	if (IS_ENABLED(CONFIG_SMP) && cpu != _current_cpu->id) {
-		flag_ipi();
+	if (cpu != _current_cpu->id) {
+		flag_ipi(IPI_CPU_MASK(cpu));
 	}
 }
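
For readers outside the kernel tree, the accumulate-and-drain pattern introduced above (flag_ipi() ORs target-CPU bits into a shared pending mask, signal_pending_ipi() atomically takes and clears the mask before raising the interrupt) can be modeled in a few lines of plain C. The sketch below is illustrative only and is not Zephyr code: model_flag_ipi(), model_signal_pending_ipi(), fake_arch_sched_ipi() and main() are invented for the example, and the real arch_sched_ipi() in this patch still broadcasts, so the drained bitmap only decides whether to signal at all.

/* Illustrative model only -- not Zephyr code. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Shared pending mask, analogous to _kernel.pending_ipi in the patch. */
static atomic_uint pending_ipi;

/* Stand-in for the architecture hook; the patched kernel still broadcasts. */
static void fake_arch_sched_ipi(void)
{
	printf("broadcast reschedule IPI\n");
}

/* Accumulate: OR the target CPUs into the pending mask (cf. flag_ipi()). */
static void model_flag_ipi(uint32_t ipi_mask)
{
	atomic_fetch_or(&pending_ipi, ipi_mask);
}

/* Drain: atomically take-and-clear the mask at the next scheduling point
 * and signal only if at least one CPU was flagged (cf. signal_pending_ipi()).
 */
static void model_signal_pending_ipi(void)
{
	uint32_t cpu_bitmap = atomic_exchange(&pending_ipi, 0u);

	if (cpu_bitmap != 0u) {
		printf("flagged CPUs: 0x%x -> ", (unsigned int)cpu_bitmap);
		fake_arch_sched_ipi();
	}
}

int main(void)
{
	model_flag_ipi(1u << 2);    /* thread readied; only CPU 2 should preempt */
	model_flag_ipi(1u << 3);    /* second wakeup targets CPU 3 */
	model_signal_pending_ipi(); /* drains 0xc, raises a single IPI */
	model_signal_pending_ipi(); /* nothing pending, no IPI */
	return 0;
}

Built with any C11 compiler (e.g. cc -std=c11 model.c), the first drain reports the combined mask 0xc and raises one IPI, while the second drain is silent; skipping IPIs entirely when no other CPU needs to preempt is exactly the saving CONFIG_IPI_OPTIMIZE is aiming for.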