diff --git a/arch/Kconfig b/arch/Kconfig
index f0f36b5bdd2..ce0bb7bc78f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -35,6 +35,7 @@ config X86
 config X86_64
 	bool "x86_64 architecture"
 	select ATOMIC_OPERATIONS_BUILTIN
+	select SCHED_IPI_SUPPORTED
 
 config NIOS2
 	bool "Nios II Gen 2 architecture"
diff --git a/arch/x86_64/core/x86_64.c b/arch/x86_64/core/x86_64.c
index 7480a485c40..6b0eab244ca 100644
--- a/arch/x86_64/core/x86_64.c
+++ b/arch/x86_64/core/x86_64.c
@@ -10,6 +10,12 @@
 #include
 #include "xuk.h"
 
+/* Always pick a lowest priority interrupt for scheduling IPIs; by
+ * definition they're done on behalf of thread mode code and should
+ * never preempt a true device interrupt.
+ */
+#define SCHED_IPI_VECTOR 0x20
+
 struct device;
 
 struct NANO_ESF {
@@ -112,6 +118,26 @@ void __weak x86_apic_timer_isr(void *arg, int code)
 	ARG_UNUSED(code);
 }
 
+static void sched_ipi_handler(void *arg, int err)
+{
+	ARG_UNUSED(arg);
+	ARG_UNUSED(err);
+#ifdef CONFIG_SMP
+	z_sched_ipi();
+#endif
+}
+
+void z_arch_sched_ipi(void)
+{
+	_apic.ICR_HI = (struct apic_icr_hi) {};
+	_apic.ICR_LO = (struct apic_icr_lo) {
+		.delivery_mode = FIXED,
+		.vector = SCHED_IPI_VECTOR,
+		.shorthand = NOTSELF,
+	};
+}
+
+
 /* Called from xuk layer on actual CPU start */
 void _cpu_start(int cpu)
 {
@@ -121,6 +147,9 @@ void _cpu_start(int cpu)
 	xuk_set_isr(INT_APIC_LVT_TIMER, 13, x86_apic_timer_isr, 0);
 	_apic.INIT_COUNT = 0;
 
+	xuk_set_isr(XUK_INT_RAW_VECTOR(SCHED_IPI_VECTOR),
+		    -1, sched_ipi_handler, 0);
+
 #ifdef CONFIG_IRQ_OFFLOAD
 	xuk_set_isr(XUK_INT_RAW_VECTOR(CONFIG_IRQ_OFFLOAD_VECTOR),
 		    -1, irq_offload_handler, 0);
diff --git a/arch/x86_64/include/kernel_arch_func.h b/arch/x86_64/include/kernel_arch_func.h
index a3638a7aee2..39e16c28018 100644
--- a/arch/x86_64/include/kernel_arch_func.h
+++ b/arch/x86_64/include/kernel_arch_func.h
@@ -102,4 +102,6 @@ extern int x86_64_except_reason;
 	__asm__ volatile("int $5");		\
 } while (false)
 
+void z_arch_sched_ipi(void);
+
 #endif /* _KERNEL_ARCH_FUNC_H */
diff --git a/kernel/Kconfig b/kernel/Kconfig
index 513f394733d..749efcb6b17 100644
--- a/kernel/Kconfig
+++ b/kernel/Kconfig
@@ -715,6 +715,17 @@ config MP_NUM_CPUS
 	  Number of multiprocessing-capable cores available to the
 	  multicpu API and SMP features.
 
+config SCHED_IPI_SUPPORTED
+	bool "Architecture supports broadcast interprocessor interrupts"
+	help
+	  True if the architecture supports a call to
+	  z_arch_sched_ipi() to broadcast an interrupt that will call
+	  z_sched_ipi() on other CPUs in the system. Required for
+	  k_thread_abort() to operate with reasonable latency
+	  (otherwise we might have to wait for the other thread to
+	  take an interrupt, which can be arbitrarily far in the
+	  future).
+
 endmenu
 
 config TICKLESS_IDLE
diff --git a/kernel/include/kernel_structs.h b/kernel/include/kernel_structs.h
index 9ed69379f1a..4c9ba2905e0 100644
--- a/kernel/include/kernel_structs.h
+++ b/kernel/include/kernel_structs.h
@@ -46,6 +46,9 @@
 /* Thread is suspended */
 #define _THREAD_SUSPENDED (BIT(4))
 
+/* Thread is being aborted (SMP only) */
+#define _THREAD_ABORTING (BIT(5))
+
 /* Thread is present in the ready queue */
 #define _THREAD_QUEUED (BIT(6))
 
diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h
index 662fb1eee17..52d5ad69691 100644
--- a/kernel/include/ksched.h
+++ b/kernel/include/ksched.h
@@ -53,6 +53,8 @@ struct k_thread *z_find_first_thread_to_unpend(_wait_q_t *wait_q,
 					       struct k_thread *from);
 void idle(void *a, void *b, void *c);
 void z_time_slice(int ticks);
+void z_sched_abort(struct k_thread *thread);
+void z_sched_ipi(void);
 
 static inline void z_pend_curr_unlocked(_wait_q_t *wait_q, s32_t timeout)
 {
diff --git a/kernel/sched.c b/kernel/sched.c
index 06cdc0becc4..cc3322bb0f4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -464,13 +464,6 @@ void z_unpend_thread(struct k_thread *thread)
 	(void)z_abort_thread_timeout(thread);
 }
 
-/* FIXME: this API is glitchy when used in SMP. If the thread is
- * currently scheduled on the other CPU, it will silently set it's
- * priority but nothing will cause a reschedule until the next
- * interrupt. An audit seems to show that all current usage is to set
- * priorities on either _current or a pended thread, though, so it's
- * fine for now.
- */
 void z_thread_priority_set(struct k_thread *thread, int prio)
 {
 	bool need_sched = 0;
@@ -489,6 +482,11 @@ void z_thread_priority_set(struct k_thread *thread, int prio)
 	}
 	sys_trace_thread_priority_set(thread);
 
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    !IS_ENABLED(CONFIG_SCHED_IPI_SUPPORTED)) {
+		z_sched_ipi();
+	}
+
 	if (need_sched && _current->base.sched_locked == 0) {
 		z_reschedule_unlocked();
 	}
@@ -594,6 +592,15 @@ void *z_get_next_switch_handle(void *interrupted)
 #endif
 #endif
 
+	/* Some architectures don't have a working IPI, so the best we
+	 * can do there is check the abort status of the current
+	 * thread here on ISR exit
+	 */
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    !IS_ENABLED(CONFIG_SCHED_IPI_SUPPORTED)) {
+		z_sched_ipi();
+	}
+
 	z_check_stack_sentinel();
 
 	return _current->switch_handle;
@@ -957,8 +964,61 @@ void z_impl_k_wakeup(k_tid_t thread)
 	if (!z_is_in_isr()) {
 		z_reschedule_unlocked();
 	}
+
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    !IS_ENABLED(CONFIG_SCHED_IPI_SUPPORTED)) {
+		z_sched_ipi();
+	}
 }
 
+#ifdef CONFIG_SMP
+/* Called out of the scheduler interprocessor interrupt. All it does
+ * is flag the current thread as dead if it needs to abort, so the ISR
+ * returns into something else and the other thread which called
+ * k_thread_abort() can finish its work knowing the thread won't be
+ * rescheduled.
+ */
+void z_sched_ipi(void)
+{
+	LOCKED(&sched_spinlock) {
+		if (_current->base.thread_state & _THREAD_ABORTING) {
+			_current->base.thread_state |= _THREAD_DEAD;
+			_current_cpu->swap_ok = true;
+		}
+	}
+}
+
+void z_sched_abort(struct k_thread *thread)
+{
+	if (thread == _current) {
+		z_remove_thread_from_ready_q(thread);
+		return;
+	}
+
+	/* First broadcast an IPI to the other CPUs so they can stop
+	 * it locally. Not all architectures support that, alas. If
+	 * we don't have it, we need to wait for some other interrupt.
+	 */
+	thread->base.thread_state |= _THREAD_ABORTING;
+#ifdef CONFIG_SCHED_IPI_SUPPORTED
+	z_arch_sched_ipi();
+#endif
+
+	/* Wait for it to be flagged dead either by the CPU it was
+	 * running on or because we caught it idle in the queue.
+	 */
+	while ((thread->base.thread_state & _THREAD_DEAD) == 0) {
+		LOCKED(&sched_spinlock) {
+			if (z_is_thread_queued(thread)) {
+				thread->base.thread_state |= _THREAD_DEAD;
+				_priq_run_remove(&_kernel.ready_q.runq, thread);
+				z_mark_thread_as_not_queued(thread);
+			}
+		}
+	}
+}
+#endif
+
 #ifdef CONFIG_USERSPACE
 Z_SYSCALL_HANDLER1_SIMPLE_VOID(k_wakeup, K_OBJ_THREAD, k_tid_t);
 #endif
diff --git a/kernel/smp.c b/kernel/smp.c
index aa689a42f6c..6cbca8ffe74 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -60,13 +60,11 @@ void z_smp_release_global_lock(struct k_thread *thread)
 	}
 }
 
-#endif
-
 extern k_thread_stack_t _interrupt_stack1[];
 extern k_thread_stack_t _interrupt_stack2[];
 extern k_thread_stack_t _interrupt_stack3[];
 
-#if defined(CONFIG_SMP) && CONFIG_MP_NUM_CPUS > 1
+#if CONFIG_MP_NUM_CPUS > 1
 static void smp_init_top(int key, void *arg)
 {
 	atomic_t *start_flag = arg;
@@ -89,6 +87,7 @@ static void smp_init_top(int key, void *arg)
 
 	CODE_UNREACHABLE;
 }
+#endif
 
 void smp_init(void)
 {
@@ -111,4 +110,5 @@ void smp_init(void)
 
 	(void)atomic_set(&start_flag, 1);
 }
-#endif
+
+#endif /* CONFIG_SMP */
diff --git a/kernel/thread.c b/kernel/thread.c
index 8b12bebdfe2..6c70458b89e 100644
--- a/kernel/thread.c
+++ b/kernel/thread.c
@@ -582,6 +582,10 @@ void z_thread_single_abort(struct k_thread *thread)
 		thread->fn_abort();
 	}
 
+	if (IS_ENABLED(CONFIG_SMP)) {
+		z_sched_abort(thread);
+	}
+
 	if (z_is_thread_ready(thread)) {
 		z_remove_thread_from_ready_q(thread);
 	} else {
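
For a port to another architecture, the contract introduced here is small: select SCHED_IPI_SUPPORTED, implement z_arch_sched_ipi() to broadcast a low-priority interrupt to every CPU except the caller, and call z_sched_ipi() from the resulting ISR, which is exactly what the x86_64/xuk hunks above do. The code below is only a rough sketch of that wiring; MY_SOC_IPI_IRQ, MY_SOC_IPI_IRQ_PRIO, my_soc_raise_ipi_all_but_self() and my_soc_ipi_init() are hypothetical placeholders for whatever the target's interrupt controller actually provides, not an existing Zephyr API.

/* Rough sketch only: the MY_SOC_* macros and
 * my_soc_raise_ipi_all_but_self() are hypothetical stand-ins for a
 * real SoC's interrupt controller interface.
 */
#include <kernel.h>
#include <ksched.h>

#define MY_SOC_IPI_IRQ       5	/* assumed cross-CPU interrupt line */
#define MY_SOC_IPI_IRQ_PRIO  3	/* assumed lowest priority on this SoC */

/* Hypothetical controller call that raises the IRQ on all other CPUs */
extern void my_soc_raise_ipi_all_but_self(unsigned int irq);

static void sched_ipi_isr(void *arg)
{
	ARG_UNUSED(arg);
#ifdef CONFIG_SMP
	/* Flags _current dead if it is marked _THREAD_ABORTING */
	z_sched_ipi();
#endif
}

/* Broadcast to every CPU except the caller, the same idea as the
 * NOTSELF shorthand in the APIC version above.
 */
void z_arch_sched_ipi(void)
{
	my_soc_raise_ipi_all_but_self(MY_SOC_IPI_IRQ);
}

/* Hook this into the arch's per-CPU interrupt init path */
void my_soc_ipi_init(void)
{
	IRQ_CONNECT(MY_SOC_IPI_IRQ, MY_SOC_IPI_IRQ_PRIO,
		    sched_ipi_isr, NULL, 0);
	irq_enable(MY_SOC_IPI_IRQ);
}

An architecture with no such broadcast interrupt simply leaves SCHED_IPI_SUPPORTED unselected and relies on the polling calls to z_sched_ipi() added above on the ISR-exit, k_wakeup() and priority-set paths, at the cost of potentially long k_thread_abort() latency.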