kernel: Add cache coherence management framework

Zephyr SMP kernels need to be able to run on architectures with
incoherent caches.  A naive implementation of synchronization on such
architectures requires extensive cache flushing (e.g. flush+invalidate
everything on every spinlock operation, flush on every unlock!) and
is a performance problem.

Instead, many of these systems will have access to separate "coherent"
(usually uncached) and "incoherent" regions of memory.  Where this is
available, place all writable data sections by default into the
coherent region.  An "__incoherent" attribute flag is defined for data
regions that are known to be CPU-local and which should use the cache.
By default, this is used for stack memory.
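
For illustration, a minimal sketch (not part of this change; the exact header
path and names here are assumptions) of how application code might keep a
known CPU-local buffer cached while leaving shared objects in the default
coherent placement:

#include <stdint.h>
#include <kernel.h>
#include <linker/section_tags.h>

/* Default placement: with CONFIG_KERNEL_COHERENCE=y this object lands in
 * the coherent (uncached) region and can be shared across CPUs without
 * explicit flush/invalidate operations.
 */
static struct k_sem shared_sem;

/* Explicitly cached: this buffer is only ever touched by one CPU, so it
 * can opt back into the incoherent (cached) section for performance.
 */
static uint8_t __incoherent scratch_buf[256];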

Stack memory will be incoherent by default, as by definition it is
local to its current thread.  This requires special cache management
on context switch, so an arch API has been added for that.

Also, when enabled, add assertions to strategic places to ensure that
shared kernel data is indeed coherent.  We check thread objects, the
_kernel struct, waitq's, timeouts and spinlocks.  In practice almost
all kernel synchronization is built on top of these structures, and
any shared data structs will contain at least one of them.
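
For example (a hypothetical sketch, not code from this change), the kind of
mistake these assertions catch is a kernel object placed in cached memory,
such as on a thread stack:

#include <kernel.h>

void misuse_example(void)
{
	/* Stack memory stays cached under CONFIG_KERNEL_COHERENCE, so this
	 * semaphore (and the wait queue embedded in it) is not coherent.
	 */
	struct k_sem bad_sem;

	k_sem_init(&bad_sem, 0, 1);

	/* Pending makes the kernel walk the embedded wait queue; in an
	 * assertion-enabled build, the arch_mem_coherent(wait_q) check
	 * added in pend() below would fire here.
	 */
	k_sem_take(&bad_sem, K_MSEC(10));
}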

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
Signed-off-by: Anas Nashif <anas.nashif@intel.com>
Commit f6d32ab0a4 by Andy Ross, 2020-05-13 15:34:04 +00:00; committed by Anas Nashif
11 changed files with 149 additions and 16 deletions


@@ -420,6 +420,13 @@ config ARCH_HAS_EXTRA_EXCEPTION_INFO
config ARCH_HAS_GDBSTUB
bool
config ARCH_HAS_COHERENCE
bool
help
When selected, the architecture supports the
arch_mem_coherent() API and can link into incoherent/cached
memory using the ".cached" linker section.
#
# Other architecture related options
#


@@ -38,6 +38,14 @@
#define __nocache
#endif /* CONFIG_NOCACHE_MEMORY */
#if defined(CONFIG_KERNEL_COHERENCE)
#define __incoherent __in_section_unique(cached)
#define __stackmem __incoherent
#else
#define __incoherent
#define __stackmem Z_GENERIC_SECTION(.user_stacks)
#endif /* CONFIG_KERNEL_COHERENCE */
#endif /* !_ASMLANGUAGE */
#endif /* ZEPHYR_INCLUDE_LINKER_SECTION_TAGS_H_ */


@@ -118,6 +118,9 @@ static ALWAYS_INLINE k_spinlock_key_t k_spin_lock(struct k_spinlock *l)
#ifdef CONFIG_SPIN_VALIDATE
__ASSERT(z_spin_lock_valid(l), "Recursive spinlock %p", l);
# ifdef CONFIG_KERNEL_COHERENCE
__ASSERT_NO_MSG(arch_mem_coherent(l));
# endif
#endif
#ifdef CONFIG_SMP


@@ -691,6 +691,79 @@ FUNC_NORETURN void arch_syscall_oops(void *ssf);
size_t arch_user_string_nlen(const char *s, size_t maxsize, int *err);
#endif /* CONFIG_USERSPACE */
/**
* @brief Detect memory coherence type
*
* Required when ARCH_HAS_COHERENCE is true. This function returns
* true if the byte pointed to lies within an architecture-defined
* "coherence region" (typically implemented with uncached memory) and
* can safely be used in multiprocessor code without explicit flush or
* invalidate operations.
*
* @note The result is for only the single byte at the specified
* address; this API is not required to check region boundaries or to
* expect aligned pointers.  The expectation is that the calling code
* will have queried the appropriate address(es).
*/
#ifndef CONFIG_ARCH_HAS_COHERENCE
static inline bool arch_mem_coherent(void *ptr)
{
ARG_UNUSED(ptr);
return true;
}
#endif
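
For example, an architecture port might implement this check as a simple
address-range test against its coherent region.  This is a hedged sketch
only; the region symbols below are hypothetical and not defined by this
patch:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical linker symbols bounding the uncached/coherent region */
extern char __coherent_region_start[];
extern char __coherent_region_end[];

static inline bool arch_mem_coherent(void *ptr)
{
	uintptr_t addr = (uintptr_t)ptr;

	/* Classify only the single byte at 'ptr', per the API note above */
	return addr >= (uintptr_t)__coherent_region_start &&
	       addr < (uintptr_t)__coherent_region_end;
}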
/**
* @brief Ensure cache coherence prior to context switch
*
* Required when ARCH_HAS_COHERENCE is true. On cache-incoherent
* multiprocessor architectures, thread stacks are cached by default
* for performance reasons. They must therefore be flushed
* appropriately on context switch. The rules are:
*
* 1. The region containing live data in the old stack (generally the
* bytes between the current stack pointer and the top of the stack
* memory) must be flushed to underlying storage so a new CPU that
* runs the same thread sees the correct data. This must happen
* before the assignment of the switch_handle field in the thread
* struct, which signals the completion of the context switch.
*
* 2. Any data areas to be read from the new stack (generally the same
* as the live region when it was saved) should be invalidated (and
* NOT flushed!) in the data cache. This is because another CPU
* may have run or re-initialized the thread since this CPU
* suspended it, and any data present in cache will be stale.
*
* @note The kernel will call this function during interrupt exit when
* a new thread has been chosen to run, and also immediately before
* entering arch_switch() to effect a code-driven context switch. In
* the latter case, it is very likely that more data will be written
* to the old_thread stack region after this function returns but
* before the completion of the switch. Simply flushing naively here
* is not sufficient on many architectures and coordination with the
* arch_switch() implementation is likely required.
*
* @arg old_thread The old thread to be flushed before being allowed
* to run on other CPUs.
* @arg old_switch_handle The switch handle to be stored into
* old_thread (it will not be valid until the
* cache is flushed, and so is not stored there yet).
* This will be NULL if inside z_swap()
* (because the arch_switch() has not saved it
* yet).
* @arg new_thread The new thread to be invalidated before it runs locally.
*/
#ifndef CONFIG_KERNEL_COHERENCE
static inline void arch_cohere_stacks(struct k_thread *old_thread,
void *old_switch_handle,
struct k_thread *new_thread)
{
ARG_UNUSED(old_thread);
ARG_UNUSED(old_switch_handle);
ARG_UNUSED(new_thread);
}
#endif
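
As a rough illustration of the two rules above, an architecture might
implement the hook along these lines.  This is a hedged sketch only: the
cache helpers are hypothetical stand-ins for real per-arch flush/invalidate
primitives, and the live-region arithmetic assumes a descending stack with
CONFIG_THREAD_STACK_INFO available (which KERNEL_COHERENCE selects):

#include <kernel.h>

/* Hypothetical per-arch cache primitives (names are placeholders) */
void my_dcache_flush_range(void *addr, size_t size);
void my_dcache_invd_range(void *addr, size_t size);

static inline void arch_cohere_stacks(struct k_thread *old_thread,
				      void *old_switch_handle,
				      struct k_thread *new_thread)
{
	size_t ostack = old_thread->stack_info.start;
	size_t oend = ostack + old_thread->stack_info.size;
	size_t osp = (size_t)old_switch_handle; /* NULL when called from z_swap() */

	size_t nstack = new_thread->stack_info.start;
	size_t nend = nstack + new_thread->stack_info.size;
	size_t nsp = (size_t)new_thread->switch_handle;

	/* Rule 2: drop any stale cached copy of the incoming stack's
	 * live region so this CPU rereads it from memory.
	 */
	my_dcache_invd_range((void *)nsp, nend - nsp);

	/* Rule 1: write back the outgoing thread's live stack data so a
	 * CPU that resumes it sees current contents.  When osp is NULL
	 * (code-driven switch), arch_switch() must finish the flush in
	 * coordination with this hook, per the note above.
	 */
	if (osp != 0) {
		my_dcache_flush_range((void *)osp, oend - osp);
	}
}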
/** @} */
/**


@@ -319,7 +319,7 @@ static inline char *Z_KERNEL_STACK_BUFFER(k_thread_stack_t *sym)
* @param size Size of the stack memory region
*/
#define K_THREAD_STACK_DEFINE(sym, size) \
struct z_thread_stack_element Z_GENERIC_SECTION(.user_stacks) \
struct z_thread_stack_element __stackmem \
__aligned(Z_THREAD_STACK_OBJ_ALIGN(size)) \
sym[Z_THREAD_STACK_SIZE_ADJUST(size)]
@@ -354,7 +354,7 @@ static inline char *Z_KERNEL_STACK_BUFFER(k_thread_stack_t *sym)
* @param size Size of the stack memory region
*/
#define K_THREAD_STACK_ARRAY_DEFINE(sym, nmemb, size) \
struct z_thread_stack_element Z_GENERIC_SECTION(.user_stacks) \
struct z_thread_stack_element __stackmem \
__aligned(Z_THREAD_STACK_OBJ_ALIGN(size)) \
sym[nmemb][K_THREAD_STACK_LEN(size)]
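
Existing applications do not change at the source level: a stack defined
this way simply lands in the __stackmem (cached) section when
CONFIG_KERNEL_COHERENCE=y, while the thread object itself stays in coherent
memory.  A brief, hypothetical usage sketch:

#include <kernel.h>

#define WORKER_STACK_SIZE 1024

K_THREAD_STACK_DEFINE(worker_stack, WORKER_STACK_SIZE); /* cached (__stackmem) */
static struct k_thread worker_thread;                   /* coherent by default */

static void worker_entry(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);
}

static void start_worker(void)
{
	k_thread_create(&worker_thread, worker_stack,
			K_THREAD_STACK_SIZEOF(worker_stack),
			worker_entry, NULL, NULL, NULL,
			K_PRIO_PREEMPT(1), 0, K_NO_WAIT);
}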


@@ -806,6 +806,23 @@ config TRACE_SCHED_IPI
depends on SCHED_IPI_SUPPORTED
depends on MP_NUM_CPUS>1
config KERNEL_COHERENCE
bool "Place all shared data into coherent memory"
default y if ARCH_HAS_COHERENCE && SMP && MP_NUM_CPUS > 1
select THREAD_STACK_INFO
help
When available and selected, the kernel will build in a mode
where all shared data is placed in multiprocessor-coherent
(generally "uncached") memory. Thread stacks will remain
cached, as will application memory declared with
__incoherent. This is intended for Zephyr SMP kernels
running on cache-incoherent architectures only. Note that
when this is selected, there is an implicit API change that
assumes cache coherence for any memory passed to the kernel.
Code that creates kernel data structures in cached regions
may fail strangely. Some assertions exist to catch these
mistakes, but not all circumstances can be tested.
endmenu
config TICKLESS_IDLE


@@ -100,7 +100,6 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
#ifdef CONFIG_SMP
_current_cpu->swap_ok = 0;
new_thread->base.cpu = arch_curr_cpu()->id;
if (!is_spinlock) {
@@ -108,8 +107,10 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
}
#endif
sys_trace_thread_switched_out();
_current_cpu->current = new_thread;
wait_for_switch(new_thread);
arch_cohere_stacks(old_thread, NULL, new_thread);
_current_cpu->current = new_thread;
arch_switch(new_thread->switch_handle,
&old_thread->switch_handle);


@@ -247,6 +247,10 @@ static void bg_thread_main(void *unused1, void *unused2, void *unused3)
z_init_static_threads();
#ifdef CONFIG_KERNEL_COHERENCE
__ASSERT_NO_MSG(arch_mem_coherent(&_kernel));
#endif
#ifdef CONFIG_SMP
z_smp_init();
z_sys_init_run_level(_SYS_INIT_LEVEL_SMP);


@@ -413,6 +413,10 @@ static void update_cache(int preempt_ok)
static void ready_thread(struct k_thread *thread)
{
#ifdef CONFIG_KERNEL_COHERENCE
__ASSERT_NO_MSG(arch_mem_coherent(thread));
#endif
if (z_is_thread_ready(thread)) {
sys_trace_thread_ready(thread);
_priq_run_add(&_kernel.ready_q.runq, thread);
@@ -662,6 +666,10 @@ static void add_thread_timeout(struct k_thread *thread, k_timeout_t timeout)
static void pend(struct k_thread *thread, _wait_q_t *wait_q,
k_timeout_t timeout)
{
#ifdef CONFIG_KERNEL_COHERENCE
__ASSERT_NO_MSG(arch_mem_coherent(wait_q));
#endif
LOCKED(&sched_spinlock) {
add_to_waitq_locked(thread, wait_q);
}
@@ -903,22 +911,26 @@ static inline void set_current(struct k_thread *new_thread)
#ifdef CONFIG_USE_SWITCH
void *z_get_next_switch_handle(void *interrupted)
{
_current->switch_handle = interrupted;
z_check_stack_sentinel();
#ifdef CONFIG_SMP
LOCKED(&sched_spinlock) {
struct k_thread *thread = next_up();
struct k_thread *old_thread = _current, *new_thread;
if (_current != thread) {
update_metairq_preempt(thread);
old_thread->switch_handle = NULL;
new_thread = next_up();
if (old_thread != new_thread) {
update_metairq_preempt(new_thread);
wait_for_switch(new_thread);
arch_cohere_stacks(old_thread, interrupted, new_thread);
#ifdef CONFIG_TIMESLICING
z_reset_time_slice();
#endif
_current_cpu->swap_ok = 0;
set_current(thread);
set_current(new_thread);
#ifdef CONFIG_SPIN_VALIDATE
/* Changed _current! Update the spinlock
* bookkeeping so the validation doesn't get
@@ -928,15 +940,12 @@ void *z_get_next_switch_handle(void *interrupted)
z_spin_lock_set_owner(&sched_spinlock);
#endif
}
old_thread->switch_handle = interrupted;
}
#else
struct k_thread *thread = z_get_next_ready_thread();
if (_current != thread) {
set_current(thread);
}
_current->switch_handle = interrupted;
set_current(z_get_next_ready_thread());
#endif
wait_for_switch(_current);
return _current->switch_handle;
}
#endif


@@ -565,6 +565,13 @@ char *z_setup_new_thread(struct k_thread *new_thread,
z_init_thread_base(&new_thread->base, prio, _THREAD_PRESTART, options);
stack_ptr = setup_thread_stack(new_thread, stack, stack_size);
#ifdef CONFIG_KERNEL_COHERENCE
/* Check that the thread object is safe, but that the stack is
* still cached! */
__ASSERT_NO_MSG(arch_mem_coherent(new_thread));
__ASSERT_NO_MSG(!arch_mem_coherent(stack));
#endif
arch_new_thread(new_thread, stack, stack_ptr, entry, p1, p2, p3);
/* static threads overwrite it afterwards with real value */


@@ -91,6 +91,10 @@ void z_add_timeout(struct _timeout *to, _timeout_func_t fn,
return;
}
#ifdef CONFIG_KERNEL_COHERENCE
__ASSERT_NO_MSG(arch_mem_coherent(to));
#endif
#ifdef CONFIG_LEGACY_TIMEOUT_API
k_ticks_t ticks = timeout;
#else