samples/xtensa-asm2: Unit test for new Xtensa assembly primitives

This sample suite (which should eventually become a proper test) builds from simple applications of the new primitives up to a full context switch test and an interrupt handling suite (based on the CPU-internal CCOMPARE2 timer). It's been extraordinarily useful for finding regressions as the asm2 code gets modified and should probably stick around as long as possible. Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
2017-12-07 15:04:51 -08:00 · 2017-12-07 15:04:51 -08:00 · 837dd99a0e
commit 837dd99a0e
parent a34f884f23
4 changed files with 755 additions and 0 deletions
--- a/samples/xtensa_asm2/CMakeLists.txt
+++ b/samples/xtensa_asm2/CMakeLists.txt
@ -0,0 +1,8 @@
 # Build description for the xtensa_asm2 sample application.
 # NOTE(review): IS_TEST is presumably consumed by the Zephyr boilerplate to
 # mark this app as a test; its exact effect is not visible here - confirm.
 set(IS_TEST 1)
 # Pull in the common Zephyr application build logic from $ZEPHYR_BASE.
 include($ENV{ZEPHYR_BASE}/cmake/app/boilerplate.cmake NO_POLICY_SCOPE)
 project(NONE)
 # Assembly support is needed because src/asmhelp.S is one of the sources.
 enable_language(ASM)
 target_sources(app PRIVATE src/main.c src/asmhelp.S)
--- a/samples/xtensa_asm2/prj.conf
+++ b/samples/xtensa_asm2/prj.conf
@ -0,0 +1,3 @@
 # src/main.c refuses to build (#error) when CONFIG_MULTITHREADING is defined.
 CONFIG_MULTITHREADING=n
 # NOTE(review): presumably sized generously because the tests recurse deeply
 # on the main stack - confirm the actual requirement.
 CONFIG_MAIN_STACK_SIZE=8192
 # NOTE(review): presumably needed so that the sample can install its own
 # level 5 vector (DEF_EXCINT 5 in src/asmhelp.S) - confirm against the
 # Kconfig help text for this option.
 CONFIG_XTENSA_OMIT_HIGH_INTERRUPTS=y
--- a/samples/xtensa_asm2/src/asmhelp.S
+++ b/samples/xtensa_asm2/src/asmhelp.S
@ -0,0 +1,205 @@
 /*
 * Copyright (c) 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
 #include <xtensa-asm2-s.h>
 /*
 * spill_reg_windows
 *
 * Globally visible symbol to do register spills.  Useful for unit
 * testing, or maybe as part of a debug/watchdog/error handler.
 *
 * Calling convention: this is NOT a C function.  It has no ENTRY and
 * returns with a plain RET, so it must be invoked with CALL0.  CALL0
 * overwrites A0 with the return address, so the caller has to save
 * its own A0 first; no other registers need to be saved by the
 * caller.
 *
 * On return, all registers not part of the current function will
 * have been spilled to memory.  The actual work is done by the
 * SPILL_ALL_WINDOWS macro (not defined in this file; presumably it
 * comes from xtensa-asm2-s.h).
 */
 .global spill_reg_windows
 .align 4
 spill_reg_windows:
 	SPILL_ALL_WINDOWS
 	ret
 /* _one_quad: recursive helper for fill_window.
 *
 * In:  A2 = function pointer to invoke at the bottom of the recursion
 *      A3 = remaining recursion count
 *
 * Decrements the count, and if it is still non-zero calls itself
 * recursively via CALL4 (passing the pointer and the new count in
 * A6/A7, which become the callee's A2/A3 after the window rotates by
 * four registers).  Once the count reaches zero it calls the function
 * via CALLX4 instead.  Every level returns with RETW.
 */
 .align 4
 _one_quad:
 	entry a1, 16
 	addi a3, a3, -1		/* count-- */
 	beqz a3, _call_fn	/* count exhausted: go call the target */
 	mov a6, a2		/* outgoing arg 0: function pointer */
 	mov a7, a3		/* outgoing arg 1: decremented count */
 	call4 _one_quad
 	retw
 _call_fn:
 	callx4 a2
 	retw
 /* fill_window: C-callable, prototype in main.c is
 *     void fill_window(void (*fn)(void));
 *
 * Takes a function pointer as its single argument (in A2 as per ABI)
 * and invokes it having "filled" the register window with CALL4
 * frames.  The filling is done by _one_quad, which is started here
 * with a recursion count of 16.
 */
 .global fill_window
 .align 4
 fill_window:
 	entry a1, 16
 	mov a6, a2	/* callee's A2: the function to call */
 	movi a7, 16	/* callee's A3: number of CALL4 levels */
 	call4 _one_quad
 	retw
 /* The operation of the specific tests is to put some known values
 * into a particular subset of high registers.  Doing this will cause
 * the window exception to spill wrapped-around frames to make space,
 * which should be detected by the save code and cause it to write
 * only the specific registers needed.
 *
 * test_highreg_0 is the baseline case: it touches no high registers
 * at all and goes straight to the shared tail, _test_highreg_end,
 * which performs the save and the RETW on its behalf.
 */
 .global test_highreg_0
 .align 4
 test_highreg_0:
 	entry a1, 16
 	j _test_highreg_end
 /* test_highreg_4: loads each of A4-A7 with its own register number
 * (the values the C side checks for), then jumps to the shared tail
 * _test_highreg_end which does the save and returns.
 */
 .global test_highreg_4
 .align 4
 test_highreg_4:
 	entry a1, 16
 	movi a4, 4
 	movi a5, 5
 	movi a6, 6
 	movi a7, 7
 	j _test_highreg_end
 /* test_highreg_8: loads each of A4-A11 with its own register number
 * (the values the C side checks for), then jumps to the shared tail
 * _test_highreg_end which does the save and returns.
 */
 .global test_highreg_8
 .align 4
 test_highreg_8:
 	entry a1, 16
 	movi a4, 4
 	movi a5, 5
 	movi a6, 6
 	movi a7, 7
 	movi a8, 8
 	movi a9, 9
 	movi a10, 10
 	movi a11, 11
 	j _test_highreg_end
 /* test_highreg_12: loads each of A4-A15 with its own register number
 * (the values the C side checks for), then jumps to the shared tail
 * _test_highreg_end which does the save and returns.
 */
 .global test_highreg_12
 .align 4
 test_highreg_12:
 	entry a1, 16
 	movi a4, 4
 	movi a5, 5
 	movi a6, 6
 	movi a7, 7
 	movi a8, 8
 	movi a9, 9
 	movi a10, 10
 	movi a11, 11
 	movi a12, 12
 	movi a13, 13
 	movi a14, 14
 	movi a15, 15
 	j _test_highreg_end
 /* Shared tail of the test_highreg_N entry points (reached by jump, so
 * it runs in the frame its caller set up with ENTRY and returns with
 * RETW on the caller's behalf).
 *
 * Loads a pointer into A1 to serve as a "save stack" that can be
 * inspected by the caller, does the save, then restores and returns,
 * leaving the resulting stack pointer in "test_highreg_handle" for
 * inspection.  A0 and A1 are parked in the C variables
 * _test_highreg_a0_save / _test_highreg_sp_save for the duration
 * because CALL0 overwrites A0 and A1 is redirected below.
 */
 .align 4
 _test_highreg_end:
 	movi a2, _test_highreg_a0_save
 	s32i a0, a2, 0		/* stash return address */
 	movi a2, _test_highreg_sp_save
 	s32i a1, a2, 0		/* stash real stack pointer */
 	/* Do it once just to make sure the restore code works */
 	call0 xtensa_save_high_regs
 	/* NOTE(review): 22/33 look like arbitrary filler values placed
 	 * in A2/A3 between the save and the restore; their purpose is
 	 * not visible from this file - confirm.
 	 */
 	movi a2, 22
 	movi a3, 33
 	call0 xtensa_restore_high_regs
 	/* Switch A1 to the top of the inspectable save area and save
 	 * again, this time for the C code to examine.
 	 */
 	movi a2, test_highreg_sp_top
 	l32i a1, a2, 0
 	call0 xtensa_save_high_regs
 	movi a2, test_highreg_handle
 	s32i a1, a2, 0		/* publish post-save stack pointer */
 	/* Put the real stack pointer and return address back */
 	movi a2, _test_highreg_sp_save
 	l32i a1, a2, 0
 	movi a2, _test_highreg_a0_save
 	l32i a0, a2, 0
 	retw
 /* testfw: test rig for fill_window itself.  C-callable as
 *     void testfw(void);
 * Records the current WINDOWBASE and WINDOWSTART special registers in
 * the C variables testfw_wb and testfw_ws respectively, then returns.
 */
 .global testfw
 .align 4
 testfw:
 	entry a1, 16
 	movi a2, testfw_wb
 	rsr.WINDOWBASE a3
 	s32i a3, a2, 0		/* testfw_wb = WINDOWBASE */
 	movi a2, testfw_ws
 	rsr.WINDOWSTART a3
 	s32i a3, a2, 0		/* testfw_ws = WINDOWSTART */
 	retw
 /* Does a "jump" to a symbol named "rfi_jump_c" using RFI.
 *
 * The enabled (#if 1) variant loads the target address into EPC6 and
 * a copy of the current PS into EPS6, then executes "rfi 6".  The
 * disabled (#else) variant is an ordinary indirect jump to the same
 * symbol, kept for comparison.
 *
 * There is no ENTRY and no return instruction here: control does not
 * come back to this label.  A2 is used as scratch.
 */
 .global rfi_jump
 .align 4
 rfi_jump:
 #if 1
 	movi a2, rfi_jump_c
 	wsr.EPC6 a2		/* EPC6 = jump target */
 	rsr.PS a2
 	wsr.EPS6 a2		/* EPS6 = current PS */
 	rsync
 	rfi 6
 #else
 	movi a2, rfi_jump_c
 	jx a2
 #endif
 /* do_xstack_call: C-callable as
 *     void do_xstack_call(void *new_stack);
 *
 * Invokes the C function xstack_top() on the stack passed as the
 * first argument, using the CROSS_STACK_CALL macro (not defined in
 * this file), then restores the original stack pointer and returns.
 *
 * Register setup consumed by CROSS_STACK_CALL as used here:
 *     A2 = function to call, A3 = new stack pointer,
 *     A1 = pointer to a (fake) base save area.
 */
 .global do_xstack_call
 .align 4
 do_xstack_call:
 	entry a1, 16
 	mov a3, a2  /* a3 == "new sp" (this function's 1st argument) */
 	movi a2, xstack_top /* a2 == cross-stack callee/handler */
 	/* Fake a save frame, CROSS_STACK_CALL just wants the old SP
 	 * from it, we don't need to fill it.  Only one available
 	 * register, so it uses the bottom slot of the "fake BSA" as
 	 * scratch.
 	 *
 	 * Net effect of the five instructions below: A1 ends up
 	 * pointing at the fake BSA and the word at offset 0 of the
 	 * fake BSA holds that same address.
 	 * NOTE(review): the "-4 / reload / store again" steps look
 	 * redundant given the first store - confirm intent.
 	 */
 	addi a1, a1, -BASE_SAVE_AREA_SIZE
 	s32i a1, a1, 0
 	addi a1, a1, -4
 	l32i a1, a1, 4
 	s32i a1, a1, 0
 	CROSS_STACK_CALL
 	/* Restore the stack: reload the fake BSA address from the
 	 * word A1 points at, then step back over the fake BSA.
 	 */
 	l32i a1, a1, 0
 	addi a1, a1, BASE_SAVE_AREA_SIZE
 	retw
 /* Define our exception handler by instantiating the EXCINT_HANDLER
 * macro (not defined in this file).  The arguments are the special
 * register holding the per-CPU record pointer (MISC0, which main.c
 * loads with the address of its excint_cpu struct) followed by two
 * byte offsets into that record.  Offsets written to assume:
 *     struct { int nest; void *stack_top; }
 * i.e. 0 for "nest" and 4 for "stack_top".
 */
 .align 4
 _handle_excint:
 	EXCINT_HANDLER MISC0, 0, 4
 /* And a single vector at level 5 to point to it and call our C
 * handler (handle_int5_c, defined in main.c).  There is a timer on
 * most cores (qemu and LX6/ESP-32 at least) that can be used for unit
 * testing.  DEF_EXCINT is a macro that is not defined in this file.
 */
 DEF_EXCINT 5, _handle_excint, handle_int5_c
--- a/samples/xtensa_asm2/src/main.c
+++ b/samples/xtensa_asm2/src/main.c
@ -0,0 +1,539 @@
 /*
 * Copyright (c) 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
 #include <zephyr.h>
 #include <string.h>
 #include <misc/printk.h>
 #include <xtensa-asm2.h>
 /* Build-time guard: this sample must be built with multithreading
  * disabled (prj.conf sets CONFIG_MULTITHREADING=n).
  */
 #ifdef CONFIG_MULTITHREADING
 #error Disable multithreading for this unit test!
 #endif
 /* Arbitrary bit patterns used to "whiten" register contents during
  * the spill test, so that every bit of every register in every call
  * is significant and any mistake/swap/etc. shows up in the result.
  */
 int white[] = {
 	0x5fad484a, 0xc23e88f7, 0xfff301fb, 0xf1189ba7,
 	0x88bffad6, 0xaabb96fa, 0x629619d5, 0x246bee82
 };
 /* Returns the current value of the CCOUNT special register. */
 static inline unsigned int ccount(void)
 {
 	unsigned int cycles;
 	__asm__ volatile("rsr.ccount %0" : "=r"(cycles));
 	return cycles;
 }
 /* We call spill_fn() through a pointer to prevent the compiler from
  * detecting and optimizing away the recursion in spill_fn(); going
  * through the pointer forces a real function call using CALLn
  * instructions.
  */
 int (*spill_fnp)(int level, int a, int b, int c);
 /* WINDOWBASE/WINDOWSTART registers sampled before (0) and after (1)
  * the spill
  */
 unsigned int spill_wb0, spill_ws0, spill_wb1, spill_ws1;
 /* CCOUNT values sampled immediately before and after the spill */
 unsigned int spill_start, spill_end;
 /* Reference result for spill_fn(), captured in NO_SPILL mode */
 int spill_expect;
 /* Selects what spill_fn() does at the bottom of its recursion:
  * nothing, a HAL spill, or a spill via spill_reg_windows.
  * NUM_MODES is the number of modes, not a mode itself.
  */
 enum {
 	NO_SPILL, HAL_SPILL, ZEPHYR_SPILL, NUM_MODES
 } spill_mode;
 /* Recursive workload for the register spill tests.
  *
  * While level is a valid index into white[], the function derives
  * three new values from (a, b, c) and white[], recurses through
  * spill_fnp with level + 1, and returns the callee's result plus all
  * of its own inputs and derived values.
  *
  * Once level reaches ARRAY_SIZE(white) the recursion stops: it
  * samples WINDOWBASE/WINDOWSTART and CCOUNT into the spill_* globals,
  * performs the spill selected by spill_mode (if any), samples them
  * again, and returns (a + b) | c.
  */
 static int spill_fn(int level, int a, int b, int c)
 {
 	/* Be very careful when debugging, note that a printk() call
 	 * tends to push all the registers out of the windows on its
 	 * own, leaving no frames for us to test against!
 	 */
 	if (level >= ARRAY_SIZE(white)) {
 		__asm__ volatile ("rsr.WINDOWBASE %0" : "=r"(spill_wb0));
 		__asm__ volatile ("rsr.WINDOWSTART %0" : "=r"(spill_ws0));
 		spill_start = ccount();
 		if (spill_mode == NO_SPILL) {
 			/* Just here to test the cycle count overhead
 			 * and get the baseline function result.
 			 */
 		} else if (spill_mode == ZEPHYR_SPILL) {
 			/* FIXME: the a0_save hack should be needless.  It
 			 * *should* be enough to list "a0" in the clobber list
 			 * of the __asm__ statement (and let the compiler
 			 * decide on how to save the value), but that's not
 			 * working for me...
 			 *
 			 * As written: copy A0 into a compiler-chosen
 			 * register, CALL0 the assembly spill routine
 			 * (which overwrites A0 with the return address),
 			 * then copy the saved value back into A0.
 			 */
 			int a0_save;
 			__asm__ volatile
 				("mov %0, a0"			"\n\t"
 				 "call0 spill_reg_windows"	"\n\t"
 				 "mov a0, %0"			"\n\t"
 				 : "=r"(a0_save));
 		} else if (spill_mode == HAL_SPILL) {
 			/* Strictly there is a xthal_window_spill_nw
 			 * routine that is called with special setup
 			 * (use CALL0, spill A2/A3, clear WOE) and
 			 * supposed to be faster, but I couldn't make
 			 * that work.
 			 */
 			extern void xthal_window_spill(void);
 			xthal_window_spill();
 		}
 		spill_end = ccount();
 		__asm__ volatile ("rsr.WINDOWBASE %0" : "=r" (spill_wb1));
 		__asm__ volatile ("rsr.WINDOWSTART %0" : "=r" (spill_ws1));
 		return ((a + b) | c);
 	}
 	/* Mix the inputs with three consecutive (wrapping) entries of
 	 * white[] to produce the arguments for the next level.
 	 */
 	int val1 = (a - (b & c)) ^ white[level];
 	int val2 = ((a | b) + c) ^ white[(level + 1) % ARRAY_SIZE(white)];
 	int val3 = (a - (b - c)) ^ white[(level + 2) % ARRAY_SIZE(white)];
 	int x = spill_fnp(level+1, val1, val2, val3);
 	/* FIXME: as it happens, the compiler seems not to be
 	 * optimizing components of this addition before the function
 	 * call, which is what we want: the desire is that the
 	 * individual values be held in registers across the call so
 	 * that they can be checked to have been spilled/filled
 	 * properly as we return up the stack.  But the compiler
 	 * certainly COULD reorder this addition (it would actually be
 	 * a good optimization: you could reduce the number of
 	 * registers used before the tail return and use a smaller
 	 * call frame).  For now, I'm happy enough simply having read
 	 * the generated code, but long term this should be a more
 	 * robust test if possible.  Maybe write the values to some
 	 * extern volatile spots...
 	 */
 	return x + val1 + val2 + val3 + a + b + c;
 }
 /* Runs spill_fn() once in every spill mode.
  *
  * The NO_SPILL pass only records the reference result in
  * spill_expect.  For every other mode two things are checked:
  *   - after the spill, WINDOWSTART has exactly one bit set, at the
  *     position given by WINDOWBASE;
  *   - the function result matches the NO_SPILL reference.
  *
  * Returns 1 if all checks passed, 0 otherwise.
  */
 int test_reg_spill(void)
 {
 	int ok = 1;
 	spill_fnp = spill_fn;
 	for (spill_mode = 0; spill_mode < NUM_MODES; spill_mode++) {
 		printk("Testing %s\n",
 		       spill_mode == NO_SPILL ? "NO_SPILL"
 		       : (spill_mode == HAL_SPILL ? "HAL_SPILL"
 			  : "ZEPHYR_SPILL"));
 		int result = spill_fnp(0, 1, 2, 3);
 		/* All of these values are unsigned int, so print them
 		 * with unsigned conversions.
 		 */
 		printk("  WINDOWBASE %u -> %u, WINDOWSTART 0x%x -> 0x%x (%u cycles)\n",
 		       spill_wb0, spill_wb1, spill_ws0, spill_ws1,
 		       spill_end - spill_start);
 		if (spill_mode == NO_SPILL) {
 			spill_expect = result;
 			continue;
 		}
 		/* Shift an unsigned one so that the comparison is done
 		 * entirely in unsigned arithmetic.
 		 */
 		if (spill_ws1 != (1U << spill_wb1)) {
 			printk("WINDOWSTART should show exactly one frame at WINDOWBASE\n");
 			ok = 0;
 		}
 		if (result != spill_expect) {
 			printk("Unexpected fn(1, 2, 3) result, got %d want %d\n",
 			       result, spill_expect);
 			ok = 0;
 		}
 	}
 	return ok;
 }
 /* Written by the assembly in asmhelp.S: the stack pointer left by the
  * second xtensa_save_high_regs call in _test_highreg_end.
  */
 int *test_highreg_handle;
 /* Simple save locations for some context needed by the test assembly */
 void *_test_highreg_sp_save;
 void *_test_highreg_a0_save;
 /* Scratch area the test assembly uses as its "save stack";
  * test_highreg_sp_top points one past its last element.
  */
 int test_highreg_stack[64];
 int *test_highreg_sp_top = &test_highreg_stack[ARRAY_SIZE(test_highreg_stack)];
 /* External function, defined in assembly */
 void fill_window(void (*fn)(void));
 /* Test rig for fill_window, maybe remove as a metatest.  testfw()
  * stores WINDOWBASE in testfw_wb and WINDOWSTART in testfw_ws.
  */
 int testfw_wb, testfw_ws;
 void testfw(void);
 /* Assembly-defined leaf functions for fill_window which poke the
  * specified number of high GPRs before calling xtensa_save_high_regs
  * to spill them into the test_highreg_stack area for inspection.
  */
 void test_highreg_0(void);
 void test_highreg_4(void);
 void test_highreg_8(void);
 void test_highreg_12(void);
 typedef void (*test_fn_t)(void);
 /* The cases run, in order, by test_highreg_save() */
 test_fn_t highreg_tests[] = {
 	test_highreg_0,
 	test_highreg_4,
 	test_highreg_8,
 	test_highreg_12,
 };
 /* Exercises the high register save path.
  *
  * First sanity-checks fill_window() itself via testfw(): WINDOWSTART
  * must have one bit set for each of the XCHAL_NUM_AREGS / 4 quads.
  * Then each entry of highreg_tests[] is run under fill_window() and
  * the words left in the save area are compared with the register
  * numbers the assembly loaded.  Once a check has failed, no further
  * save area contents are examined.
  *
  * Returns 1 on success, 0 on failure.
  */
 int test_highreg_save(void)
 {
 	int ok = 1;
 	fill_window(testfw);
 	printk("testfw wb %d ws 0x%x\n", testfw_wb, testfw_ws);
 	if (testfw_ws != ((1 << (XCHAL_NUM_AREGS / 4)) - 1)) {
 		ok = 0;
 	}
 	for (int t = 0; t < ARRAY_SIZE(highreg_tests); t++) {
 		printk("\nHighreg test %d\n", t);
 		fill_window(highreg_tests[t]);
 		/* The word at the handle must point back at the top */
 		if (ok && *test_highreg_handle != (int)test_highreg_sp_top) {
 			ok = 0;
 		}
 		int n_words = test_highreg_sp_top - test_highreg_handle;
 		int n_quads = (n_words - 1) / 4;
 		for (int q = 0; ok && q < n_quads; q++) {
 			int *base = test_highreg_sp_top - 4 * (q + 1);
 			for (int r = 0; r < 4; r++) {
 				int regnum = 4 * (q + 1) + r;
 				if (ok && base[r] != regnum) {
 					ok = 0;
 				}
 				printk("  q %d reg %d qb[%d] %d\n",
 				       q, regnum, r, base[r]);
 			}
 		}
 	}
 	return ok;
 }
 /* Context handles for the switch test: handle0 is the main context,
  * handle1 the second context running test_switch_top().
  */
 void *switch_handle0, *switch_handle1;
 void xtensa_switch(void *handle, void **old_handle);
 /* Entry trampoline for the second context, defined in the top-level
  * asm below: it simply does a CALL4 to test_switch_top().  Nothing
  * follows the call, so test_switch_top() must never return.
  */
 void test_switch_bounce(void);
 __asm__("test_switch_bounce:"	"\n\t"
 	"call4 test_switch_top"	"\n\t");
 /* Number of times test_switch_top() has gone around its loop */
 volatile int switch_count;
 /* Body of the second context: forever publishes the iteration number
  * (starting at 1) in switch_count and then switches back to handle0,
  * which is the main thread.  Never returns.
  */
 void test_switch_top(void)
 {
 	for (int iter = 1; ; iter++) {
 		switch_count = iter;
 		xtensa_switch(switch_handle0, &switch_handle1);
 	}
 }
 /* Context switch test: builds an initial frame for
  * test_switch_bounce on a private, zeroed stack, then switches into
  * that context n_switch times.  Each visit makes the other side bump
  * switch_count by one, so the test passes (returns 1) when
  * switch_count ends up equal to n_switch.
  */
 int test_switch(void)
 {
 	static int stack2[512];
 	const int n_switch = 10;
 	printk("%s\n", __func__);
 	memset(stack2, 0, sizeof(stack2));
 	int *sp = xtensa_init_stack(&stack2[ARRAY_SIZE(stack2)],
 				    (void *)test_switch_bounce,
 				    0, 0, 0);
 #if 0
 	/* DEBUG: dump the stack contents for manual inspection */
 	for (int i = 0; i < 64; i++) {
 		int idx = ARRAY_SIZE(stack2) - (i+1);
 		int off = (i+1) * -4;
 		int *addr = &stack2[idx];
 		if (addr < sp) {
 			break;
 		}
 		printk("%p (%d): 0x%x\n", addr, off, stack2[idx]);
 	}
 	printk("sp: %p\n", sp);
 #endif
 	switch_handle1 = sp;
 	for (int remaining = n_switch; remaining > 0; remaining--) {
 		xtensa_switch(switch_handle1, &switch_handle0);
 		/* printk("count %d\n", switch_count); */
 	}
 	return switch_count == n_switch;
 }
 /* Assembly routine (asmhelp.S) that transfers control to rfi_jump_c() */
 void rfi_jump(void);
 /* Landing point of rfi_jump(): reads the PS special register and
  * prints it so the jump can be seen to have happened.
  */
 void rfi_jump_c(void)
 {
 	int ps_value;
 	__asm__ volatile ("rsr.PS %0" : "=r"(ps_value));
 	printk("%s, PS = %xh\n", __func__, ps_value);
 }
 /* Set to 1 by xstack_bottom() to show that the cross-stack call chain
  * ran all the way to the bottom.
  */
 int xstack_ok;
 #define XSTACK_SIZE 1024
 #define XSTACK_CANARY 0x5a5aa5a5
 /* Alternate stack for the cross-stack call test.  The extra element
  * at index XSTACK_SIZE holds a canary value (see test_xstack()).
  */
 static int xstack_stack2[XSTACK_SIZE + 1];
 void do_xstack_call(void *new_stack); /* in asmhelp.S */
 /* Innermost function of the cross-stack test: just records that it
  * was reached.
  */
 void xstack_bottom(void)
 {
 	xstack_ok = 1;
 }
 /* Function invoked on the alternate stack by do_xstack_call().
  * Prints the address of one of its own locals (to show which stack
  * it is running on) and then reaches xstack_bottom() through
  * fill_window().
  */
 void xstack_top(void)
 {
 	int stack_marker;
 	printk("%s oms %p\n", __func__, &stack_marker);
 	/* Going through fill_window() makes absolutely sure that the
 	 * whole call stack, across both physical stacks, gets
 	 * spilled and filled properly.
 	 */
 	fill_window(xstack_bottom);
 }
 /* Cross-stack call test.  Returns 1 when xstack_bottom() was reached
  * and the canary is still intact, 0 otherwise.
  */
 int test_xstack(void)
 {
 	/* The array is one element bigger than the stack proper; that
 	 * extra element sits just above the initial stack pointer and
 	 * holds a canary to check that nothing underflows.
 	 */
 	xstack_stack2[XSTACK_SIZE] = XSTACK_CANARY;
 	int *new_stack = xstack_stack2 + XSTACK_SIZE;
 	printk("%s new_stack = %p\n", __func__, new_stack);
 	do_xstack_call(new_stack);
 	printk("xstack_ok %d stack2[%d] 0x%x\n",
 	       xstack_ok, XSTACK_SIZE, xstack_stack2[XSTACK_SIZE]);
 	if (!xstack_ok) {
 		return 0;
 	}
 	return xstack_stack2[XSTACK_SIZE] == XSTACK_CANARY;
 }
 /* Bit number in INTENABLE that enable_timer()/disable_timer() toggle.
  * NOTE(review): presumably the interrupt line of the CCOMPARE2 timer
  * on each platform - confirm against the core configuration.
  */
 #ifdef CONFIG_SOC_ESP32
 #define TIMER_INT 16
 #else
 #define TIMER_INT 13
 #endif
 /* Set to 1 by handle_int5_c() once the interrupt has been handled */
 volatile int timer2_fired;
 /* Dedicated interrupt stack; excint_stack_top is one past its end */
 int excint_stack[8192];
 void *excint_stack_top = &excint_stack[ARRAY_SIZE(excint_stack)];
 /* Per-CPU record whose address interrupt_test() puts in MISC0.  The
  * layout must match the offsets (0 and 4) given to EXCINT_HANDLER in
  * asmhelp.S.
  */
 static struct { int nest; void *stack_top; } excint_cpu;
 /* Result of the spill_fn() run done inside the interrupt handler */
 volatile int int5_result;
 /* Clears bit TIMER_INT in the INTENABLE special register
  * (read-modify-write followed by RSYNC).
  */
 void disable_timer(void)
 {
 	int enabled;
 	__asm__ volatile("rsr.intenable %0" : "=r"(enabled));
 	enabled = enabled & ~(1<<TIMER_INT);
 	__asm__ volatile("wsr.intenable %0; rsync" : : "r"(enabled));
 }
 /* Sets bit TIMER_INT in the INTENABLE special register
  * (read-modify-write followed by RSYNC).
  */
 void enable_timer(void)
 {
 	int enabled;
 	__asm__ volatile("rsr.intenable %0" : "=r"(enabled));
 	enabled = enabled | (1<<TIMER_INT);
 	__asm__ volatile("wsr.intenable %0; rsync" : : "r"(enabled));
 }
 /* C-level handler for the level 5 vector defined in asmhelp.S
  * (DEF_EXCINT 5, _handle_excint, handle_int5_c).
  *
  * Runs the same spill_fn() computation the main thread uses as its
  * reference and stores the result in int5_result for
  * interrupt_test() to check, then quiesces the timer and raises
  * timer2_fired.
  *
  * Returns the handle it was given, unchanged.
  */
 void *handle_int5_c(void *handle)
 {
 	int5_result = spill_fnp(0, 3, 2, 1);
 	/* NOTE(review): writing CCOMPARE2 = CCOUNT - 1 presumably
 	 * acknowledges the pending compare interrupt and pushes the
 	 * next match a full counter wrap away - confirm.
 	 */
 	int ccompare2_val = ccount() - 1;
 	__asm__ volatile("wsr.ccompare2 %0; rsync" : : "r"(ccompare2_val));
 	disable_timer();
 	/* Signal the main thread last, after the timer is quiet */
 	timer2_fired = 1;
 	return handle;
 }
 /* Interrupt handling stress test.
  *
  * Points MISC0 at the excint_cpu record, measures how long one
  * spill_fn() run takes, and then, ten thousand times over, arms
  * CCOMPARE2 to fire after a pseudo-random delay, enables the timer
  * interrupt, and spins in the main thread until handle_int5_c()
  * reports that it ran.  The handler's own spill_fn() result must
  * match the one computed here.  The "nest" field of the record
  * alternates between 0 and 1 from one iteration to the next.
  *
  * Returns 1 on success, 0 if any iteration saw a wrong result.
  */
 int interrupt_test(void)
 {
 	int ok = 1;
 	excint_cpu.nest = 0;
 	excint_cpu.stack_top = &excint_stack[ARRAY_SIZE(excint_stack)];
 	void *cpuptr = &excint_cpu;
 	__asm__ volatile("wsr.MISC0 %0" : : "r"(cpuptr));
 	/* We reuse the "spill_fn" logic from above to get a
 	 * stack-sensitive, deeply-recursive computation going that
 	 * will be sensitive to interrupt bugs
 	 */
 	spill_mode = NO_SPILL;
 	unsigned int start = ccount();
 	int expect = spill_fnp(0, 3, 2, 1);
 	unsigned int spill_time = ccount() - start;
 	/* spill_time is used as a modulus below; never let it be zero
 	 * (e.g. if the cycle counter did not advance).
 	 */
 	if (spill_time == 0) {
 		spill_time = 1;
 	}
 	/* Ten thousand iterations is still pretty quick */
 	for (int i = 0; i < 10000; i++) {
 		int nest = i & 1;
 		excint_cpu.nest = nest;
 		timer2_fired = 0;
 		/* Vaguely random delay between 2-8 iterations of
 		 * spill_fn().  Maybe improve with a real PRNG.
 		 *
 		 * All of the arithmetic is done in unsigned int: the
 		 * product of a white[] entry and the loop counter
 		 * does not fit in a signed int, and signed overflow is
 		 * undefined behavior, whereas unsigned arithmetic
 		 * simply wraps.
 		 */
 		const int max_reps = 8;
 		unsigned int wh = (unsigned int)white[i % ARRAY_SIZE(white)];
 		unsigned int delay = 2*spill_time
 			+ ((wh * (unsigned int)(i+1))
 			   % (spill_time * (max_reps - 2)));
 		unsigned int alarm = ccount() + delay;
 		__asm__ volatile("wsr.ccompare2 %0; rsync" : : "r"(alarm));
 		enable_timer();
 #if 0
 		/* This is what I want to test: run the spill_fn test
 		 * repeatedly in the main thread so that it can be
 		 * interrupted and restored, and validate that it
 		 * returns the same result every time.  But this can't
 		 * work, even in principle: the timer interrupt we are
 		 * using is "high priority", which means that it can
 		 * interrupt the window exceptions being thrown in the
 		 * main thread.  And by design, Xtensa window
 		 * exceptions CANNOT be made reentrant (they don't
 		 * save the interrupted state, so can be interrupted
 		 * again before they can mask off exceptions, which
 		 * will then lose/clobber the OWB field in PS when the
 		 * interrupt handler throws another window exception).
 		 * So this doesn't work, in fact it fails every 2-10
 		 * iterations as spill_fn spends a lot of its time
 		 * spill/filling stack frames (by design, of course).
 		 *
 		 * This could be made to work if we could repurpose
 		 * the existing medium priority timer interrupt (which
 		 * is hard in a unit test: that's an important
 		 * interrupt!) or use the low priority timer which
 		 * delivers to the global exception handler (basically
 		 * impossible in a unit test).  Frustrating.
 		 */
 		int reps = 0;
 		while (!timer2_fired && reps < (max_reps+2)) {
 			int result = spill_fnp(0, 3, 2, 1);
 			reps++;
 			if (result != expect) {
 				ok = 0;
 			}
 		}
 		if (reps >= max_reps+2) {
 			printk("Interrupt didn't arrive\n");
 			ok = 0;
 		}
 		if (int5_result != expect) {
 			printk("Unexpected int spill_fn() result\n");
 			ok = 0;
 		}
 		printk("INT test delay %u nest %d reps %d\n",
 		       delay, nest, reps);
 #else
 		/* So this is what we do instead: just spin in the
 		 * main thread calling functions that don't involve
 		 * exceptions.  By experiment, calling spill_fn with a
 		 * first (depth) argument of 6 or 7 results in a
 		 * shallow call tree that won't throw exceptions.  At
 		 * least we're executing real code which depends on
 		 * its register state and validating that interrupts
 		 * don't hurt.
 		 */
 		volatile int dummy = 1;
 		while (!timer2_fired) {
 			dummy = spill_fnp(6, dummy, 2, 3);
 		}
 		if (int5_result != expect) {
 			printk("Unexpected int spill_fn() result\n");
 			ok = 0;
 		}
 #endif
 	}
 	return ok;
 }
 /* Test entry point: runs every test in sequence, stopping at the
  * first failure, and prints "OK" or "Failed".
  */
 void main(void)
 {
 	/* Interrupts are turned off and left off for the duration,
 	 * otherwise the "userspace" context switching tests might
 	 * not be reliable: stack pointers can exist in indeterminate
 	 * states here.  (Note: the interrupt test below is using a
 	 * high priority interrupt which is not masked by irq_lock(),
 	 * so it doesn't care).
 	 */
 	int key = irq_lock();
 	/* Strictly not a "test", we just want to know that the jump
 	 * worked.  If the rest of the code runs, this must have
 	 * "passed".
 	 */
 	rfi_jump();
 	/* && short-circuits, so later tests are skipped once an
 	 * earlier one has failed.
 	 */
 	int ok = test_reg_spill()
 		&& test_highreg_save()
 		&& test_switch()
 		&& test_xstack()
 		&& interrupt_test();
 	irq_unlock(key);
 	if (ok) {
 		printk("%s\n", "OK");
 	} else {
 		printk("%s\n", "Failed");
 	}
 }