/*
 * zephyr/arch/nios2/core/swap.S
 *
 * 195 lines
 * 5.2 KiB
 * ArmAsm
 * Raw Normal View History
 */

/*
* Copyright (c) 2016 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
/*
 * Commit annotation carried over from the repository web view
 * (2016-11-08 10:36:50 -05:00):
 *
 * kernel/arch: consolidate tTCS and TNANO definitions
 *
 * There was a lot of duplication between architectures for the definition
 * of threads and the "nanokernel" guts. These have been consolidated.
 *
 * Now, a common file kernel/unified/include/kernel_structs.h holds the
 * common definitions. Architectures provide two files to complement it:
 * kernel_arch_data.h and kernel_arch_func.h. The first one contains at
 * least the struct _thread_arch and struct _kernel_arch data structures,
 * as well as the struct _callee_saved and struct _caller_saved register
 * layouts. The second file contains anything that needs what is provided
 * by the common stuff in kernel_structs.h. Those two files are only meant
 * to be included in kernel_structs.h in very specific locations.
 *
 * The thread data structure has been separated into three major parts:
 * common struct _thread_base and struct k_thread, and arch-specific
 * struct _thread_arch. The first and third ones are included in the
 * second.
 *
 * The struct s_NANO data structure has been split into two: common
 * struct _kernel and arch-specific struct _kernel_arch. The latter is
 * included in the former.
 *
 * Offsets files have also changed: nano_offsets.h has been renamed
 * kernel_offsets.h and is still included by the arch-specific offsets.c.
 * Also, since the thread and kernel data structures are now made of
 * sub-structures, offsets have to be added to make up the full offset.
 * Some of these additions have been consolidated in shorter symbols,
 * available from kernel/unified/include/offsets_short.h, which includes
 * an arch-specific offsets_arch_short.h. Most of the code include
 * offsets_short.h now instead of offsets.h.
 *
 * Change-Id: I084645cb7e6db8db69aeaaf162963fe157045d5a
 * Signed-off-by: Benjamin Walsh <benjamin.walsh@windriver.com>
 */
#include <kernel_structs.h>
#include <offsets_short.h>
/* exports */
GTEXT(__swap)
GTEXT(z_thread_entry_wrapper)
/* imports */
GTEXT(sys_trace_thread_switched_in)
GTEXT(_k_neg_eagain)
/* unsigned int __swap(unsigned int key)
 *
 * Always called with interrupts locked.
 *
 * Saves the outgoing thread's callee-saved registers, ra, sp, 'key' and a
 * default return value into its thread struct, makes the thread held in
 * _kernel.ready_q.cache the current thread, restores that thread's saved
 * context and returns on its stack.
 *
 * In:      r4 = key (result of irq_lock() in the caller)
 * Out:     r2 = value held in the incoming thread's retval slot
 * Scratch: r3, r4, r5, r10, r11
 */
SECTION_FUNC(exception.other, __swap)

#ifdef CONFIG_EXECUTION_BENCHMARKING
	/* The timestamp hook is an ordinary call, so park r2-r4, ra and sp
	 * in the current thread's r16/r17/r18/ra/sp slots around it. Those
	 * slots receive their real contents further down.
	 */

	/* Get a reference to _kernel in r10 */
	movhi r10, %hi(_kernel)
	ori r10, r10, %lo(_kernel)

	/* Get the pointer to kernel->current */
	ldw r11, _kernel_offset_to_current(r10)
	stw r2, _thread_offset_to_r16(r11)
	stw r3, _thread_offset_to_r17(r11)
	stw r4, _thread_offset_to_r18(r11)
	stw ra, _thread_offset_to_ra(r11)
	stw sp, _thread_offset_to_sp(r11)

	call read_timer_start_of_swap

	/* r10/r11 are not preserved across the call: recompute them */
	movhi r10, %hi(_kernel)
	ori r10, r10, %lo(_kernel)

	/* Get the pointer to kernel->current */
	ldw r11, _kernel_offset_to_current(r10)
	ldw r2, _thread_offset_to_r16(r11)
	ldw r3, _thread_offset_to_r17(r11)
	ldw r4, _thread_offset_to_r18(r11)
	ldw ra, _thread_offset_to_ra(r11)
	ldw sp, _thread_offset_to_sp(r11)
#endif

	/* Get a reference to _kernel in r10 */
	movhi r10, %hi(_kernel)
	ori r10, r10, %lo(_kernel)

	/* Get the pointer to kernel->current */
	ldw r11, _kernel_offset_to_current(r10)

	/* Store all the callee saved registers. We either got here via
	 * an exception or from a cooperative invocation of __swap() from C
	 * domain, so all the caller-saved registers have already been
	 * saved by the exception asm or the calling C code already.
	 */
	stw r16, _thread_offset_to_r16(r11)
	stw r17, _thread_offset_to_r17(r11)
	stw r18, _thread_offset_to_r18(r11)
	stw r19, _thread_offset_to_r19(r11)
	stw r20, _thread_offset_to_r20(r11)
	stw r21, _thread_offset_to_r21(r11)
	stw r22, _thread_offset_to_r22(r11)
	stw r23, _thread_offset_to_r23(r11)
	stw r28, _thread_offset_to_r28(r11)
	stw ra, _thread_offset_to_ra(r11)
	stw sp, _thread_offset_to_sp(r11)

	/* r4 has the 'key' argument which is the result of irq_lock()
	 * before this was called
	 */
	stw r4, _thread_offset_to_key(r11)

	/* Populate default return value: the word stored at _k_neg_eagain.
	 * r4 is free to reuse now that 'key' has been saved.
	 */
	movhi r5, %hi(_k_neg_eagain)
	ori r5, r5, %lo(_k_neg_eagain)
	ldw r4, (r5)
	stw r4, _thread_offset_to_retval(r11)

#ifdef CONFIG_TRACING
	/* NOTE(review): the hook runs while _kernel.current still names the
	 * outgoing thread -- confirm that is what the tracer expects.
	 */
	call sys_trace_thread_switched_in

	/* restore caller-saved r10 */
	movhi r10, %hi(_kernel)
	ori r10, r10, %lo(_kernel)
#endif

	/* Get cached thread to run: a single load from
	 * _kernel.ready_q.cache, no call into C is needed.
	 */
	ldw r2, _kernel_offset_to_ready_q_cache(r10)

	/* At this point r2 points to the next thread to be swapped in */

	/* the thread to be swapped in is now the current thread */
	stw r2, _kernel_offset_to_current(r10)

	/* Restore callee-saved registers and switch to the incoming
	 * thread's stack
	 */
	ldw r16, _thread_offset_to_r16(r2)
	ldw r17, _thread_offset_to_r17(r2)
	ldw r18, _thread_offset_to_r18(r2)
	ldw r19, _thread_offset_to_r19(r2)
	ldw r20, _thread_offset_to_r20(r2)
	ldw r21, _thread_offset_to_r21(r2)
	ldw r22, _thread_offset_to_r22(r2)
	ldw r23, _thread_offset_to_r23(r2)
	ldw r28, _thread_offset_to_r28(r2)
	ldw ra, _thread_offset_to_ra(r2)
	ldw sp, _thread_offset_to_sp(r2)

	/* We need to irq_unlock() with the key the incoming thread passed
	 * to its own __swap() call. Fetch it from the thread's key slot
	 * while r2 still points at the thread.
	 */
	ldw r3, _thread_offset_to_key(r2)

	/*
	 * Load return value into r2 (return value register). -EAGAIN unless
	 * someone previously called z_arch_thread_return_value_set(). Do this
	 * before we potentially unlock interrupts.
	 */
	ldw r2, _thread_offset_to_retval(r2)

	/* Now do irq_unlock() with the key held in r3 */
#if (ALT_CPU_NUM_OF_SHADOW_REG_SETS > 0) || \
	(defined ALT_CPU_EIC_PRESENT) || \
	(defined ALT_CPU_MMU_PRESENT) || \
	(defined ALT_CPU_MPU_PRESENT)
	/* Only the PIE bit of the key is honoured: if it was set, set PIE
	 * in the live status register and leave every other bit untouched.
	 */
	andi r3, r3, NIOS2_STATUS_PIE_MSK
	beq r3, zero, no_unlock
	rdctl r3, status
	ori r3, r3, NIOS2_STATUS_PIE_MSK
	wrctl status, r3
no_unlock:
#else
	/* The key is written to the status register as a whole */
	wrctl status, r3
#endif

#ifdef CONFIG_EXECUTION_BENCHMARKING
	/* Same parking scheme as at function entry, this time around the
	 * end-of-swap timestamp hook.
	 *
	 * NOTE(review): interrupts may already be unlocked here while
	 * r2-r4 sit in the thread's callee-saved slots -- confirm a
	 * preempting swap cannot overwrite them before they are reloaded.
	 */

	/* Get a reference to _kernel in r10 */
	movhi r10, %hi(_kernel)
	ori r10, r10, %lo(_kernel)

	ldw r11, _kernel_offset_to_current(r10)
	stw r2, _thread_offset_to_r16(r11)
	stw r3, _thread_offset_to_r17(r11)
	stw r4, _thread_offset_to_r18(r11)
	stw ra, _thread_offset_to_ra(r11)
	stw sp, _thread_offset_to_sp(r11)

	call read_timer_end_of_swap

	/* r10/r11 are not preserved across the call: recompute them */
	movhi r10, %hi(_kernel)
	ori r10, r10, %lo(_kernel)

	/* Get the pointer to kernel->current */
	ldw r11, _kernel_offset_to_current(r10)
	ldw r2, _thread_offset_to_r16(r11)
	ldw r3, _thread_offset_to_r17(r11)
	ldw r4, _thread_offset_to_r18(r11)
	ldw ra, _thread_offset_to_ra(r11)
	ldw sp, _thread_offset_to_sp(r11)
#endif

	/* Return to the incoming thread through the ra restored above */
	ret
/* void z_thread_entry_wrapper(void)
 *
 * Loads the four words at the top of the stack into the argument
 * registers r4-r7, pops them, and calls z_thread_entry.
 */
SECTION_FUNC(TEXT, z_thread_entry_wrapper)
/* This all corresponds to struct init_stack_frame defined in
 * thread.c. We need to take this stuff off the stack and put
 * it in the appropriate registers
 */
/* Can't return from here, just put NULL in ra.
 * NOTE(review): the `call` at the end of this routine writes its own
 * return address into ra, so this zero is overwritten before
 * z_thread_entry runs -- confirm whether a plain jump was intended.
 */
movi ra, 0
/* Calling convention has first 4 arguments in registers r4-r7. */
ldw r4, 0(sp)
ldw r5, 4(sp)
ldw r6, 8(sp)
ldw r7, 12(sp)
/* pop all the stuff that we just loaded into registers (4 words = 16 bytes) */
addi sp, sp, 16
/* No instruction follows this call in the routine */
call z_thread_entry