From e11be42558b3f15cc74af2315b34c8a931709a72 Mon Sep 17 00:00:00 2001
From: Wayne Ren
Date: Thu, 25 Jul 2019 12:13:13 +0800
Subject: [PATCH] arch: arc: add initial support of SMP

* modify the reset flow for SMP
* add smp related initialization
* implement ipi related functions
* implement thread switch in isr/exception
* derive the exception stack from the per-cpu irq stack pointer; that
  pointer is the top of the stack, so the exception stack area is
  reached by subtracting, not by adding EXCEPTION_STACK_SIZE

Signed-off-by: Wayne Ren
---
 arch/arc/core/CMakeLists.txt        |   1 +
 arch/arc/core/arc_smp.c             | 174 ++++++++++++++++++++++++++++
 arch/arc/core/fast_irq.S            |  37 +++++-
 arch/arc/core/fault_s.S             |  26 ++++-
 arch/arc/core/prep_c.c              |  33 ------
 arch/arc/core/regular_irq.S         |  17 ++-
 arch/arc/core/reset.S               |  34 ++++++
 arch/arc/include/kernel_arch_func.h |  18 +++
 arch/arc/include/swap_macros.h      |  11 ++
 arch/arc/include/v2/irq.h           |   3 -
 10 files changed, 306 insertions(+), 48 deletions(-)
 create mode 100644 arch/arc/core/arc_smp.c

diff --git a/arch/arc/core/CMakeLists.txt b/arch/arc/core/CMakeLists.txt
index 199d015afa8..8d6990fd6d2 100644
--- a/arch/arc/core/CMakeLists.txt
+++ b/arch/arc/core/CMakeLists.txt
@@ -26,3 +26,4 @@ zephyr_library_sources_if_kconfig(irq_offload.c)
 add_subdirectory_ifdef(CONFIG_ARC_CORE_MPU mpu)
 zephyr_library_sources_ifdef(CONFIG_USERSPACE userspace.S)
 zephyr_library_sources_ifdef(CONFIG_ARC_CONNECT arc_connect.c)
+zephyr_library_sources_ifdef(CONFIG_SMP arc_smp.c)
diff --git a/arch/arc/core/arc_smp.c b/arch/arc/core/arc_smp.c
new file mode 100644
index 00000000000..8676c328b01
--- /dev/null
+++ b/arch/arc/core/arc_smp.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2019 Synopsys.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @file
+ * @brief codes required for ARC smp support
+ *
+ */
+#include <device.h>
+#include <kernel.h>
+#include <kernel_structs.h>
+#include <ksched.h>
+#include <soc.h>
+#include <init.h>
+
+
+#ifndef IRQ_ICI
+#define IRQ_ICI 19
+#endif
+
+#define ARCV2_ICI_IRQ_PRIORITY 1
+
+static void sched_ipi_handler(void *unused)
+{
+	ARG_UNUSED(unused);
+
+	z_arc_connect_ici_clear();
+	z_sched_ipi();
+}
+
+/**
+ * @brief Check whether a thread switch is needed in isr context
+ *
+ * @details u64_t is used to let compiler use (r0, r1) as return register.
+ *          The low word (r0) carries the new thread, the high word (r1)
+ *          carries the old thread. If r0 == 0, there is no thread switch.
+ */
+u64_t z_arch_smp_switch_in_isr(void)
+{
+	u64_t ret = 0;
+	u32_t new_thread;
+	u32_t old_thread;
+
+	if (!_current_cpu->swap_ok) {
+		return 0;
+	}
+
+	old_thread = (u32_t)_current;
+
+	new_thread = (u32_t)z_get_next_ready_thread();
+
+	if (new_thread != old_thread) {
+		_current_cpu->swap_ok = 0;
+		((struct k_thread *)new_thread)->base.cpu =
+			z_arch_curr_cpu()->id;
+		_current = (struct k_thread *) new_thread;
+		ret = new_thread | ((u64_t)(old_thread) << 32);
+	}
+
+	return ret;
+}
+
+volatile struct {
+	void (*fn)(int, void*);
+	void *arg;
+} arc_cpu_init[CONFIG_MP_NUM_CPUS];
+
+/*
+ * arc_cpu_wake_flag is used to sync up master core and slave cores.
+ * Slave core will spin on arc_cpu_wake_flag until master core sets
+ * it to the core id of slave core. Then, slave core clears it to notify
+ * master core that it has woken up
+ *
+ */
+volatile u32_t arc_cpu_wake_flag;
+/*
+ * _curr_irq_stack is used to record the irq stack pointer
+ * of each cpu. _kernel.cpus[CONFIG_MP_NUM_CPUS].irq_stack also
+ * has a copy of irq stack pointer, but not efficient to use in assembly
+ */
+volatile u32_t _curr_irq_stack[CONFIG_MP_NUM_CPUS];
+
+/* Called from Zephyr initialization to start a slave core (cpu_num != 0) */
+void z_arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
+		      void (*fn)(int, void *), void *arg)
+{
+
+	_curr_irq_stack[cpu_num] = (u32_t)(sz + (char *)stack);
+	arc_cpu_init[cpu_num].fn = fn;
+	arc_cpu_init[cpu_num].arg = arg;
+
+	arc_cpu_wake_flag = cpu_num;
+
+	/* wait for the slave cpu to start */
+	while (arc_cpu_wake_flag != 0) {
+		;
+	}
+}
+
+/* the C entry of slave cores */
+void z_arch_slave_start(int cpu_num)
+{
+	void (*fn)(int, void*);
+
+	z_icache_setup();
+	z_irq_setup();
+
+	z_irq_priority_set(IRQ_ICI, ARCV2_ICI_IRQ_PRIORITY, 0);
+	irq_enable(IRQ_ICI);
+
+	/* call the function set by z_arch_start_cpu */
+	fn = arc_cpu_init[cpu_num].fn;
+
+	fn(cpu_num, arc_cpu_init[cpu_num].arg);
+}
+
+/* arch implementation of sched_ipi */
+void z_arch_sched_ipi(void)
+{
+	u32_t i;
+
+	/* broadcast sched_ipi request to all cores
+	 * if the target is current core, hardware will ignore it
+	 */
+	for (i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
+		z_arc_connect_ici_generate(i);
+	}
+}
+
+static int arc_smp_init(struct device *dev)
+{
+	ARG_UNUSED(dev);
+	struct arc_connect_bcr bcr;
+
+	/* necessary master core init */
+	_kernel.cpus[0].id = 0;
+	_kernel.cpus[0].irq_stack = Z_THREAD_STACK_BUFFER(_interrupt_stack)
+		+ CONFIG_ISR_STACK_SIZE;
+	_curr_irq_stack[0] = (u32_t)(_kernel.cpus[0].irq_stack);
+
+	bcr.val = z_arc_v2_aux_reg_read(_ARC_V2_CONNECT_BCR);
+
+	if (bcr.ipi) {
+		/* register ici interrupt once, on the master core only */
+		IRQ_CONNECT(IRQ_ICI, ARCV2_ICI_IRQ_PRIORITY,
+			    sched_ipi_handler, NULL, 0);
+
+		irq_enable(IRQ_ICI);
+	} else {
+		__ASSERT(0,
+			"ARC connect has no inter-core interrupt\n");
+		return -ENODEV;
+	}
+
+	if (bcr.gfrc) {
+		/* global free running count init */
+		z_arc_connect_gfrc_enable();
+
+		/* when all cores halt, gfrc halt */
+		z_arc_connect_gfrc_core_set((1 << CONFIG_MP_NUM_CPUS) - 1);
+		z_arc_connect_gfrc_clear();
+	} else {
+		__ASSERT(0,
+			"ARC connect has no global free running counter\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+SYS_INIT(arc_smp_init, PRE_KERNEL_1, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT);
diff --git a/arch/arc/core/fast_irq.S b/arch/arc/core/fast_irq.S
index 0011bbf6754..029899067e6 100644
--- a/arch/arc/core/fast_irq.S
+++ b/arch/arc/core/fast_irq.S
@@ -83,8 +83,7 @@ SECTION_FUNC(TEXT, _firq_enter)
 	bne.d firq_nest
 	mov r0, sp
 
-	mov r1, _kernel
-	ld sp, [r1, _kernel_offset_to_irq_stack]
+	_get_curr_cpu_irq_stack sp
 #if CONFIG_RGF_NUM_BANKS != 1
 	b firq_nest_1
 firq_nest:
@@ -152,13 +151,18 @@ SECTION_FUNC(TEXT, _firq_exit)
 
 #ifdef CONFIG_PREEMPT_ENABLED
 
+#ifdef CONFIG_SMP
+	bl z_arch_smp_switch_in_isr
+	/* r0 points to new thread, r1 points to old thread */
+	brne r0, 0, _firq_reschedule
+#else
 	mov_s r1, _kernel
 	ld_s r2, [r1, _kernel_offset_to_current]
 
 	/* Check if the current thread (in r2) is the cached thread */
 	ld_s r0, [r1, _kernel_offset_to_ready_q_cache]
 	brne r0, r2, _firq_reschedule
-
+#endif
 	/* fall to no rescheduling */
 
 #endif /* CONFIG_PREEMPT_ENABLED */
@@ -184,6 +188,15 @@ _firq_reschedule:
 	pop sp
 
 #if CONFIG_RGF_NUM_BANKS != 1
+#ifdef CONFIG_SMP
+/*
+ * save r0, r1 in irq stack for a while, as they will be changed by register
+ * bank switch
+ */
+	_get_curr_cpu_irq_stack r2
+	st r0, [r2, -4]
+	st r1, [r2, -8]
+#endif
 	/*
 	 * We know there is no interrupted interrupt of lower priority at this
 	 * point, so when switching back to register bank 0, it will contain the
@@ -206,18 +219,34 @@ _firq_reschedule:
 	lr r0, [_ARC_V2_STATUS32_P0]
 	st_s r0, [sp, ___isf_t_status32_OFFSET]
 
+	lr ilink, [_ARC_V2_ERET]
 	st ilink, [sp, ___isf_t_pc_OFFSET] /* ilink into pc */
+#ifdef CONFIG_SMP
+/*
+ * load r0, r1 from irq stack
+ */
+	_get_curr_cpu_irq_stack r2
+	ld r0, [r2, -4]
+	ld r1, [r2, -8]
+#endif
 #endif
 
+#ifdef CONFIG_SMP
+	mov r2, r1
+#else
 	mov_s r1, _kernel
 	ld_s r2, [r1, _kernel_offset_to_current]
-
+#endif
 	_save_callee_saved_regs
 
 	st _CAUSE_FIRQ, [r2, _thread_offset_to_relinquish_cause]
 
+#ifdef CONFIG_SMP
+	mov r2, r0
+#else
 	ld_s r2, [r1, _kernel_offset_to_ready_q_cache]
 	st_s r2, [r1, _kernel_offset_to_current]
+#endif
 
 #ifdef CONFIG_ARC_STACK_CHECKING
 	_load_stack_check_regs
diff --git a/arch/arc/core/fault_s.S b/arch/arc/core/fault_s.S
index 2c893ec633f..176195a273b 100644
--- a/arch/arc/core/fault_s.S
+++ b/arch/arc/core/fault_s.S
@@ -59,14 +59,14 @@ SECTION_SUBSEC_FUNC(TEXT,__fault,__ev_dc_error)
 SECTION_SUBSEC_FUNC(TEXT,__fault,__ev_maligned)
 
 _exc_entry:
-	mov_s ilink, sp
 	/*
 	 * re-use the top part of interrupt stack as exception
 	 * stack. If this top part is used by interrupt handling,
 	 * and exception is raised, then here it's guaranteed that
 	 * exception handling has necessary stack to use
 	 */
-	mov_s sp, _interrupt_stack
-	add sp, sp, EXCEPTION_STACK_SIZE
+	mov_s ilink, sp
+	_get_curr_cpu_irq_stack sp
+	sub sp, sp, (CONFIG_ISR_STACK_SIZE - EXCEPTION_STACK_SIZE)
 
 	/*
@@ -98,6 +98,11 @@ _exc_entry:
 _exc_return:
 
 #ifdef CONFIG_PREEMPT_ENABLED
+#ifdef CONFIG_SMP
+	bl z_arch_smp_switch_in_isr
+	breq r0, 0, _exc_return_from_exc
+	mov r2, r0
+#else
 	mov_s r1, _kernel
 	ld_s r2, [r1, _kernel_offset_to_current]
 
@@ -107,6 +112,7 @@ _exc_return:
 
 	ld_s r2, [r1, _kernel_offset_to_ready_q_cache]
 	st_s r2, [r1, _kernel_offset_to_current]
+#endif
 
 #ifdef CONFIG_ARC_HAS_SECURE
 	/*
@@ -208,8 +214,7 @@ _do_non_syscall_trap:
 	bne.d exc_nest_handle
 	mov r0, sp
 
-	mov r1, _kernel
-	ld sp, [r1, _kernel_offset_to_irq_stack]
+	_get_curr_cpu_irq_stack sp
 exc_nest_handle:
 	push_s r0
 
@@ -223,6 +228,16 @@ exc_nest_handle:
 	bne _exc_return_from_exc
 
 #ifdef CONFIG_PREEMPT_ENABLED
+#ifdef CONFIG_SMP
+	bl z_arch_smp_switch_in_isr
+	breq r0, 0, _exc_return_from_irqoffload_trap
+	mov r2, r1
+
+	_save_callee_saved_regs
+
+	st _CAUSE_RIRQ, [r2, _thread_offset_to_relinquish_cause]
+	mov r2, r0
+#else
 	mov_s r1, _kernel
 	ld_s r2, [r1, _kernel_offset_to_current]
 
@@ -237,6 +252,7 @@ exc_nest_handle:
 
 	ld_s r2, [r1, _kernel_offset_to_ready_q_cache]
 	st_s r2, [r1, _kernel_offset_to_current]
+#endif
 
 #ifdef CONFIG_ARC_HAS_SECURE
 	/*
diff --git a/arch/arc/core/prep_c.c b/arch/arc/core/prep_c.c
index 9256a5e5643..408eb9d53d7 100644
--- a/arch/arc/core/prep_c.c
+++ b/arch/arc/core/prep_c.c
@@ -73,38 +73,6 @@ static void invalidate_dcache(void)
 }
 #endif
 
-/**
- *
- * @brief Adjust the vector table base
- *
- * Set the vector table base if the value found in the
- * _ARC_V2_IRQ_VECT_BASE auxiliary register is different from the
- * _VectorTable known by software. It is important to do this very early
- * so that exception vectors can be handled.
- *
- * @return N/A
- */
-
-static void adjust_vector_table_base(void)
-{
-#ifdef CONFIG_ARC_HAS_SECURE
-#undef _ARC_V2_IRQ_VECT_BASE
-#define _ARC_V2_IRQ_VECT_BASE _ARC_V2_IRQ_VECT_BASE_S
-#endif
-	extern struct vector_table _VectorTable;
-	unsigned int vbr;
-	/* if the compiled-in vector table is different
-	 * from the base address known by the ARC CPU,
-	 * set the vector base to the compiled-in address.
-	 */
-	vbr = z_arc_v2_aux_reg_read(_ARC_V2_IRQ_VECT_BASE);
-	vbr &= 0xfffffc00;
-	if (vbr != (unsigned int)&_VectorTable) {
-		z_arc_v2_aux_reg_write(_ARC_V2_IRQ_VECT_BASE,
-					(unsigned int)&_VectorTable);
-	}
-}
-
 extern FUNC_NORETURN void z_cstart(void);
 /**
  *
@@ -118,7 +86,6 @@ extern FUNC_NORETURN void z_cstart(void);
 void _PrepC(void)
 {
 	z_icache_setup();
-	adjust_vector_table_base();
 	z_bss_zero();
 	z_data_copy();
 	z_cstart();
diff --git a/arch/arc/core/regular_irq.S b/arch/arc/core/regular_irq.S
index d28e6794334..9218ad35b70 100644
--- a/arch/arc/core/regular_irq.S
+++ b/arch/arc/core/regular_irq.S
@@ -24,6 +24,7 @@ GTEXT(_rirq_enter)
 GTEXT(_rirq_exit)
 GTEXT(_rirq_common_interrupt_swap)
 
+
 #if 0 /* TODO: when FIRQ is not present, all would be regular */
 #define NUM_REGULAR_IRQ_PRIO_LEVELS CONFIG_NUM_IRQ_PRIO_LEVELS
 #else
@@ -72,8 +73,7 @@ SECTION_FUNC(TEXT, _rirq_enter)
 	bne.d rirq_nest
 	mov r0, sp
 
-	mov r1, _kernel
-	ld sp, [r1, _kernel_offset_to_irq_stack]
+	_get_curr_cpu_irq_stack sp
 rirq_nest:
 	push_s r0
 
@@ -103,6 +103,13 @@ SECTION_FUNC(TEXT, _rirq_exit)
 
 #ifdef CONFIG_PREEMPT_ENABLED
 
+#ifdef CONFIG_SMP
+	bl z_arch_smp_switch_in_isr
+	/* r0 points to new thread, r1 points to old thread */
+	cmp r0, 0
+	beq _rirq_no_reschedule
+	mov r2, r1
+#else
 	mov r1, _kernel
 	ld_s r2, [r1, _kernel_offset_to_current]
 
@@ -127,7 +134,7 @@ SECTION_FUNC(TEXT, _rirq_exit)
 	beq _rirq_no_reschedule
 
 	/* cached thread to run is in r0, fall through */
-
+#endif
 .balign 4
 _rirq_reschedule:
 
@@ -136,9 +143,13 @@ _rirq_reschedule:
 
 	st _CAUSE_RIRQ, [r2, _thread_offset_to_relinquish_cause]
 
+#ifdef CONFIG_SMP
+	mov r2, r0
+#else
 	/* incoming thread is in r0: it becomes the new 'current' */
 	mov r2, r0
 	st_s r2, [r1, _kernel_offset_to_current]
+#endif
 
 .balign 4
 _rirq_common_interrupt_swap:
diff --git a/arch/arc/core/reset.S b/arch/arc/core/reset.S
index 10a9d4f61ce..2d881cb39b3 100644
--- a/arch/arc/core/reset.S
+++ b/arch/arc/core/reset.S
@@ -14,9 +14,11 @@
 #include <toolchain.h>
 #include <linker/sections.h>
 #include <arch/cpu.h>
+#include <swap_macros.h>
 
 GDATA(_interrupt_stack)
 GDATA(_main_stack)
+GDATA(_VectorTable)
 
 /* use one of the available interrupt stacks during init */
 
@@ -66,6 +68,16 @@ SECTION_FUNC(TEXT,__start)
 
 	/* \todo: MPU init, gp for small data? */
 
+	/* set the vector table base early,
+	 * so that exception vectors can be handled.
+	 */
+	mov r0, _VectorTable
+#ifdef CONFIG_ARC_HAS_SECURE
+	sr r0, [_ARC_V2_IRQ_VECT_BASE_S]
+#else
+	sr r0, [_ARC_V2_IRQ_VECT_BASE]
+#endif
+
 #if defined(CONFIG_USERSPACE)
 	lr r0, [_ARC_V2_STATUS32]
 	bset r0, r0, _ARC_V2_STATUS32_US_BIT
@@ -109,6 +121,28 @@ done_cache_invalidate:
 	jl @_sys_resume_from_deep_sleep
 #endif
 
+#ifdef CONFIG_SMP
+	_get_cpu_id r0
+	breq r0, 0, _master_core_startup
+
+/*
+ * Non-masters wait for master core (core 0) to boot enough
+ */
+_slave_core_wait:
+	ld r1, [arc_cpu_wake_flag]
+	brne r0, r1, _slave_core_wait
+
+	/* signal master core that slave core runs */
+	st 0, [arc_cpu_wake_flag]
+
+	/* get sp set by master core */
+	_get_curr_cpu_irq_stack sp
+
+	j z_arch_slave_start
+
+_master_core_startup:
+#endif
+
 #ifdef CONFIG_INIT_STACKS
 	/*
 	 * use the main stack to call memset on the interrupt stack and the
diff --git a/arch/arc/include/kernel_arch_func.h b/arch/arc/include/kernel_arch_func.h
index 8f9aa7f882c..530a3423e41 100644
--- a/arch/arc/include/kernel_arch_func.h
+++ b/arch/arc/include/kernel_arch_func.h
@@ -31,9 +31,24 @@ extern "C" {
 #include <v2/irq.h>
 #endif
 
+static ALWAYS_INLINE _cpu_t *z_arch_curr_cpu(void)
+{
+#ifdef CONFIG_SMP
+	u32_t core;
+
+	core = z_arc_v2_core_id();
+
+	return &_kernel.cpus[core];
+#else
+	return &_kernel.cpus[0];
+#endif
+}
+
 static ALWAYS_INLINE void kernel_arch_init(void)
 {
 	z_irq_setup();
+	_current_cpu->irq_stack =
+		Z_THREAD_STACK_BUFFER(_interrupt_stack) + CONFIG_ISR_STACK_SIZE;
 }
 
 
@@ -62,6 +77,9 @@ extern void z_arc_userspace_enter(k_thread_entry_t user_entry, void *p1,
 
 extern void z_arch_switch(void *switch_to, void **switched_from);
 extern void z_arc_fatal_error(unsigned int reason, const z_arch_esf_t *esf);
+
+extern void z_arch_sched_ipi(void);
+
 #endif /* _ASMLANGUAGE */
 
 #ifdef __cplusplus
diff --git a/arch/arc/include/swap_macros.h b/arch/arc/include/swap_macros.h
index d33ab4a8959..c8c04bafcb9 100644
--- a/arch/arc/include/swap_macros.h
+++ b/arch/arc/include/swap_macros.h
@@ -284,6 +284,7 @@ extern "C" {
 #endif /* CONFIG_ARC_HAS_SECURE */
 .endm
 
+
 /* If multi bits in IRQ_ACT are set, i.e. last bit != fist bit, it's
  * in nest interrupt. The result will be EQ bit of status32
  */
@@ -300,6 +301,16 @@ extern "C" {
 	xbfu \reg, \reg, 0xe8
 .endm
 
+.macro _get_curr_cpu_irq_stack irq_sp
+#ifdef CONFIG_SMP
+	_get_cpu_id \irq_sp
+	ld.as \irq_sp, [@_curr_irq_stack, \irq_sp]
+#else
+	mov \irq_sp, _kernel
+	ld \irq_sp, [\irq_sp, _kernel_offset_to_irq_stack]
+#endif
+.endm
+
 #endif /* _ASMLANGUAGE */
 
 #ifdef __cplusplus
diff --git a/arch/arc/include/v2/irq.h b/arch/arc/include/v2/irq.h
index faa62f32ab1..a45d7f1adf9 100644
--- a/arch/arc/include/v2/irq.h
+++ b/arch/arc/include/v2/irq.h
@@ -60,9 +60,6 @@ static ALWAYS_INLINE void z_irq_setup(void)
 
 	k_cpu_sleep_mode = _ARC_V2_WAKE_IRQ_LEVEL;
 	z_arc_v2_aux_reg_write(_ARC_V2_AUX_IRQ_CTRL, aux_irq_ctrl_value);
-
-	_kernel.irq_stack =
-		Z_THREAD_STACK_BUFFER(_interrupt_stack) + CONFIG_ISR_STACK_SIZE;
 }
 
 #endif /* _ASMLANGUAGE */