/*
 * Copyright (c) 2019 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <device.h>
#include <init.h>
#include <kernel.h>
#include <kernel_structs.h>
#include <toolchain.h>
#include <sys/__assert.h>
#include <sys/sys_io.h>

#include <xtensa/config/core-isa.h>

#include <logging/log.h>
LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);

#include <arch/xtensa/cache.h>
#include <soc.h>
#include <soc/memory.h>
#include <soc/shim.h>
#include <drivers/ipm.h>
#include <ipm/ipm_cavs_idc.h>
#include <string.h>

extern void z_sched_ipi(void);

/* ROM wake version parsed by ROM during core wake up. */
#define IDC_ROM_WAKE_VERSION	0x2

/* IDC message type. */
#define IDC_TYPE_SHIFT		24
#define IDC_TYPE_MASK		0x7f
#define IDC_TYPE(x)		(((x) & IDC_TYPE_MASK) << IDC_TYPE_SHIFT)

/* IDC message header. */
#define IDC_HEADER_MASK		0xffffff
#define IDC_HEADER(x)		((x) & IDC_HEADER_MASK)

/* IDC message extension. */
#define IDC_EXTENSION_MASK	0x3fffffff
#define IDC_EXTENSION(x)	((x) & IDC_EXTENSION_MASK)

/* IDC power up message. */
#define IDC_MSG_POWER_UP \
	(IDC_TYPE(0x1) | IDC_HEADER(IDC_ROM_WAKE_VERSION))

#define IDC_MSG_POWER_UP_EXT(x)	IDC_EXTENSION((x) >> 2)

struct cpustart_rec {
	uint32_t	cpu;
	arch_cpustart_t	fn;
	void		*arg;
	uint32_t	vecbase;
	uint32_t	alive;
};

char *z_mp_stack_top;

#ifdef CONFIG_KERNEL_COHERENCE
/* Coherence guarantees that normal .data will be coherent and that it
 * won't overlap any cached memory.
 */
static struct {
	struct cpustart_rec cpustart;
} cpustart_mem;
#else
/* If .data RAM is by default incoherent, then the start record goes
 * into its own dedicated cache line(s).
 */
static __aligned(XCHAL_DCACHE_LINESIZE) union {
	struct cpustart_rec cpustart;
	char pad[XCHAL_DCACHE_LINESIZE];
} cpustart_mem;
#endif

#define start_rec \
	(*((volatile struct cpustart_rec *) \
	   z_soc_uncached_ptr(&cpustart_mem.cpustart)))

static uint32_t cpu_mask;

/* Simple array of CPUs that are active and available for an IPI.  The
 * IDC interrupt is ALSO used to bring a CPU out of reset, so we need
 * to be absolutely sure we don't try to IPI a CPU that isn't ready to
 * start, or else we'll launch it into garbage and crash the DSP.
 */
static bool cpus_active[CONFIG_MP_NUM_CPUS];

/* Tiny assembly stub for calling z_mp_entry() on the auxiliary CPUs.
 * Mask interrupts, clear the register window state and set the stack
 * pointer.  This represents the minimum work required to run C code
 * safely.
 *
 * Note that alignment is absolutely required: the IDC protocol passes
 * only the upper 30 bits of the address to the second CPU.
 */
void z_soc_mp_asm_entry(void);
__asm__(".align 4                   \n\t"
	".global z_soc_mp_asm_entry \n\t"
	"z_soc_mp_asm_entry:        \n\t"
	"  movi  a0, 0x40025        \n\t" /* WOE | UM | INTLEVEL(5) */
	"  wsr   a0, PS             \n\t"
	"  movi  a0, 0              \n\t"
	"  wsr   a0, WINDOWBASE     \n\t"
	"  movi  a0, 1              \n\t"
	"  wsr   a0, WINDOWSTART    \n\t"
	"  rsync                    \n\t"
	"  movi  a1, z_mp_stack_top \n\t"
	"  l32i  a1, a1, 0          \n\t"
	"  call4 z_mp_entry         \n\t");

/* The PS value in the stub above hard-codes INTLEVEL(5), which is
 * only correct if that is also the core's EXCM level.
 */
BUILD_ASSERT(XCHAL_EXCM_LEVEL == 5);

int cavs_idc_smp_init(const struct device *dev);

#define CxL1CCAP (*(volatile uint32_t *)0x9F080080)
#define CxL1CCFG (*(volatile uint32_t *)0x9F080084)
#define CxL1PCFG (*(volatile uint32_t *)0x9F080088)

/* "Data/Instruction Cache Memory Way Count" fields */
#define CxL1CCAP_DCMWC ((CxL1CCAP >> 16) & 7)
#define CxL1CCAP_ICMWC ((CxL1CCAP >> 20) & 7)

static ALWAYS_INLINE void enable_l1_cache(void)
{
	uint32_t reg;

#ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	/* First, on cAVS 2.5 we need to power the cache SRAM banks
	 * on!  Write a bit for each cache way in the bottom half of
	 * the L1CCFG register and poll the top half for them to turn
	 * on.
	 */
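	/* Worked example (hypothetical way counts, not taken from any
	 * datasheet): if CxL1CCAP reported 4 data ways and 4
	 * instruction ways, then dmask = BIT(4) - 1 = 0x0f, imask =
	 * 0x0f and waymask = (0x0f << 8) | 0x0f = 0x0f0f; the poll
	 * below then waits until that same 0x0f0f pattern reads back
	 * from the status bits in the top half of CxL1CCFG.
	 */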
	uint32_t dmask = BIT(CxL1CCAP_DCMWC) - 1;
	uint32_t imask = BIT(CxL1CCAP_ICMWC) - 1;
	uint32_t waymask = (imask << 8) | dmask;

	CxL1CCFG = waymask;
	while (((CxL1CCFG >> 16) & waymask) != waymask) {
	}

	/* Prefetcher also power gates, same interface */
	CxL1PCFG = 1;
	while ((CxL1PCFG & 0x10000) == 0) {
	}
#endif

	/* Now set up the Xtensa CPU to enable the cache logic.  The
	 * details of the fields are somewhat complicated, but per the
	 * ISA ref: "Turning on caches at power-up usually consists of
	 * writing a constant with bits[31:8] all 1's to MEMCTL.".
	 * Also set bit 0 to enable the LOOP extension instruction
	 * fetch buffer.
	 */
#ifdef XCHAL_HAVE_ICACHE_DYN_ENABLE
	reg = 0xffffff01;
	__asm__ volatile("wsr %0, MEMCTL; rsync" :: "r"(reg));
#endif

	/* Likewise enable prefetching.  Sadly these values are not
	 * architecturally defined by Xtensa (they're just documented
	 * as priority hints), so this constant is just copied from
	 * SOF for now.  If we care about prefetch priority tuning
	 * we're supposed to ask Cadence, I guess.
	 */
	reg = IS_ENABLED(CONFIG_SOC_SERIES_INTEL_CAVS_V25) ? 0x1038 : 0;
	__asm__ volatile("wsr %0, PREFCTL; rsync" :: "r"(reg));

	/* Finally we need to enable the cache in the Region
	 * Protection Option "TLB" entries.  The hardware defaults
	 * have this set to RW/uncached (2) everywhere.  We want
	 * writeback caching (4) in the sixth mapping (the second of
	 * two RAM mappings) and to mark all unused regions
	 * inaccessible (15) for safety.  Note that there is a HAL
	 * routine that does this (by emulating the older "cacheattr"
	 * hardware register), but it generates significantly larger
	 * code.
	 */
	const uint8_t attribs[] = { 2, 15, 15, 15, 2, 4, 15, 15 };

	for (int region = 0; region < 8; region++) {
		reg = 0x20000000 * region;
		__asm__ volatile("wdtlb %0, %1" :: "r"(attribs[region]), "r"(reg));
	}
}

void z_mp_entry(void)
{
	volatile int ie;
	uint32_t reg;

	enable_l1_cache();

	/* Fix ATOMCTL to match CPU0.  Hardware defaults for S32C1I
	 * use internal operations (and are thus presumably atomic
	 * only WRT the local CPU!).  We need external transactions
	 * on the shared bus.
	 */
	reg = 0x15;
	__asm__ volatile("wsr %0, ATOMCTL" :: "r"(reg));

	/* We don't know what the boot ROM (on pre-2.5 DSPs) might
	 * have touched and we don't care.  Make sure it's not in our
	 * local cache to be flushed accidentally later.
	 *
	 * Note that technically this is dropping our own (cached)
	 * stack memory, which we don't have a guarantee the compiler
	 * isn't using yet.  Manual inspection of generated code says
	 * we're safe, but really we need a better solution here.
	 */
#ifndef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	z_xtensa_cache_flush_inv_all();
#endif

	/* Copy over VECBASE from the main CPU for an initial value
	 * (will need to revisit this if we ever allow a user API to
	 * change interrupt vectors at runtime).  INTENABLE is cleared
	 * first so no interrupt can arrive before the vector base is
	 * valid.
	 */
	ie = 0;
	__asm__ volatile("wsr.INTENABLE %0" : : "r"(ie));
	__asm__ volatile("wsr.VECBASE %0" : : "r"(start_rec.vecbase));
	__asm__ volatile("rsync");

	/* Set up the CPU pointer. */
	_cpu_t *cpu = &_kernel.cpus[start_rec.cpu];

	__asm__ volatile(
		"wsr." CONFIG_XTENSA_KERNEL_CPU_PTR_SR " %0" : : "r"(cpu));
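
	/* Note (describing Zephyr's generic Xtensa layer, not code in
	 * this file): arch_curr_cpu() reads this same special register
	 * back with an RSR of CONFIG_XTENSA_KERNEL_CPU_PTR_SR, so
	 * _current and the rest of the per-CPU machinery only become
	 * usable on this core once the WSR above has executed.
	 */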

	/* We got here via an IDC interrupt.  Clear the TFC high bit
	 * (by writing a one!) to acknowledge and clear the latched
	 * hardware interrupt (so we don't have to service it as a
	 * spurious IPI when we enter user code).  Remember: this
	 * could have come from any core, so clear all of them.
	 */
	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
		IDC[start_rec.cpu].core[i].tfc = BIT(31);
	}

	/* Interrupt must be enabled while running on current core */
	irq_enable(DT_IRQN(DT_INST(0, intel_cavs_idc)));

#ifdef CONFIG_SMP_BOOT_DELAY
	cavs_idc_smp_init(NULL);
#endif

	cpus_active[start_rec.cpu] = true;

	start_rec.alive = 1;

	start_rec.fn(start_rec.arg);

#if CONFIG_MP_NUM_CPUS == 1
	/* CPU#1 can be under manual control running custom functions
	 * instead of participating in general thread execution.
	 * Put the CPU into idle after those functions return, so
	 * this routine never returns.
	 */
	for (;;) {
		k_cpu_idle();
	}
#endif
}

bool arch_cpu_active(int cpu_num)
{
	return !!(cpu_mask & BIT(cpu_num));
}

static ALWAYS_INLINE uint32_t prid(void)
{
	uint32_t prid;

	__asm__ volatile("rsr %0, PRID" : "=r"(prid));
	return prid;
}

void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
		    arch_cpustart_t fn, void *arg)
{
	uint32_t vecbase, curr_cpu;

	__asm__ volatile("rsr %0, PRID" : "=r"(curr_cpu));

#ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	/* On cAVS v2.5, MP startup works differently.  The core has
	 * no ROM, and starts running immediately upon receipt of an
	 * IDC interrupt at the start of LPSRAM at 0xbe800000.  Note
	 * that means we don't need to bother constructing a "message"
	 * below, as it will be ignored.  But it's left in place for
	 * simplicity and compatibility.
	 *
	 * All we need to do is place a single jump at that address to
	 * our existing MP entry point.  Unfortunately Xtensa makes
	 * this difficult, as the region is beyond the range of a
	 * relative jump instruction, so we need an immediate, which
	 * can only be backwards-referenced.  So we hand-assemble a
	 * tiny trampoline here ("jump over the immediate address,
	 * load it, jump to it").
	 *
	 * Long term we want to have this in linkable LP-SRAM memory
	 * such that the standard system bootstrap out of IMR can
	 * place it there.  But this is fine for now.
	 */
	void **lpsram = z_soc_uncached_ptr((void *)LP_SRAM_BASE);
	uint8_t tramp[] = {
		0x06, 0x01, 0x00,	/* J (jump to the L32R below) */
		0,			/* (padding to align entry_addr) */
		0, 0, 0, 0,		/* (entry_addr goes here) */
		0x01, 0xff, 0xff,	/* L32R a0, entry_addr */
		0xa0, 0x00, 0x00,	/* JX a0 */
	};

	memcpy(lpsram, tramp, ARRAY_SIZE(tramp));
	lpsram[1] = z_soc_mp_asm_entry;
#endif

	__asm__ volatile("rsr.VECBASE %0\n\t" : "=r"(vecbase));

	start_rec.cpu = cpu_num;
	start_rec.fn = fn;
	start_rec.arg = arg;
	start_rec.vecbase = vecbase;
	start_rec.alive = 0;

	z_mp_stack_top = Z_THREAD_STACK_BUFFER(stack) + sz;

	/* Send power up message to the other core */
	uint32_t ietc = IDC_MSG_POWER_UP_EXT((long) z_soc_mp_asm_entry);

	IDC[curr_cpu].core[cpu_num].ietc = ietc;
	IDC[curr_cpu].core[cpu_num].itc = IDC_MSG_POWER_UP | IPC_IDCITC_BUSY;
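
	/* For reference, with the macros at the top of this file the
	 * message just sent decodes as: IDC_MSG_POWER_UP =
	 * IDC_TYPE(0x1) | IDC_HEADER(0x2) = (0x1 << 24) | 0x2 =
	 * 0x01000002, while the extension register carries the entry
	 * point shifted right by two (IDC_MSG_POWER_UP_EXT).  The
	 * shift only round-trips because z_soc_mp_asm_entry is
	 * aligned, so its low two address bits are zero.
	 */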

#ifndef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	/* Early DSPs have a ROM that actually receives the startup
	 * IDC as an interrupt, and we don't want that to be confused
	 * by IPIs sent by the OS elsewhere.  Mask the IDC interrupt
	 * on the other core so an IPI won't cause it to jump to the
	 * ISR until the core is fully initialized.
	 */
	uint32_t idc_reg = idc_read(IPC_IDCCTL, cpu_num);

	idc_reg &= ~IPC_IDCCTL_IDCTBIE(0);
	idc_write(IPC_IDCCTL, cpu_num, idc_reg);
	sys_set_bit(DT_REG_ADDR(DT_NODELABEL(cavs0)) + 0x00 +
		    CAVS_ICTL_INT_CPU_OFFSET(cpu_num), 8);

	k_busy_wait(100);

#ifdef CONFIG_SMP_BOOT_DELAY
	cavs_idc_smp_init(NULL);
#endif
#endif

	while (!start_rec.alive) {
	}

	/*
	 * No locking needed as long as CPUs can only be powered on by the
	 * main CPU and cannot be powered off.
	 */
	cpu_mask |= BIT(cpu_num);
}

void arch_sched_ipi(void)
{
#ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	uint32_t curr = prid();

	for (int c = 0; c < CONFIG_MP_NUM_CPUS; c++) {
		if (c != curr && cpus_active[c]) {
			IDC[curr].core[c].itc = BIT(31);
		}
	}
#else
	/* Legacy implementation for cAVS 1.5 based on the 2-core-only
	 * IPM driver.  To be replaced with the general implementation
	 * above once it has been validated.
	 */
	const struct device *idcdev =
		device_get_binding(DT_LABEL(DT_INST(0, intel_cavs_idc)));

	ipm_send(idcdev, 0, IPM_CAVS_IDC_MSG_SCHED_IPI_ID,
		 IPM_CAVS_IDC_MSG_SCHED_IPI_DATA, 0);
#endif
}

void idc_isr(void *param)
{
	ARG_UNUSED(param);

#ifdef CONFIG_SMP
	/* Right now this interrupt is only used for IPIs */
	z_sched_ipi();
#endif

	/* ACK the interrupt to all the possible sources.  This is a
	 * level-sensitive interrupt triggered by a logical OR of each
	 * of the ITC/TFC high bits, INCLUDING the one "from this
	 * CPU".  Use the index of the core we are actually running
	 * on: this ISR can fire on any core, not just the last one
	 * started.
	 */
	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
		IDC[prid()].core[i].tfc = BIT(31);
	}
}

#ifndef CONFIG_IPM_CAVS_IDC
/* Fallback stub for external SOF code */
int cavs_idc_smp_init(const struct device *dev)
{
	ARG_UNUSED(dev);
	return 0;
}
#endif

void soc_idc_init(void)
{
#ifndef CONFIG_IPM_CAVS_IDC
	IRQ_CONNECT(DT_IRQN(DT_NODELABEL(idc)), 0, idc_isr, NULL, 0);
#endif

	/* Every CPU should be able to receive an IDC interrupt from
	 * every other CPU, but not to be back-interrupted when the
	 * target core clears the busy bit.
	 */
	for (int core = 0; core < CONFIG_MP_NUM_CPUS; core++) {
		uint32_t coremask = BIT(CONFIG_MP_NUM_CPUS) - 1;

		IDC[core].busy_int |= coremask;
		IDC[core].done_int &= ~coremask;

		/* Also unmask the interrupt for every core in the L2
		 * mask register.  Really this should have an API
		 * exposed out of the interrupt controller layer...
		 */
		sys_set_bit(DT_REG_ADDR(DT_NODELABEL(cavs0)) + 0x04 +
			    CAVS_ICTL_INT_CPU_OFFSET(core), 8);
	}
}
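
/* Example of the mask arithmetic in soc_idc_init() above (values are
 * illustrative): with CONFIG_MP_NUM_CPUS == 2, coremask = BIT(2) - 1
 * = 0x3, so each core's busy_int gets bits 0 and 1 set (take an
 * interrupt on a peer's "busy" request) while the same bits are
 * cleared in done_int (no interrupt when the peer acknowledges by
 * clearing the busy bit).
 */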