soc: intel_adsp: New IDC driver

The original interface for the intra-DSP communication hardware on
these devices was buried inside a Zephyr IPM implementation.
Unfortunately IPM is a two-endpoint point-to-point communication
layer, it can't represent the idea of devices with more than 2 cores.
And our usage (to push a no-argument/no-response scheduler IPI) was
sort of an abuse of that metaphor anyway.

Add a new IDC interface at the SOC layer, borrowing the C struct
convention already used for the DSP shim registers.

Augment with extensive documentation, extracted via a ton of
experimentation on cAVS 2.5 hardware.

Note that this leaves the previous driver in place for the cavs_v15
and intel_s1000 devices.  In principle they should use it too (the
hardware registers are identical), but this hasn't been validated yet.

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
This commit is contained in:
Andy Ross 2021-08-07 09:44:09 -07:00 committed by Anas Nashif
commit 0228c05681
3 changed files with 191 additions and 42 deletions

View file

@ -0,0 +1,84 @@
/*
* Copyright (c) 2021 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef ZEPHYR_SOC_INTEL_ADSP_CAVS_IDC_H_
#define ZEPHYR_SOC_INTEL_ADSP_CAVS_IDC_H_
/*
* (I)ntra (D)SP (C)ommunication is the facility for sending
* interrupts directly between DSP cores. The interface
* is... somewhat needlessly complicated.
*
* Each core has a set of registers it is supposed to use, but all
* registers seem to behave symmetrically regardless of which CPU does
* the access.
*
* Each core has an "ITC" register associated with each other core in
* the system (including itself). When the high bit becomes 1 in an
* ITC register, an IDC interrupt is latched for the target core.
* Data in other bits is stored but otherwise ignored, it's merely
* data to be transmitted along with the interrupt.
*
* On the target core, there is a "TFC" register for each core that
* reflects the same value written to ITC. In fact experimentally
* these seem to be the same register at different addresses. When
* the high bit of TFC is written with a 1, the value becomes ZERO,
* indicating an acknowledgment of the interrupt. This action can
* also latch an interrupt to send back to the originator if unmasked
* (see below).
*
* (There is also an IETC/TEFC register pair that stores 30 bits of
* data but otherwise has no hardware behavior. This is probably best
* ignored for new protocols, as experimentally it seems to provide no
* performance benefit vs. storing a message in RAM. The cAVS 1.5/1.8
* ROM boot protocol uses it to store an entry point address, though.)
*
* So you can send a synchronous message from core "src" (where src is
* the PRID of the CPU, equal to arch_curr_cpu()->id in Zephyr) to
* core "dst" with:
*
* IDC[src].core[dst].itc = BIT(31) | message;
* while (IDC[src].core[dst].itc & BIT(31)) {}
*
* And the other side (on cpu "dst", generally in the IDC interrupt
* handler) will read and acknowledge those same values via:
*
* uint32_t my_msg = IDC[dst].core[src].tfc & 0x7fffffff;
* IDC[dst].core[src].tfc = BIT(31); // clear high bit to signal completion
*
* And for clarity, at all times and for all cores and all pairs of src/dst:
*
* IDC[src].core[dst].itc == IDC[dst].core[src].tfc
*
* Finally note the two control registers at the end of each core's
* register block, which store a bitmask of cores that are allowed to
* send that core an interrupt via either ITC (set high "BUSY" bit) or
* TFC (clear high "DONE" bit). This masking is in ADDITION to the
* level 2 bit for IDC in the per-core INTCTRL DSP register AND the
* Xtensa architectural INTENABLE SR. You must enable IDC interrupts
* from core "src" to core "dst" with:
*
* IDC[dst].busy_int |= BIT(src) // Or disable with "&= ~BIT(src)" of course
*/
struct cavs_idc {
struct {
uint32_t tfc; /* (T)arget (F)rom (C)ore */
uint32_t tefc; /* ^^ + (E)xtension */
uint32_t itc; /* (I)nitiator (T)o (C)ore */
uint32_t ietc; /* ^^ + (E)xtension */
} core[4];
uint32_t unused0[4];
uint8_t busy_int; /* bitmask of cores that can IDC via ITC */
uint8_t done_int; /* bitmask of cores that can IDC via TFC */
uint8_t unused1;
uint8_t unused2;
uint32_t unused3[11];
};
#define IDC ((volatile struct cavs_idc *)DT_REG_ADDR(DT_NODELABEL(idc)))
extern void soc_idc_init(void);
#endif /* ZEPHYR_SOC_INTEL_ADSP_CAVS_IDC_H_ */

View file

@ -11,6 +11,7 @@
#include <init.h>
#include <soc/shim.h>
#include <cavs-idc.h>
#include "soc.h"
#ifdef CONFIG_DYNAMIC_INTERRUPTS
@ -282,6 +283,11 @@ static int soc_init(const struct device *dev)
#endif
soc_set_power_and_clock();
#if CONFIG_MP_NUM_CPUS > 1
soc_idc_init();
#endif
return 0;
}

View file

@ -17,6 +17,7 @@
#include <logging/log.h>
LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);
#include <cavs-idc.h>
#include <soc.h>
#include <arch/xtensa/cache.h>
#include <adsp/io.h>
@ -26,9 +27,7 @@ LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);
#include <drivers/ipm.h>
#include <ipm/ipm_cavs_idc.h>
#if CONFIG_MP_NUM_CPUS > 1 && !defined(CONFIG_IPM_CAVS_IDC) && defined(CONFIG_SMP)
#error Need to enable the IPM driver for multiprocessing
#endif
extern void z_sched_ipi(void);
/* ROM wake version parsed by ROM during core wake up. */
#define IDC_ROM_WAKE_VERSION 0x2
@ -52,10 +51,6 @@ LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);
#define IDC_MSG_POWER_UP_EXT(x) IDC_EXTENSION((x) >> 2)
#ifdef CONFIG_IPM_CAVS_IDC
static const struct device *idc;
#endif
struct cpustart_rec {
uint32_t cpu;
@ -156,8 +151,10 @@ static ALWAYS_INLINE void enable_l1_cache(void)
* Also set bit 0 to enable the LOOP extension instruction
* fetch buffer.
*/
#ifdef XCHAL_HAVE_ICACHE_DYN_ENABLE
reg = 0xffffff01;
__asm__ volatile("wsr %0, MEMCTL; rsync" :: "r"(reg));
#endif
/* Likewise enable prefetching. Sadly these values are not
* architecturally defined by Xtensa (they're just documented
@ -189,7 +186,7 @@ static ALWAYS_INLINE void enable_l1_cache(void)
void z_mp_entry(void)
{
volatile int ie;
uint32_t idc_reg, reg;
uint32_t reg;
enable_l1_cache();
@ -229,14 +226,18 @@ void z_mp_entry(void)
__asm__ volatile(
"wsr." CONFIG_XTENSA_KERNEL_CPU_PTR_SR " %0" : : "r"(cpu));
/* Clear busy bit set by power up message */
idc_reg = idc_read(IPC_IDCTFC(0), start_rec.cpu) | IPC_IDCTFC_BUSY;
idc_write(IPC_IDCTFC(0), start_rec.cpu, idc_reg);
/* We got here via an IDC interrupt. Clear the TFC high bit
* (by writing a one!) to acknowledge and clear the latched
* hardware interrupt (so we don't have to service it as a
* spurious IPI when we enter user code). Remember: this
* could have come from any core, clear all of them.
*/
for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
IDC[start_rec.cpu].core[i].tfc = BIT(31);
}
#ifdef CONFIG_IPM_CAVS_IDC
/* Interrupt must be enabled while running on current core */
irq_enable(DT_IRQN(DT_INST(0, intel_cavs_idc)));
#endif /* CONFIG_IPM_CAVS_IDC */
#ifdef CONFIG_SMP_BOOT_DELAY
cavs_idc_smp_init(NULL);
@ -266,8 +267,9 @@ bool arch_cpu_active(int cpu_num)
void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
arch_cpustart_t fn, void *arg)
{
uint32_t vecbase;
uint32_t idc_reg;
uint32_t vecbase, curr_cpu;
__asm__ volatile("rsr %0, PRID" : "=r"(curr_cpu));
#ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25
/* On cAVS v2.5, MP startup works differently. The core has
@ -312,28 +314,21 @@ void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
z_mp_stack_top = Z_THREAD_STACK_BUFFER(stack) + sz;
#ifdef CONFIG_IPM_CAVS_IDC
idc = device_get_binding(DT_LABEL(DT_INST(0, intel_cavs_idc)));
#endif
/* Enable IDC interrupt on the other core */
idc_reg = idc_read(IPC_IDCCTL, cpu_num);
idc_reg |= IPC_IDCCTL_IDCTBIE(0);
idc_write(IPC_IDCCTL, cpu_num, idc_reg);
/* FIXME: 8 is IRQ_BIT_LVL2_IDC / PLATFORM_IDC_INTERRUPT */
sys_set_bit(DT_REG_ADDR(DT_NODELABEL(cavs0)) + 0x04 +
CAVS_ICTL_INT_CPU_OFFSET(cpu_num), 8);
/* Send power up message to the other core */
uint32_t ietc = IDC_MSG_POWER_UP_EXT((long) z_soc_mp_asm_entry);
idc_write(IPC_IDCIETC(cpu_num), 0, ietc);
idc_write(IPC_IDCITC(cpu_num), 0, IDC_MSG_POWER_UP | IPC_IDCITC_BUSY);
IDC[curr_cpu].core[cpu_num].ietc = ietc;
IDC[curr_cpu].core[cpu_num].itc = IDC_MSG_POWER_UP | IPC_IDCITC_BUSY;
/* Disable IDC interrupt on other core so IPI won't cause
* them to jump to ISR until the core is fully initialized.
#ifndef CONFIG_SOC_SERIES_INTEL_CAVS_V25
/* Early DSPs have a ROM that actually receives the startup
* IDC as an interrupt, and we don't want that to be confused
* by IPIs sent by the OS elsewhere. Mask the IDC interrupt
* on other core so IPI won't cause them to jump to ISR until
* the core is fully initialized.
*/
idc_reg = idc_read(IPC_IDCCTL, cpu_num);
uint32_t idc_reg = idc_read(IPC_IDCCTL, cpu_num);
idc_reg &= ~IPC_IDCCTL_IDCTBIE(0);
idc_write(IPC_IDCCTL, cpu_num, idc_reg);
sys_set_bit(DT_REG_ADDR(DT_NODELABEL(cavs0)) + 0x00 +
@ -344,10 +339,7 @@ void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
#ifdef CONFIG_SMP_BOOT_DELAY
cavs_idc_smp_init(NULL);
#endif
/* Clear done bit from responding the power up message */
idc_reg = idc_read(IPC_IDCIETC(cpu_num), 0) | IPC_IDCIETC_DONE;
idc_write(IPC_IDCIETC(cpu_num), 0, idc_reg);
#endif
while (!start_rec.alive)
;
@ -359,13 +351,80 @@ void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
cpu_mask |= BIT(cpu_num);
}
#ifdef CONFIG_SCHED_IPI_SUPPORTED
FUNC_ALIAS(soc_sched_ipi, arch_sched_ipi, void);
void soc_sched_ipi(void)
void arch_sched_ipi(void)
{
if (idc != NULL) {
ipm_send(idc, 0, IPM_CAVS_IDC_MSG_SCHED_IPI_ID,
IPM_CAVS_IDC_MSG_SCHED_IPI_DATA, 0);
#ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25
uint32_t prid;
__asm__ volatile("rsr %0, PRID" : "=r"(prid));
for (int c = 0; c < CONFIG_MP_NUM_CPUS; c++) {
if (c != prid) {
IDC[prid].core[c].itc = BIT(31);
}
}
#else
/* Legacy implementation for cavs15 based on the 2-core-only
* IPM driver. To be replaced with the general one when
* validated.
*/
const struct device *idcdev =
device_get_binding(DT_LABEL(DT_INST(0, intel_cavs_idc)));
ipm_send(idcdev, 0, IPM_CAVS_IDC_MSG_SCHED_IPI_ID,
IPM_CAVS_IDC_MSG_SCHED_IPI_DATA, 0);
#endif
}
void idc_isr(void *param)
{
ARG_UNUSED(param);
#ifdef CONFIG_SMP
/* Right now this interrupt is only used for IPIs */
z_sched_ipi();
#endif
/* ACK the interrupt to all the possible sources. This is a
* level-sensitive interrupt triggered by a logical OR of each
* of the ITC/TFC high bits, INCLUDING the one "from this
* CPU".
*/
for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
IDC[start_rec.cpu].core[i].tfc = BIT(31);
}
}
#ifndef CONFIG_IPM_CAVS_IDC
/* Fallback stub for external SOF code */
int cavs_idc_smp_init(const struct device *dev)
{
ARG_UNUSED(dev);
return 0;
}
#endif
void soc_idc_init(void)
{
#ifndef CONFIG_IPM_CAVS_IDC
IRQ_CONNECT(DT_IRQN(DT_NODELABEL(idc)), 0, idc_isr, NULL, 0);
#endif
/* Every CPU should be able to receive an IDC interrupt from
* every other CPU, but not to be back-interrupted when the
* target core clears the busy bit.
*/
for (int core = 0; core < CONFIG_MP_NUM_CPUS; core++) {
uint32_t coremask = BIT(CONFIG_MP_NUM_CPUS) - 1;
IDC[core].busy_int |= coremask;
IDC[core].done_int &= ~coremask;
/* Also unmask the interrupt for every core in the L2
* mask register. Really this should have an API
* exposed out of the interrupt controller layer...
*/
sys_set_bit(DT_REG_ADDR(DT_NODELABEL(cavs0)) + 0x04 +
CAVS_ICTL_INT_CPU_OFFSET(core), 8);
}
}