arch: arm64: Use voting lock for multi-core boot race condition

The exclusive load/store instructions don't work reliably when the MMU and
caches are disabled on some cores, e.g. Cortex-A72. Use a voting lock[1]
instead to select the primary core when multiple cores boot simultaneously.

The voting lock has reasonable but minimal requirements on the memory
system.

[1] https://www.kernel.org/doc/html/next/arch/arm/vlocks.html

Signed-off-by: Jaxson Han <jaxson.han@arm.com>
This commit is contained in:
Jaxson Han 2023-10-13 14:13:28 +08:00 committed by Carles Cufí
commit 7904c6f0f3
4 changed files with 78 additions and 17 deletions

View file

@ -20,7 +20,8 @@ extern void __start(void);
#endif /* _ASMLANGUAGE */
/* Offsets into the boot_params structure */
#define BOOT_PARAM_MPID_OFFSET 0
#define BOOT_PARAM_SP_OFFSET 8
#define BOOT_PARAM_MPID_OFFSET 0
#define BOOT_PARAM_SP_OFFSET 8
#define BOOT_PARAM_VOTING_OFFSET 16
#endif /* _BOOT_H_ */

View file

@ -21,6 +21,27 @@
ubfx \xreg0, \xreg0, #0, #24
.endm
/*
 * Look up the calling CPU's index ("logic id") in cpu_node_list.
 *
 * The id produced by get_cpu_id is compared against successive 64-bit
 * entries of cpu_node_list, starting at index 0. The index of the first
 * matching entry is the logic id. If none of the first
 * CONFIG_MP_MAX_NUM_CPUS entries match, the CPU spins forever inside the
 * macro.
 *
 * returns
 *  xreg0: MPID
 *  xreg1: logic id (0 ~ CONFIG_MP_MAX_NUM_CPUS - 1)
 * clobbers: xreg0, xreg1, xreg2, xreg3 and the condition flags
 */
.macro get_cpu_logic_id xreg0, xreg1, xreg2, xreg3
	get_cpu_id \xreg0
	ldr	\xreg3, =cpu_node_list			/* xreg3 = table base */
	mov	\xreg1, #0				/* xreg1 = candidate index */
1:
	ldr	\xreg2, [\xreg3, \xreg1, lsl #3]	/* xreg2 = table[index], 8-byte entries */
	cmp	\xreg2, \xreg0
	b.eq	2f					/* match: xreg1 holds the logic id */
	add	\xreg1, \xreg1, #1
	cmp	\xreg1, #CONFIG_MP_MAX_NUM_CPUS
	b.ne	1b
	b	.					/* no entry matched: hang here */
2:
.endm
/*
* Get CPU pointer
* Note: keep in sync with `arch_curr_cpu` in include/zephyr/arch/arm64/arch_inlines.h

View file

@ -121,38 +121,75 @@ resetwait:
#if CONFIG_MP_MAX_NUM_CPUS > 1
/*
* Deal with multiple cores booting simultaneously and racing to become the primary core.
* Use a voting lock[1], which has reasonable but minimal requirements on the memory system,
* to make sure that exactly one core wins.
*
* [1] kernel.org/doc/html/next/arch/arm/vlocks.html
*/
ldr x0, =arm64_cpu_boot_params
get_cpu_id x1
/*
* If the cores start up at the same time, we should atomically load and
* store the mpid into arm64_cpu_boot_params.
* Get the "logic" id defined by cpu_node_list statically for voting lock self-identify.
* It is worth noting that this is NOT the final logic id (arch_curr_cpu()->id)
*/
ldaxr x2, [x0, #BOOT_PARAM_MPID_OFFSET]
cmp x2, #-1
bne 1f
/* try to store x1 (mpid) */
stlxr w3, x1, [x0]
/* If succeed, go to primary_core */
cbz w3, primary_core
get_cpu_logic_id x1, x2, x3, x4 //x1: MPID, x2: logic id
add x4, x0, #BOOT_PARAM_VOTING_OFFSET
/* signal our desire to vote */
mov w5, #1
strb w5, [x4, x2]
ldr x3, [x0, #BOOT_PARAM_MPID_OFFSET]
cmn x3, #1
beq 1f
/* some core already won, release */
strb wzr, [x4, x2]
b secondary_core
/* suggest current core then release */
1: str x1, [x0, #BOOT_PARAM_MPID_OFFSET]
strb wzr, [x4, x2]
dmb ish
/* then wait until every other core is done voting */
mov x5, #0
2: ldrb w3, [x4, x5]
tst w3, #255
/* wait */
bne 2b
add x5, x5, #1
cmp x5, #CONFIG_MP_MAX_NUM_CPUS
bne 2b
/* check if current core won */
dmb ish
ldr x3, [x0, #BOOT_PARAM_MPID_OFFSET]
cmp x3, x1
beq primary_core
/* fallthrough secondary */
/* loop until our turn comes */
1: dmb ld
secondary_core:
dmb ish
ldr x2, [x0, #BOOT_PARAM_MPID_OFFSET]
cmp x1, x2
bne 1b
bne secondary_core
/* we can now load our stack pointer value and move on */
ldr x24, [x0, #BOOT_PARAM_SP_OFFSET]
ldr x25, =z_arm64_secondary_prep_c
b 2f
b boot
primary_core:
#endif
/* load primary stack and entry point */
ldr x24, =(z_interrupt_stacks + __z_interrupt_stack_SIZEOF)
ldr x25, =z_arm64_prep_c
2:
boot:
/* Prepare for calling C code */
bl __reset_prep_c

View file

@ -35,6 +35,7 @@
struct boot_params {
uint64_t mpid;
char *sp;
uint8_t voting[CONFIG_MP_MAX_NUM_CPUS];
arch_cpustart_t fn;
void *arg;
int cpu_num;
@ -43,12 +44,13 @@ struct boot_params {
/* Offsets used in reset.S */
BUILD_ASSERT(offsetof(struct boot_params, mpid) == BOOT_PARAM_MPID_OFFSET);
BUILD_ASSERT(offsetof(struct boot_params, sp) == BOOT_PARAM_SP_OFFSET);
BUILD_ASSERT(offsetof(struct boot_params, voting) == BOOT_PARAM_VOTING_OFFSET);
volatile struct boot_params __aligned(L1_CACHE_BYTES) arm64_cpu_boot_params = {
.mpid = -1,
};
static const uint64_t cpu_node_list[] = {
const uint64_t cpu_node_list[] = {
DT_FOREACH_CHILD_STATUS_OKAY_SEP(DT_PATH(cpus), DT_REG_ADDR, (,))
};