arch: arm: Collect full register state in Cortex-M Exception Stack Frame
To debug hard-to-reproduce faults/panics, it's helpful to get the full register state at the time a fault occurred. This enables recovering full backtraces and the state of local variables at the time of a crash. This PR introduces a new Kconfig option, CONFIG_EXTRA_EXCEPTION_INFO, to facilitate this use case. The option enables the capturing of the callee-saved register state (r4-r11 & exc_return) during a fault. The info is forwarded to `k_sys_fatal_error_handler` in the z_arch_esf_t parameter. From there, the data can be saved for post-mortem analysis. To test the functionality a new unit test was added to tests/arch/arm_interrupt which verifies the register contents passed in the argument match the state leading up to a crash. Signed-off-by: Chris Coleman <chris@memfault.com>
This commit is contained in:
parent
db0e559b74
commit
99a268fa16
10 changed files with 264 additions and 8 deletions
12
arch/Kconfig
12
arch/Kconfig
|
@ -367,6 +367,15 @@ config IRQ_OFFLOAD
|
|||
run in interrupt context. Only useful for test cases that need
|
||||
to validate the correctness of kernel objects in IRQ context.
|
||||
|
||||
|
||||
config EXTRA_EXCEPTION_INFO
|
||||
bool "Collect extra exception info"
|
||||
depends on ARCH_HAS_EXTRA_EXCEPTION_INFO
|
||||
help
|
||||
This option enables the collection of extra information, such as
|
||||
register state, when a fault occurs. This information can be useful
|
||||
to collect for post-mortem analysis and debug of issues.
|
||||
|
||||
endmenu # Interrupt configuration
|
||||
|
||||
endmenu
|
||||
|
@ -399,6 +408,9 @@ config ARCH_HAS_NESTED_EXCEPTION_DETECTION
|
|||
config ARCH_SUPPORTS_COREDUMP
|
||||
bool
|
||||
|
||||
config ARCH_HAS_EXTRA_EXCEPTION_INFO
|
||||
bool
|
||||
|
||||
#
|
||||
# Other architecture related options
|
||||
#
|
||||
|
|
|
@ -17,6 +17,7 @@ config CPU_CORTEX_M
|
|||
select ARCH_HAS_RAMFUNC_SUPPORT
|
||||
select ARCH_HAS_NESTED_EXCEPTION_DETECTION
|
||||
select SWAP_NONATOMIC
|
||||
select ARCH_HAS_EXTRA_EXCEPTION_INFO
|
||||
imply XIP
|
||||
help
|
||||
This option signifies the use of a CPU of the Cortex-M family.
|
||||
|
|
|
@ -929,9 +929,11 @@ static inline z_arch_esf_t *get_esf(uint32_t msp, uint32_t psp, uint32_t exc_ret
|
|||
* @param msp MSP value immediately after the exception occurred
|
||||
* @param psp PSP value immediately after the exception occurred
|
||||
* @param exc_return EXC_RETURN value present in LR after exception entry.
|
||||
* @param callee_regs Callee-saved registers (R4-R11, PSP)
|
||||
*
|
||||
*/
|
||||
void z_arm_fault(uint32_t msp, uint32_t psp, uint32_t exc_return)
|
||||
void z_arm_fault(uint32_t msp, uint32_t psp, uint32_t exc_return,
|
||||
_callee_saved_t *callee_regs)
|
||||
{
|
||||
uint32_t reason = K_ERR_CPU_EXCEPTION;
|
||||
int fault = SCB->ICSR & SCB_ICSR_VECTACTIVE_Msk;
|
||||
|
@ -963,7 +965,20 @@ void z_arm_fault(uint32_t msp, uint32_t psp, uint32_t exc_return)
|
|||
}
|
||||
|
||||
/* Copy ESF */
|
||||
#if !defined(CONFIG_EXTRA_EXCEPTION_INFO)
|
||||
memcpy(&esf_copy, esf, sizeof(z_arch_esf_t));
|
||||
ARG_UNUSED(callee_regs);
|
||||
#else
|
||||
/* the extra exception info is not present in the original esf
|
||||
* so we only copy the fields before those.
|
||||
*/
|
||||
memcpy(&esf_copy, esf, offsetof(z_arch_esf_t, extra_info));
|
||||
esf_copy.extra_info = (struct __extra_esf_info) {
|
||||
.callee = callee_regs,
|
||||
.exc_return = exc_return,
|
||||
.msp = msp
|
||||
};
|
||||
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
|
||||
|
||||
/* Overwrite stacked IPSR to mark a nested exception,
|
||||
* or a return to Thread mode. Note that this may be
|
||||
|
|
|
@ -46,9 +46,10 @@ GTEXT(z_arm_exc_spurious)
|
|||
* - the MSP
|
||||
* - the PSP
|
||||
* - the EXC_RETURN value
|
||||
* - callee saved register state (r4-r11, psp)
|
||||
* as parameters to the z_arm_fault() C function that will perform the
|
||||
* rest of the fault handling (i.e. z_arm_fault(MSP, PSP, EXC_RETURN)).
|
||||
|
||||
* rest of the fault handling:
|
||||
* (i.e. z_arm_fault(MSP, PSP, EXC_RETURN, CALLEE_REGS)).
|
||||
* Provides these symbols:
|
||||
*
|
||||
* z_arm_hard_fault
|
||||
|
@ -78,12 +79,35 @@ SECTION_SUBSEC_FUNC(TEXT,__fault,z_arm_exc_spurious)
|
|||
|
||||
mrs r0, MSP
|
||||
mrs r1, PSP
|
||||
mov r2, lr /* EXC_RETURN */
|
||||
|
||||
push {r0, lr}
|
||||
|
||||
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
|
||||
/* Build _callee_saved_t. To match the struct
|
||||
* definition we push the psp & then r11-r4
|
||||
*/
|
||||
push { r1, r2 }
|
||||
#if defined(CONFIG_ARMV6_M_ARMV8_M_BASELINE)
|
||||
mov r3, r11
|
||||
mov r2, r10
|
||||
push {r2, r3}
|
||||
mov r3, r9
|
||||
mov r2, r8
|
||||
push {r2, r3}
|
||||
push {r4-r7}
|
||||
#elif defined(CONFIG_ARMV7_M_ARMV8_M_MAINLINE)
|
||||
push {r4-r11}
|
||||
#endif
|
||||
mov r3, sp /* pointer to _callee_saved_t */
|
||||
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
|
||||
mov r2, lr /* EXC_RETURN */
|
||||
bl z_arm_fault
|
||||
|
||||
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
|
||||
/* We do not need to restore any register state here
|
||||
* because we did not use any callee-saved registers
|
||||
* in this routine. Therefore, we can just reset
|
||||
* the MSP to its value prior to entering the function
|
||||
*/
|
||||
add sp, #40
|
||||
#endif
|
||||
pop {r0, pc}
|
||||
|
||||
.end
|
||||
|
|
|
@ -34,6 +34,18 @@ static void esf_dump(const z_arch_esf_t *esf)
|
|||
}
|
||||
LOG_ERR("fpscr: 0x%08x", esf->fpscr);
|
||||
#endif
|
||||
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
|
||||
const struct _callee_saved *callee = esf->extra_info.callee;
|
||||
|
||||
if (callee != NULL) {
|
||||
LOG_ERR("r4/v1: 0x%08x r5/v2: 0x%08x r6/v3: 0x%08x",
|
||||
callee->v1, callee->v2, callee->v3);
|
||||
LOG_ERR("r7/v4: 0x%08x r8/v5: 0x%08x r9/v6: 0x%08x",
|
||||
callee->v4, callee->v5, callee->v6);
|
||||
LOG_ERR("r10/v7: 0x%08x r11/v8: 0x%08x psp: 0x%08x",
|
||||
callee->v7, callee->v8, callee->psp);
|
||||
}
|
||||
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
|
||||
LOG_ERR("Faulting instruction address (r15/pc): 0x%08x",
|
||||
esf->basic.pc);
|
||||
}
|
||||
|
@ -83,7 +95,20 @@ void z_do_kernel_oops(const z_arch_esf_t *esf)
|
|||
}
|
||||
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
|
||||
#if !defined(CONFIG_EXTRA_EXCEPTION_INFO)
|
||||
z_arm_fatal_error(reason, esf);
|
||||
#else
|
||||
/* extra exception info is not collected for kernel oops
|
||||
* path today so we make a copy of the ESF and zero out
|
||||
* that information
|
||||
*/
|
||||
z_arch_esf_t esf_copy;
|
||||
|
||||
memcpy(&esf_copy, esf, offsetof(z_arch_esf_t, extra_info));
|
||||
esf_copy.extra_info = (struct __extra_esf_info) { 0 };
|
||||
z_arm_fatal_error(reason, &esf_copy);
|
||||
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
|
||||
}
|
||||
|
||||
FUNC_NORETURN void arch_syscall_oops(void *ssf_ptr)
|
||||
|
|
|
@ -14,6 +14,7 @@ config CPU_NIOS2_GEN2
|
|||
bool
|
||||
default y
|
||||
select BUILD_OUTPUT_HEX
|
||||
select ARCH_HAS_EXTRA_EXCEPTION_INFO
|
||||
help
|
||||
This option signifies the use of a Nios II Gen 2 CPU
|
||||
|
||||
|
|
|
@ -73,6 +73,20 @@ GTEXT(z_arm_exc_exit);
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Additional register state that is not stacked by hardware on exception
|
||||
* entry.
|
||||
*
|
||||
* These fields are ONLY valid in the ESF copy passed into z_arm_fatal_error().
|
||||
* When information for a member is unavailable, the field is set to zero.
|
||||
*/
|
||||
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
|
||||
struct __extra_esf_info {
|
||||
_callee_saved_t *callee;
|
||||
uint32_t msp;
|
||||
uint32_t exc_return;
|
||||
};
|
||||
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
|
||||
|
||||
struct __esf {
|
||||
struct __basic_sf {
|
||||
sys_define_gpr_with_alias(a1, r0);
|
||||
|
@ -89,6 +103,9 @@ struct __esf {
|
|||
uint32_t fpscr;
|
||||
uint32_t undefined;
|
||||
#endif
|
||||
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
|
||||
struct __extra_esf_info extra_info;
|
||||
#endif
|
||||
};
|
||||
|
||||
extern uint32_t z_arm_coredump_fault_sp;
|
||||
|
|
|
@ -11,6 +11,82 @@
|
|||
static volatile int test_flag;
|
||||
static volatile int expected_reason = -1;
|
||||
|
||||
/* Used to validate ESF collection during a fault */
|
||||
static volatile int run_esf_validation;
|
||||
static volatile int esf_validation_rv;
|
||||
static volatile uint32_t expected_msp;
|
||||
static K_THREAD_STACK_DEFINE(esf_collection_stack, 1024);
|
||||
static struct k_thread esf_collection_thread;
|
||||
#define MAIN_PRIORITY 7
|
||||
#define PRIORITY 5
|
||||
|
||||
/**
|
||||
* Validates that pEsf matches state from set_regs_with_known_pattern()
|
||||
*/
|
||||
static int check_esf_matches_expectations(const z_arch_esf_t *pEsf)
|
||||
{
|
||||
const uint16_t expected_fault_instruction = 0xde5a; /* udf #90 */
|
||||
const bool caller_regs_match_expected =
|
||||
(pEsf->basic.r0 == 0) &&
|
||||
(pEsf->basic.r1 == 1) &&
|
||||
(pEsf->basic.r2 == 2) &&
|
||||
(pEsf->basic.r3 == 3) &&
|
||||
(pEsf->basic.lr == 15) &&
|
||||
(*(uint16_t *)pEsf->basic.pc == expected_fault_instruction);
|
||||
if (!caller_regs_match_expected) {
|
||||
printk("__basic_sf member of ESF is incorrect\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
|
||||
const struct _callee_saved *callee_regs = pEsf->extra_info.callee;
|
||||
const bool callee_regs_match_expected =
|
||||
(callee_regs->v1 /* r4 */ == 4) &&
|
||||
(callee_regs->v2 /* r5 */ == 5) &&
|
||||
(callee_regs->v3 /* r6 */ == 6) &&
|
||||
(callee_regs->v4 /* r7 */ == 7) &&
|
||||
(callee_regs->v5 /* r8 */ == 8) &&
|
||||
(callee_regs->v6 /* r9 */ == 9) &&
|
||||
(callee_regs->v7 /* r10 */ == 10) &&
|
||||
(callee_regs->v8 /* r11 */ == 11);
|
||||
if (!callee_regs_match_expected) {
|
||||
printk("_callee_saved_t member of ESF is incorrect\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* we expect the EXC_RETURN value to have:
|
||||
* - PREFIX: bits [31:24] = 0xFF
|
||||
* - Mode, bit [3] = 1 since exception occurred from thread mode
|
||||
* - SPSEL, bit [2] = 1 since frame should reside on PSP
|
||||
*/
|
||||
const uint32_t exc_bits_set_mask = 0xff00000C;
|
||||
|
||||
if ((pEsf->extra_info.exc_return & exc_bits_set_mask) !=
|
||||
exc_bits_set_mask) {
|
||||
printk("Incorrect EXC_RETURN of 0x%08x",
|
||||
pEsf->extra_info.exc_return);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* the psp should match the contents of the esf copy up
|
||||
* to the xpsr. (the xpsr value in the copy used for pEsf
|
||||
* is overwritten in fault.c)
|
||||
*/
|
||||
if (memcmp((void *)callee_regs->psp, pEsf,
|
||||
offsetof(struct __esf, basic.xpsr)) != 0) {
|
||||
printk("psp does not match __basic_sf provided\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pEsf->extra_info.msp != expected_msp) {
|
||||
printk("MSP is 0x%08x but should be 0x%08x",
|
||||
pEsf->extra_info.msp, expected_msp);
|
||||
return -1;
|
||||
}
|
||||
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
|
||||
return 0;
|
||||
}
|
||||
|
||||
void k_sys_fatal_error_handler(unsigned int reason, const z_arch_esf_t *pEsf)
|
||||
{
|
||||
TC_PRINT("Caught system error -- reason %d\n", reason);
|
||||
|
@ -22,13 +98,90 @@ void k_sys_fatal_error_handler(unsigned int reason, const z_arch_esf_t *pEsf)
|
|||
|
||||
if (reason != expected_reason) {
|
||||
printk("Wrong crash type got %d expected %d\n", reason,
|
||||
expected_reason);
|
||||
expected_reason);
|
||||
k_fatal_halt(reason);
|
||||
}
|
||||
|
||||
if (run_esf_validation) {
|
||||
if (check_esf_matches_expectations(pEsf) == 0) {
|
||||
esf_validation_rv = TC_PASS;
|
||||
}
|
||||
run_esf_validation = 0;
|
||||
}
|
||||
|
||||
expected_reason = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set ARM registers with a known pattern:
|
||||
* r0-r12 are set to 0...12, respectively
|
||||
* r13 (sp) is left untouched
|
||||
* r14 (lr) is set to 15 (since a fault takes place, we never use the value)
|
||||
* r15 (pc) will point to the faulting instruction (udf #90)
|
||||
*
|
||||
* Note: Routine was written to be ARMV6M compatible
|
||||
*
|
||||
* In k_sys_fatal_error_handler above we will check that the ESF provided
|
||||
* as a parameter matches these expectations.
|
||||
*/
|
||||
void set_regs_with_known_pattern(void)
|
||||
{
|
||||
__asm__ volatile(
|
||||
"mov r1, #1\n"
|
||||
"mov r2, #2\n"
|
||||
"mov r3, #3\n"
|
||||
"mov r4, #4\n"
|
||||
"mov r5, #5\n"
|
||||
"mov r6, #6\n"
|
||||
"mov r7, #7\n"
|
||||
"mov r0, #8\n"
|
||||
"mov r8, r0\n"
|
||||
"add r0, r0, #1\n"
|
||||
"mov r9, r0\n"
|
||||
"add r0, r0, #1\n"
|
||||
"mov r10, r0\n"
|
||||
"add r0, r0, #1\n"
|
||||
"mov r11, r0\n"
|
||||
"add r0, r0, #1\n"
|
||||
"mov r12, r0\n"
|
||||
"add r0, r0, #3\n"
|
||||
"mov lr, r0\n"
|
||||
"mov r0, #0\n"
|
||||
"udf #90\n"
|
||||
);
|
||||
}
|
||||
|
||||
void test_arm_esf_collection(void)
|
||||
{
|
||||
/* if the check in the fault handler succeeds,
|
||||
* this will be set to TC_PASS
|
||||
*/
|
||||
esf_validation_rv = TC_FAIL;
|
||||
|
||||
/* since the fault is from a task, the interrupt stack (msp)
|
||||
* should match whatever the current value is
|
||||
*/
|
||||
expected_msp = __get_MSP();
|
||||
|
||||
run_esf_validation = 1;
|
||||
expected_reason = K_ERR_CPU_EXCEPTION;
|
||||
|
||||
/* Run the main thread at a preemptible priority, below the cooperative
|
||||
* priority of the crashy thread we create below, to guarantee that thread
|
||||
* runs to completion before we get to the end of this function
|
||||
*/
|
||||
k_thread_priority_set(_current, K_PRIO_PREEMPT(MAIN_PRIORITY));
|
||||
|
||||
TC_PRINT("Testing ESF Reporting\n");
|
||||
k_thread_create(&esf_collection_thread, esf_collection_stack,
|
||||
K_THREAD_STACK_SIZEOF(esf_collection_stack),
|
||||
(k_thread_entry_t)set_regs_with_known_pattern,
|
||||
NULL, NULL, NULL, K_PRIO_COOP(PRIORITY), 0,
|
||||
K_NO_WAIT);
|
||||
zassert_not_equal(esf_validation_rv, TC_FAIL,
|
||||
"ESF fault collection failed");
|
||||
}
|
||||
|
||||
void arm_isr_handler(void *args)
|
||||
{
|
||||
ARG_UNUSED(args);
|
||||
|
|
|
@ -8,11 +8,13 @@
|
|||
|
||||
extern void test_arm_interrupt(void);
|
||||
extern void test_arm_user_interrupt(void);
|
||||
extern void test_arm_esf_collection(void);
|
||||
|
||||
void test_main(void)
|
||||
{
|
||||
ztest_test_suite(arm_interrupt,
|
||||
ztest_unit_test(test_arm_interrupt),
|
||||
ztest_unit_test(test_arm_esf_collection),
|
||||
ztest_user_unit_test(test_arm_user_interrupt));
|
||||
ztest_run_test_suite(arm_interrupt);
|
||||
}
|
||||
|
|
|
@ -11,3 +11,9 @@ tests:
|
|||
- CONFIG_NO_OPTIMIZATIONS=y
|
||||
- CONFIG_IDLE_STACK_SIZE=512
|
||||
- CONFIG_MAIN_STACK_SIZE=1024
|
||||
arch.interrupt.extra_exception_info:
|
||||
filter: CONFIG_ARMV6_M_ARMV8_M_BASELINE or CONFIG_ARMV7_M_ARMV8_M_MAINLINE
|
||||
tags: arm interrupt ignore_faults
|
||||
arch_allow: arm
|
||||
extra_configs:
|
||||
- CONFIG_EXTRA_EXCEPTION_INFO=y
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue