arch: arm: Collect full register state in Cortex-M Exception Stack Frame

To debug hard-to-reproduce faults/panics, it's helpful to get the full
register state at the time a fault occurred. This enables recovering
full backtraces and the state of local variables at the time of a
crash.

This PR introduces a new Kconfig option, CONFIG_EXTRA_EXCEPTION_INFO,
to facilitate this use case. The option enables the capturing of the
callee-saved register state (r4-r11 & psp), together with the msp and
exc_return values, during a fault. The info is forwarded to
`k_sys_fatal_error_handler` in the z_arch_esf_t parameter. From there,
the data can be saved for post-mortem analysis.

To test the functionality a new unit test was added to
tests/arch/arm_interrupt which verifies the register contents passed
in the argument match the state leading up to a crash.

Signed-off-by: Chris Coleman <chris@memfault.com>
This commit is contained in:
Chris Coleman 2020-08-28 09:02:20 -04:00 committed by Ioannis Glaropoulos
commit 99a268fa16
10 changed files with 264 additions and 8 deletions

View file

@ -367,6 +367,15 @@ config IRQ_OFFLOAD
run in interrupt context. Only useful for test cases that need
to validate the correctness of kernel objects in IRQ context.
config EXTRA_EXCEPTION_INFO
bool "Collect extra exception info"
depends on ARCH_HAS_EXTRA_EXCEPTION_INFO
help
This option enables the collection of extra information, such as
register state, when a fault occurs. This information can be useful
to collect for post-mortem analysis and debug of issues.
endmenu # Interrupt configuration
endmenu
@ -399,6 +408,9 @@ config ARCH_HAS_NESTED_EXCEPTION_DETECTION
config ARCH_SUPPORTS_COREDUMP
bool
config ARCH_HAS_EXTRA_EXCEPTION_INFO
bool
#
# Other architecture related options
#

View file

@ -17,6 +17,7 @@ config CPU_CORTEX_M
select ARCH_HAS_RAMFUNC_SUPPORT
select ARCH_HAS_NESTED_EXCEPTION_DETECTION
select SWAP_NONATOMIC
select ARCH_HAS_EXTRA_EXCEPTION_INFO
imply XIP
help
This option signifies the use of a CPU of the Cortex-M family.

View file

@ -929,9 +929,11 @@ static inline z_arch_esf_t *get_esf(uint32_t msp, uint32_t psp, uint32_t exc_ret
* @param msp MSP value immediately after the exception occurred
* @param psp PSP value immediately after the exception occurred
* @param exc_return EXC_RETURN value present in LR after exception entry.
* @param callee_regs Callee-saved registers (R4-R11, PSP)
*
*/
void z_arm_fault(uint32_t msp, uint32_t psp, uint32_t exc_return)
void z_arm_fault(uint32_t msp, uint32_t psp, uint32_t exc_return,
_callee_saved_t *callee_regs)
{
uint32_t reason = K_ERR_CPU_EXCEPTION;
int fault = SCB->ICSR & SCB_ICSR_VECTACTIVE_Msk;
@ -963,7 +965,20 @@ void z_arm_fault(uint32_t msp, uint32_t psp, uint32_t exc_return)
}
/* Copy ESF */
#if !defined(CONFIG_EXTRA_EXCEPTION_INFO)
memcpy(&esf_copy, esf, sizeof(z_arch_esf_t));
ARG_UNUSED(callee_regs);
#else
/* the extra exception info is not present in the original esf
* so we only copy the fields before those.
*/
memcpy(&esf_copy, esf, offsetof(z_arch_esf_t, extra_info));
esf_copy.extra_info = (struct __extra_esf_info) {
.callee = callee_regs,
.exc_return = exc_return,
.msp = msp
};
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
/* Overwrite stacked IPSR to mark a nested exception,
* or a return to Thread mode. Note that this may be

View file

@ -46,9 +46,10 @@ GTEXT(z_arm_exc_spurious)
* - the MSP
* - the PSP
* - the EXC_RETURN value
* - callee saved register state (r4-r11, psp)
* as parameters to the z_arm_fault() C function that will perform the
* rest of the fault handling (i.e. z_arm_fault(MSP, PSP, EXC_RETURN)).
* rest of the fault handling:
* (i.e. z_arm_fault(MSP, PSP, EXC_RETURN, CALLEE_REGS)).
* Provides these symbols:
*
* z_arm_hard_fault
@ -78,12 +79,35 @@ SECTION_SUBSEC_FUNC(TEXT,__fault,z_arm_exc_spurious)
mrs r0, MSP
mrs r1, PSP
mov r2, lr /* EXC_RETURN */
push {r0, lr}
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
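/* Build _callee_saved_t. To match the struct
 * definition we push the psp & then r11-r4.
 * r2 (EXC_RETURN) is pushed together with the psp, so 10 words
 * (40 bytes) are pushed in total; the `add sp, #40` after the
 * call to z_arm_fault releases them again.
 */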
push { r1, r2 }
#if defined(CONFIG_ARMV6_M_ARMV8_M_BASELINE)
mov r3, r11
mov r2, r10
push {r2, r3}
mov r3, r9
mov r2, r8
push {r2, r3}
push {r4-r7}
#elif defined(CONFIG_ARMV7_M_ARMV8_M_MAINLINE)
push {r4-r11}
#endif
mov r3, sp /* pointer to _callee_saved_t */
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
mov r2, lr /* EXC_RETURN */
bl z_arm_fault
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
/* We do not need to restore any register state here
* because we did not use any callee-saved registers
* in this routine. Therefore, we can just reset
* the MSP to its value prior to entering the function
*/
add sp, #40
#endif
pop {r0, pc}
.end

View file

@ -34,6 +34,18 @@ static void esf_dump(const z_arch_esf_t *esf)
}
LOG_ERR("fpscr: 0x%08x", esf->fpscr);
#endif
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
const struct _callee_saved *callee = esf->extra_info.callee;
if (callee != NULL) {
LOG_ERR("r4/v1: 0x%08x r5/v2: 0x%08x r6/v3: 0x%08x",
callee->v1, callee->v2, callee->v3);
LOG_ERR("r7/v4: 0x%08x r8/v5: 0x%08x r9/v6: 0x%08x",
callee->v4, callee->v5, callee->v6);
LOG_ERR("r10/v7: 0x%08x r11/v8: 0x%08x psp: 0x%08x",
callee->v7, callee->v8, callee->psp);
}
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
LOG_ERR("Faulting instruction address (r15/pc): 0x%08x",
esf->basic.pc);
}
@ -83,7 +95,20 @@ void z_do_kernel_oops(const z_arch_esf_t *esf)
}
#endif /* CONFIG_USERSPACE */
#if !defined(CONFIG_EXTRA_EXCEPTION_INFO)
z_arm_fatal_error(reason, esf);
#else
/* extra exception info is not collected for kernel oops
* path today so we make a copy of the ESF and zero out
* that information
*/
z_arch_esf_t esf_copy;
memcpy(&esf_copy, esf, offsetof(z_arch_esf_t, extra_info));
esf_copy.extra_info = (struct __extra_esf_info) { 0 };
z_arm_fatal_error(reason, &esf_copy);
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
}
FUNC_NORETURN void arch_syscall_oops(void *ssf_ptr)

View file

@ -14,6 +14,7 @@ config CPU_NIOS2_GEN2
bool
default y
select BUILD_OUTPUT_HEX
select ARCH_HAS_EXTRA_EXCEPTION_INFO
help
This option signifies the use of a Nios II Gen 2 CPU

View file

@ -73,6 +73,20 @@ GTEXT(z_arm_exc_exit);
extern "C" {
#endif
/* Additional register state that is not stacked by hardware on exception
 * entry.
 *
 * These fields are ONLY valid in the ESF copy passed into z_arm_fatal_error().
 * When information for a member is unavailable, the field is set to zero.
 */
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
struct __extra_esf_info {
/* Callee-saved register block (r4-r11, psp) handed to z_arm_fault();
 * NULL when the information was not collected (e.g. kernel oops path),
 * so consumers must check it before dereferencing.
 */
_callee_saved_t *callee;
/* MSP value immediately after the exception occurred */
uint32_t msp;
/* EXC_RETURN value present in LR after exception entry */
uint32_t exc_return;
};
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */
struct __esf {
struct __basic_sf {
sys_define_gpr_with_alias(a1, r0);
@ -89,6 +103,9 @@ struct __esf {
uint32_t fpscr;
uint32_t undefined;
#endif
#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
struct __extra_esf_info extra_info;
#endif
};
extern uint32_t z_arm_coredump_fault_sp;

View file

@ -11,6 +11,82 @@
static volatile int test_flag;
static volatile int expected_reason = -1;
/* Used to validate ESF collection during a fault */
/* Set to 1 by test_arm_esf_collection() to request validation; cleared by
 * k_sys_fatal_error_handler() once the ESF has been checked.
 */
static volatile int run_esf_validation;
/* Result of the validation: preset to TC_FAIL by the test and changed to
 * TC_PASS by the fatal error handler when the ESF matches expectations.
 */
static volatile int esf_validation_rv;
/* MSP value sampled by the test before the fault is triggered; compared
 * against extra_info.msp in the ESF.
 */
static volatile uint32_t expected_msp;
/* Stack and thread object for the thread that deliberately faults */
static K_THREAD_STACK_DEFINE(esf_collection_stack, 1024);
static struct k_thread esf_collection_thread;
/* MAIN_PRIORITY is used with K_PRIO_PREEMPT() for the test thread itself,
 * PRIORITY with K_PRIO_COOP() for the faulting thread.
 */
#define MAIN_PRIORITY 7
#define PRIORITY 5
/**
 * Validates that pEsf matches state from set_regs_with_known_pattern()
 *
 * The caller-saved registers stacked by hardware are always checked. When
 * CONFIG_EXTRA_EXCEPTION_INFO is enabled, the callee-saved registers, the
 * EXC_RETURN value, the PSP contents and the MSP value are checked as well.
 *
 * @param pEsf Exception stack frame handed to the fatal error handler
 *
 * @return 0 if every check passes, -1 on the first mismatch (a diagnostic
 *         message is printed for the failing check)
 */
static int check_esf_matches_expectations(const z_arch_esf_t *pEsf)
{
	const uint16_t expected_fault_instruction = 0xde5a; /* udf #90 */
	const bool caller_regs_match_expected =
		(pEsf->basic.r0 == 0) &&
		(pEsf->basic.r1 == 1) &&
		(pEsf->basic.r2 == 2) &&
		(pEsf->basic.r3 == 3) &&
		(pEsf->basic.lr == 15) &&
		(*(uint16_t *)pEsf->basic.pc == expected_fault_instruction);

	if (!caller_regs_match_expected) {
		printk("__basic_sf member of ESF is incorrect\n");
		return -1;
	}

#if defined(CONFIG_EXTRA_EXCEPTION_INFO)
	const struct _callee_saved *callee_regs = pEsf->extra_info.callee;

	/* The extra info is zeroed when it could not be collected, so the
	 * pointer must be validated before it is dereferenced; otherwise
	 * the fault handler itself would fault.
	 */
	if (callee_regs == NULL) {
		printk("_callee_saved_t member of ESF is missing\n");
		return -1;
	}

	const bool callee_regs_match_expected =
		(callee_regs->v1 /* r4 */ == 4) &&
		(callee_regs->v2 /* r5 */ == 5) &&
		(callee_regs->v3 /* r6 */ == 6) &&
		(callee_regs->v4 /* r7 */ == 7) &&
		(callee_regs->v5 /* r8 */ == 8) &&
		(callee_regs->v6 /* r9 */ == 9) &&
		(callee_regs->v7 /* r10 */ == 10) &&
		(callee_regs->v8 /* r11 */ == 11);

	if (!callee_regs_match_expected) {
		printk("_callee_saved_t member of ESF is incorrect\n");
		return -1;
	}

	/* we expect the EXC_RETURN value to have:
	 *  - PREFIX: bits [31:24] = 0xFF
	 *  - Mode, bit [3] = 1 since exception occurred from thread mode
	 *  - SPSEL, bit [2] = 1 since frame should reside on PSP
	 */
	const uint32_t exc_bits_set_mask = 0xff00000C;

	if ((pEsf->extra_info.exc_return & exc_bits_set_mask) !=
	    exc_bits_set_mask) {
		printk("Incorrect EXC_RETURN of 0x%08x\n",
		       pEsf->extra_info.exc_return);
		return -1;
	}

	/* the psp should match the contents of the esf copy up
	 * to the xpsr. (the xpsr value in the copy used for pEsf
	 * is overwritten in fault.c)
	 */
	if (memcmp((void *)callee_regs->psp, pEsf,
		   offsetof(struct __esf, basic.xpsr)) != 0) {
		printk("psp does not match __basic_sf provided\n");
		return -1;
	}

	if (pEsf->extra_info.msp != expected_msp) {
		printk("MSP is 0x%08x but should be 0x%08x\n",
		       pEsf->extra_info.msp, expected_msp);
		return -1;
	}
#endif /* CONFIG_EXTRA_EXCEPTION_INFO */

	return 0;
}
void k_sys_fatal_error_handler(unsigned int reason, const z_arch_esf_t *pEsf)
{
TC_PRINT("Caught system error -- reason %d\n", reason);
@ -22,13 +98,90 @@ void k_sys_fatal_error_handler(unsigned int reason, const z_arch_esf_t *pEsf)
if (reason != expected_reason) {
printk("Wrong crash type got %d expected %d\n", reason,
expected_reason);
expected_reason);
k_fatal_halt(reason);
}
if (run_esf_validation) {
if (check_esf_matches_expectations(pEsf) == 0) {
esf_validation_rv = TC_PASS;
}
run_esf_validation = 0;
}
expected_reason = -1;
}
/**
 * Set ARM registers with a known pattern:
 *  r0-r12 are set to 0...12, respectively
 *  r13 (sp) is left untouched
 *  r14 (lr) is set to 15 (since a fault takes place, we never use the value)
 *  r15 (pc) will point to the faulting instruction (udf #90)
 *
 * Note: Routine was written to be ARMV6M compatible
 *
 * In k_sys_fatal_error_handler above we will check that the ESF provided
 * as a parameter matches these expectations.
 */
void set_regs_with_known_pattern(void)
{
__asm__ volatile(
/* r1-r7 are loaded directly with their own register number */
"mov r1, #1\n"
"mov r2, #2\n"
"mov r3, #3\n"
"mov r4, #4\n"
"mov r5, #5\n"
"mov r6, #6\n"
"mov r7, #7\n"
/* r8-r12 are filled by counting up in r0 and copying the value over;
 * presumably because of the ARMV6M compatibility noted above
 */
"mov r0, #8\n"
"mov r8, r0\n"
"add r0, r0, #1\n"
"mov r9, r0\n"
"add r0, r0, #1\n"
"mov r10, r0\n"
"add r0, r0, #1\n"
"mov r11, r0\n"
"add r0, r0, #1\n"
"mov r12, r0\n"
/* r0 is 12 here, so adding 3 yields the value 15 placed in lr */
"add r0, r0, #3\n"
"mov lr, r0\n"
/* r0 was used as scratch above; give it its final value last */
"mov r0, #0\n"
/* trigger the fault that the test expects */
"udf #90\n"
);
}
/**
 * Spawns a thread that faults with a known register pattern and asserts
 * that the fatal error handler validated the resulting ESF.
 */
void test_arm_esf_collection(void)
{
/* if the check in the fault handler succeeds,
 * this will be set to TC_PASS
 */
esf_validation_rv = TC_FAIL;
/* since the fault is from a task, the interrupt stack (msp)
 * should match whatever the current value is
 */
expected_msp = __get_MSP();
run_esf_validation = 1;
expected_reason = K_ERR_CPU_EXCEPTION;
/* Make this thread preemptible (K_PRIO_PREEMPT) and create the crashy
 * thread below with a cooperative priority (K_PRIO_COOP) and no start
 * delay (K_NO_WAIT). The two threads therefore do not use the same
 * priority value.
 * NOTE(review): the intent is that the crashy thread runs to completion
 * before we get to the assertion at the end of this function -- confirm
 * against the scheduler's priority rules.
 */
k_thread_priority_set(_current, K_PRIO_PREEMPT(MAIN_PRIORITY));
TC_PRINT("Testing ESF Reporting\n");
k_thread_create(&esf_collection_thread, esf_collection_stack,
K_THREAD_STACK_SIZEOF(esf_collection_stack),
(k_thread_entry_t)set_regs_with_known_pattern,
NULL, NULL, NULL, K_PRIO_COOP(PRIORITY), 0,
K_NO_WAIT);
/* the fatal error handler flips this to TC_PASS when the ESF matched */
zassert_not_equal(esf_validation_rv, TC_FAIL,
"ESF fault collection failed");
}
void arm_isr_handler(void *args)
{
ARG_UNUSED(args);

View file

@ -8,11 +8,13 @@
extern void test_arm_interrupt(void);
extern void test_arm_user_interrupt(void);
extern void test_arm_esf_collection(void);
/* Test entry point: registers the arm_interrupt suite and runs it.
 * test_arm_interrupt and test_arm_esf_collection are registered with
 * ztest_unit_test(), test_arm_user_interrupt with ztest_user_unit_test().
 */
void test_main(void)
{
ztest_test_suite(arm_interrupt,
ztest_unit_test(test_arm_interrupt),
ztest_unit_test(test_arm_esf_collection),
ztest_user_unit_test(test_arm_user_interrupt));
ztest_run_test_suite(arm_interrupt);
}

View file

@ -11,3 +11,9 @@ tests:
- CONFIG_NO_OPTIMIZATIONS=y
- CONFIG_IDLE_STACK_SIZE=512
- CONFIG_MAIN_STACK_SIZE=1024
arch.interrupt.extra_exception_info:
filter: CONFIG_ARMV6_M_ARMV8_M_BASELINE or CONFIG_ARMV7_M_ARMV8_M_MAINLINE
tags: arm interrupt ignore_faults
arch_allow: arm
extra_configs:
- CONFIG_EXTRA_EXCEPTION_INFO=y