panic: Add support for SoC-level panic

SoC level exceptions such as watchdog timer and cache errors are now supported.
Such exceptions now triggers a panic, giving more information about how
and when it happened.
pull/6365/head
Omar Chebib 2020-12-03 17:17:43 +08:00
rodzic e20833124e
commit b6a450f824
9 zmienionych plików z 346 dodań i 20 usunięć

Wyświetl plik

@ -19,13 +19,81 @@
data from the bus.
*/
#include <stdint.h>
#include "sdkconfig.h"
#include "esp_attr.h"
#include "freertos/FreeRTOS.h"
#include "esp_intr_alloc.h"
#include "soc/extmem_reg.h"
#include "soc/periph_defs.h"
#include "riscv/interrupt.h"
// TODO ESP32-C3 IDF-2450
void esp_cache_err_int_init(void)
{
const uint32_t core_id = 0;
/* Disable cache interrupts if enabled. */
ESP_INTR_DISABLE(ETS_CACHEERR_INUM);
/**
* Bind all cache errors to ETS_CACHEERR_INUM interrupt. we will deal with
* them in handler by different types
* I) Cache access error
* 1. dbus trying to write to icache
* 2. dbus authentication fail
* 3. cpu access icache while dbus is disabled [1]
* 4. ibus authentication fail
* 5. ibus trying to write icache
* 6. cpu access icache while ibus is disabled
* II) Cache illegal error
* 1. dbus counter overflow
* 2. ibus counter overflow
* 3. mmu entry fault
* 4. icache preload configurations fault
* 5. icache sync configuration fault
*
* [1]: On ESP32C3 boards, the caches are shared but buses are still
* distinct. So, we have an ibus and a dbus sharing the same cache.
* This error can occur if the dbus performs a request but the icache
* (or simply cache) is disabled.
*/
intr_matrix_set(core_id, ETS_CACHE_IA_INTR_SOURCE, ETS_CACHEERR_INUM);
intr_matrix_set(core_id, ETS_CACHE_CORE0_ACS_INTR_SOURCE, ETS_CACHEERR_INUM);
/* Set the type and priority to cache error interrupts. */
esprv_intc_int_set_type(BIT(ETS_CACHEERR_INUM), INTR_TYPE_LEVEL);
esprv_intc_int_set_priority(ETS_CACHEERR_INUM, 4);
/* On the hardware side, stat by clearing all the bits reponsible for
* enabling cache access error interrupts. */
SET_PERI_REG_MASK(EXTMEM_CORE0_ACS_CACHE_INT_CLR_REG,
EXTMEM_CORE0_DBUS_WR_IC_INT_CLR |
EXTMEM_CORE0_DBUS_REJECT_INT_CLR |
EXTMEM_CORE0_DBUS_ACS_MSK_IC_INT_CLR |
EXTMEM_CORE0_IBUS_REJECT_INT_CLR |
EXTMEM_CORE0_IBUS_WR_IC_INT_CLR |
EXTMEM_CORE0_IBUS_ACS_MSK_IC_INT_CLR);
/* Enable these interrupts. */
SET_PERI_REG_MASK(EXTMEM_CORE0_ACS_CACHE_INT_ENA_REG,
EXTMEM_CORE0_DBUS_WR_IC_INT_ENA |
EXTMEM_CORE0_DBUS_REJECT_INT_ENA |
EXTMEM_CORE0_DBUS_ACS_MSK_IC_INT_ENA |
EXTMEM_CORE0_IBUS_REJECT_INT_ENA |
EXTMEM_CORE0_IBUS_WR_IC_INT_ENA |
EXTMEM_CORE0_IBUS_ACS_MSK_IC_INT_ENA);
/* Same goes for cache illegal error: start by clearing the bits and then
* set them back. */
SET_PERI_REG_MASK(EXTMEM_CACHE_ILG_INT_CLR_REG,
EXTMEM_MMU_ENTRY_FAULT_INT_CLR |
EXTMEM_ICACHE_PRELOAD_OP_FAULT_INT_CLR |
EXTMEM_ICACHE_SYNC_OP_FAULT_INT_CLR);
SET_PERI_REG_MASK(EXTMEM_CACHE_ILG_INT_ENA_REG,
EXTMEM_MMU_ENTRY_FAULT_INT_ENA |
EXTMEM_ICACHE_PRELOAD_OP_FAULT_INT_ENA |
EXTMEM_ICACHE_SYNC_OP_FAULT_INT_ENA);
/* Enable the interrupts for cache error. */
ESP_INTR_ENABLE(ETS_CACHEERR_INUM);
}
int IRAM_ATTR esp_cache_err_get_cpuid(void)

Wyświetl plik

@ -31,6 +31,7 @@
#include "esp_private/system_internal.h"
#include "hal/timer_types.h"
#include "hal/wdt_hal.h"
#include "hal/interrupt_controller_hal.h"
#if CONFIG_ESP_INT_WDT
@ -106,6 +107,7 @@ void esp_int_wdt_init(void) {
wdt_hal_write_protect_disable(&iwdt_context);
//The timer configs initially are set to 5 seconds, to make sure the CPU can start up. The tick hook sets
//it to their actual value.
//1st stage timeout: interrupt
wdt_hal_config_stage(&iwdt_context, WDT_STAGE0, IWDT_INITIAL_TIMEOUT_S * 1000000 / IWDT_TICKS_PER_US, WDT_STAGE_ACTION_INT);
//2nd stage timeout: reset system
@ -121,6 +123,16 @@ void esp_int_wdt_cpu_init(void)
esp_register_freertos_tick_hook_for_cpu(tick_hook, xPortGetCoreID());
ESP_INTR_DISABLE(WDT_INT_NUM);
intr_matrix_set(xPortGetCoreID(), ETS_TG1_WDT_LEVEL_INTR_SOURCE, WDT_INT_NUM);
/* Set the type and priority to cache error interrupts, if supported. */
#if SOC_INTERRUPT_TYPE_CAN_SET
interrupt_controller_hal_set_type(BIT(WDT_INT_NUM), INTR_TYPE_LEVEL);
#endif
#if SOC_INTERRUPT_LEVEL_CAN_SET
interrupt_controller_hal_set_level(WDT_INT_NUM, 4);
#endif
#if CONFIG_ESP32_ECO3_CACHE_LOCK_FIX
/*
* This is a workaround for issue 3.15 in "ESP32 ECO and workarounds for
@ -133,6 +145,7 @@ void esp_int_wdt_cpu_init(void)
_l4_intr_livelock_max = CONFIG_ESP_INT_WDT_TIMEOUT_MS/IWDT_LIVELOCK_TIMEOUT_MS - 1;
}
#endif
//We do not register a handler for the interrupt because it is interrupt level 4 which
//is not servicable from C. Instead, xtensa_vectors.S has a call to the panic handler for
//this interrupt.

Wyświetl plik

@ -14,10 +14,136 @@
// limitations under the License.
#include <stdio.h>
#include "soc/extmem_reg.h"
#include "esp_private/panic_internal.h"
#include "esp_private/panic_reason.h"
#include "riscv/rvruntime-frames.h"
extern void esp_panic_handler(panic_info_t *);
#if CONFIG_IDF_TARGET_ESP32C3
#include "esp32c3/cache_err_int.h"
#endif
#define DIM(array) (sizeof(array)/sizeof(*array))
/**
* Structure used to define a flag/bit to test in case of cache error.
* The message describes the cause of the error when the bit is set in
* a given status register.
*/
typedef struct {
const uint32_t bit;
const char* msg;
} register_bit_t;
/**
* Function to check each bits defined in the array reg_bits in the given
* status register. The first bit from the array to be set in the status
* register will have its associated message printed. This function returns
* true. If not bit was set in the register, it returns false.
* The order of the bits in the array is important as only the first bit to
* be set in the register will have its associated message printed.
*/
static inline bool test_and_print_register_bits(const uint32_t status,
const register_bit_t* reg_bits,
const uint32_t size)
{
/* Browse the flag/bit array and test each one with the given status
* register. */
for (int i = 0; i < size; i++) {
const uint32_t bit = reg_bits[i].bit;
if ((status & bit) == bit) {
/* Reason of the panic found, print the reason. */
panic_print_str(reg_bits[i].msg);
panic_print_str("\r\n");
return true;
}
}
/* Panic cause not found, no message was printed. */
return false;
}
/**
* Function called when a cache error occurs. It prints details such as the
* explanation of why the panic occured.
*/
static inline void print_cache_err_details(const void* frame)
{
/* Define the array that contains the status (bits) to test on the register
* EXTMEM_CORE0_ACS_CACHE_INT_ST_REG. each bit is accompanied by a small
* message.
* The messages have been pulled from the header file where the status bit
* are defined. */
const register_bit_t core0_acs_bits[] = {
{
.bit = EXTMEM_CORE0_DBUS_WR_ICACHE_ST,
.msg = "dbus tried to write cache"
},
{
.bit = EXTMEM_CORE0_DBUS_REJECT_ST,
.msg = "dbus authentication failed"
},
{
.bit = EXTMEM_CORE0_DBUS_ACS_MSK_ICACHE_ST,
.msg = "access to cache while dbus or cache is disabled"
},
{
.bit = EXTMEM_CORE0_IBUS_REJECT_ST,
.msg = "ibus authentication failed"
},
{
.bit = EXTMEM_CORE0_IBUS_WR_ICACHE_ST,
.msg = "ibus tried to write cache"
},
{
.bit = EXTMEM_CORE0_IBUS_ACS_MSK_ICACHE_ST,
.msg = "access to cache while ibus or cache is disabled"
},
};
/* Same goes for the register EXTMEM_CACHE_ILG_INT_ST_REG and its bits. */
const register_bit_t cache_ilg_bits[] = {
{
.bit = EXTMEM_MMU_ENTRY_FAULT_ST,
.msg = "MMU entry fault"
},
{
.bit = EXTMEM_ICACHE_PRELOAD_OP_FAULT_ST,
.msg = "preload configurations fault"
},
{
.bit = EXTMEM_ICACHE_SYNC_OP_FAULT_ST,
.msg = "sync configurations fault"
},
};
/* Read the status register EXTMEM_CORE0_ACS_CACHE_INT_ST_REG. This status
* register is not equal to 0 when a cache access error occured. */
const uint32_t core0_status = REG_READ(EXTMEM_CORE0_ACS_CACHE_INT_ST_REG);
/* If the panic is due to a cache access error, one of the bit of the
* register is set. Thus, this function will return true. */
bool handled = test_and_print_register_bits(core0_status, core0_acs_bits, DIM(core0_acs_bits));
/* If the panic was due to a cache illegal error, the previous call returned false and this
* EXTMEM_CACHE_ILG_INT_ST_REG register should not me equal to 0.
* Check each bit of it and print the message associated if found. */
if (!handled) {
const uint32_t cache_ilg_status = REG_READ(EXTMEM_CACHE_ILG_INT_ST_REG);
handled = test_and_print_register_bits(cache_ilg_status, cache_ilg_bits, DIM(cache_ilg_bits));
/* If the error was not found, print the both registers value */
if (!handled) {
panic_print_str("EXTMEM_CORE0_ACS_CACHE_INT_ST_REG = 0x");
panic_print_hex(core0_status);
panic_print_str("\r\nEXTMEM_CACHE_ILG_INT_ST_REG = 0x");
panic_print_hex(cache_ilg_status);
panic_print_str("\r\n");
}
}
}
void panic_print_registers(const void *f, int core)
{
@ -49,10 +175,48 @@ void panic_print_registers(const void *f, int core)
}
}
/**
* This function will be called when a SoC-level panic occurs.
* SoC-level panics include cache errors and watchdog interrupts.
*/
void panic_soc_fill_info(void *f, panic_info_t *info)
{
// TODO ESP32-C3 IDF-2386 / support soc panic
return;
RvExcFrame *frame = (RvExcFrame *) f;
/* Please keep in sync with PANIC_RSN_* defines */
static const char *pseudo_reason[PANIC_RSN_MAX + 1] = {
"Unknown reason",
"Interrupt wdt timeout on CPU0",
"Interrupt wdt timeout on CPU1",
"Cache exception",
};
info->reason = pseudo_reason[0];
info->addr = (void *) frame->mepc;
/* The mcause has been set by the CPU when the panic occured.
* All SoC-level panic will call this function, thus, this register
* lets us know which error was triggered. */
if (frame->mcause == ETS_CACHEERR_INUM) {
/* Panic due to a cache error, multiple cache error are possible,
* assign function print_cache_err_details to our structure's
* details field. As its name states, it will give more details
* about why the error happened. */
info->core = esp_cache_err_get_cpuid();
info->reason = pseudo_reason[PANIC_RSN_CACHEERR];
info->details = print_cache_err_details;
} else if (frame->mcause == ETS_T1_WDT_INUM) {
/* Watchdog interrupt occured, get the core on which it happened
* and update the reason/message accordingly. */
const int core = esp_cache_err_get_cpuid();
info->core = core;
info->exception = PANIC_EXCEPTION_TWDT;
_Static_assert(PANIC_RSN_INTWDT_CPU0 + 1 == PANIC_RSN_INTWDT_CPU1,
"PANIC_RSN_INTWDT_CPU1 must be equal to PANIC_RSN_INTWDT_CPU0 + 1");
info->reason = pseudo_reason[PANIC_RSN_INTWDT_CPU0 + core];
}
}
void panic_arch_fill_info(void *frame, panic_info_t *info)

Wyświetl plik

@ -115,6 +115,8 @@ static void frame_to_panic_info(void *frame, panic_info_t *info, bool pseudo_exc
static void panic_handler(void *frame, bool pseudo_excause)
{
panic_info_t info = { 0 };
/*
* Setup environment and perform necessary architecture/chip specific
* steps here prior to the system panic handler.
@ -178,7 +180,6 @@ static void panic_handler(void *frame, bool pseudo_excause)
}
// Convert architecture exception frame into abstracted panic info
panic_info_t info;
frame_to_panic_info(frame, &info, pseudo_excause);
// Call the system panic handler

Wyświetl plik

@ -115,6 +115,29 @@ static inline void intr_cntrl_ll_enable_int_mask(uint32_t newmask)
RV_SET_CSR(mstatus, old_mstatus & MSTATUS_MIE);
}
/**
* @brief Set the interrupt type given an interrupt number.
*
* @param interrupt_number number of the interrupt
* @param type new type for this interrupt
*/
static inline void intr_cntrl_ll_set_type(int interrupt_number, int_type_t type)
{
esprv_intc_int_set_type(BIT(interrupt_number), type);
}
/**
* @brief Set the interrupt level (priority) given an interrupt number.
*
* @param interrupt_number number of the interrupt
* @param level new level for this interrupt
*/
static inline void intr_cntrl_ll_set_level(int interrupt_number, int level)
{
esprv_intc_int_set_priority(interrupt_number, level);
}
#ifdef __cplusplus
}
#endif

Wyświetl plik

@ -45,8 +45,8 @@ const int_desc_t interrupt_descriptor_table[32] = {
{ 2, INTTP_LEVEL, {INTDESC_NORMAL } }, //21
{ 3, INTTP_EDGE, {INTDESC_NORMAL } }, //22
{ 3, INTTP_LEVEL, {INTDESC_NORMAL } }, //23
{ 4, INTTP_LEVEL, {INTDESC_NORMAL } }, //24
{ 4, INTTP_LEVEL, {INTDESC_NORMAL } }, //25
{ 4, INTTP_LEVEL, {INTDESC_RESVD } }, //24
{ 4, INTTP_LEVEL, {INTDESC_RESVD } }, //25
{ 5, INTTP_LEVEL, {INTDESC_NORMAL } }, //26
{ 3, INTTP_LEVEL, {INTDESC_NORMAL } }, //27
{ 4, INTTP_EDGE, {INTDESC_NORMAL } }, //28

Wyświetl plik

@ -17,6 +17,7 @@
#include <stdbool.h>
#include "hal/interrupt_controller_types.h"
#include "hal/interrupt_controller_ll.h"
#include "soc/soc_caps.h"
#ifdef __cplusplus
extern "C" {
@ -54,6 +55,24 @@ __attribute__((pure)) int interrupt_controller_hal_desc_level(int interrupt_num
*/
__attribute__((pure)) int_desc_flag_t interrupt_controller_hal_desc_flags(int interrupt_number, int cpu_number);
#if CONFIG_IDF_TARGET_ARCH_RISCV
/**
* @brief Set the interrupt level given an interrupt number.
*
* @param interrupt_number number of the interrupt
* @param level new level for this interrupt
*/
void interrupt_controller_hal_set_level(int interrupt_number, int level);
/**
* @brief Set the interrupt type given an interrupt number.
*
* @param interrupt_number number of the interrupt
* @param type new type for this interrupt
*/
void interrupt_controller_hal_set_type(int interrupt_number, int_type_t type);
#endif
/**
* @brief Gets the interrupt type given an interrupt number.
*

Wyświetl plik

@ -31,3 +31,19 @@ int_desc_flag_t interrupt_controller_hal_desc_flags(int interrupt_number, int cp
const int_desc_t *int_desc = interrupt_controller_hal_desc_table();
return(int_desc[interrupt_number].cpuflags[cpu_number]);
}
#if SOC_INTERRUPT_LEVEL_CAN_SET
void interrupt_controller_hal_set_level(int interrupt_number, int level) {
intr_cntrl_ll_set_level(interrupt_number, level);
}
#endif
#if SOC_INTERRUPT_TYPE_CAN_SET
void interrupt_controller_hal_set_type(int interrupt_number, int_type_t type) {
intr_cntrl_ll_set_type(interrupt_number, type);
}
#endif

Wyświetl plik

@ -18,6 +18,8 @@
.equ SAVE_REGS, 32
.equ CONTEXT_SIZE, (SAVE_REGS * 4)
.equ exception_from_panic, xt_unhandled_exception
.equ exception_from_isr, panicHandler
.macro save_regs
addi sp, sp, -CONTEXT_SIZE
@ -119,15 +121,20 @@
_vector_table:
.option push
.option norvc
j _panic_handler /* exception handler, entry 0 */
.rept 31
j _interrupt_handler /* 31 identical entries, all pointing to the interrupt handler */
j _panic_handler /* exception handler, entry 0 */
.rept 23
j _interrupt_handler /* 24 identical entries, all pointing to the interrupt handler */
.endr
j _panic_handler /* Call panic handler for ETS_T1_WDT_INUM interrupt (soc-level panic)*/
j _panic_handler /* Call panic handler for ETS_CACHEERR_INUM interrupt (soc-level panic)*/
.rept 6
j _interrupt_handler /* 6 identical entries, all pointing to the interrupt handler */
.endr
.option pop
.size _vector_table, .-_vector_table
/* Exception handler.*/
.global xt_unhandled_exception
.type _panic_handler, @function
_panic_handler:
addi sp, sp, -RV_STK_FRMSZ /* allocate space on stack to store necessary registers */
@ -170,19 +177,34 @@ _panic_handler:
sw t0, RV_STK_MSTATUS(sp)
csrr t0, mtvec
sw t0, RV_STK_MTVEC(sp)
csrr t0, mcause
sw t0, RV_STK_MCAUSE(sp)
csrr t0, mtval
sw t0, RV_STK_MTVAL(sp)
csrr t0, mhartid
sw t0, RV_STK_MHARTID(sp)
/* call xt_unhandled_exception(sp,cause) */
/* Call exception_from_panic(sp) or exception_from_isr(sp)
* depending on whether we have a pseudo excause or not.
* If mcause's highest bit is 1, then an interrupt called this routine,
* so we have a pseudo excause. Else, it is due to a panic, we don't have
* an excause */
mv a0, sp
csrr a1, mcause
jal zero, xt_unhandled_exception
/* panicHandler never returns */
.size _panic_handler, .-_panic_handler
/* Branches instructions don't accept immediates values, so use t1 to store
* our comparator */
li t0, 0x80000000
bgeu a1, t0, _call_panic_handler
sw a1, RV_STK_MCAUSE(sp)
/* exception_from_panic never returns */
j exception_from_panic
_call_panic_handler:
/* Remove highest bit from mcause (a1) register and save it in the
* structure */
not t0, t0
and a1, a1, t0
sw a1, RV_STK_MCAUSE(sp)
/* exception_from_isr never returns */
j exception_from_isr
.size exception_from_isr, .-exception_from_isr
/* This is the interrupt handler.
* It saves the registers on the stack,