From e25cda2c400b28362436b3ae01b969386892ab8e Mon Sep 17 00:00:00 2001 From: Omar Chebib Date: Wed, 13 Jul 2022 17:27:37 +0800 Subject: [PATCH 1/2] Task WDT: Interuptee task stack is now used for backtracing, regardless of the CPU core For RISC-V and Xtensa targets, in case a panic needs to happen when Task WDT is triggered (ESP_TASK_WDT_PANIC), the interruptee's stack is now used for printing the backtrace. Abort after Task Watchdog is triggered can happen on APP CPU (second core). --- components/esp_system/Kconfig | 1 + components/esp_system/crosscore_int.c | 16 +- .../include/esp_private/crosscore_int.h | 14 +- components/esp_system/panic.c | 4 + components/esp_system/port/panic_handler.c | 3 +- components/esp_system/task_wdt.c | 254 +++++++++++++++--- components/espcoredump/src/core_dump_common.c | 14 +- .../src/port/riscv/core_dump_port.c | 12 +- components/freertos/Kconfig | 2 +- components/freertos/linker.lf | 6 +- 10 files changed, 278 insertions(+), 48 deletions(-) diff --git a/components/esp_system/Kconfig b/components/esp_system/Kconfig index 68c16ad280..bff5a3e38c 100644 --- a/components/esp_system/Kconfig +++ b/components/esp_system/Kconfig @@ -394,6 +394,7 @@ menu "ESP System Settings" config ESP_TASK_WDT bool "Initialize Task Watchdog Timer on startup" default y + select FREERTOS_ENABLE_TASK_SNAPSHOT help The Task Watchdog Timer can be used to make sure individual tasks are still running. Enabling this option will cause the Task Watchdog Timer to be diff --git a/components/esp_system/crosscore_int.c b/components/esp_system/crosscore_int.c index 34c43d90a1..4aae2a2540 100644 --- a/components/esp_system/crosscore_int.c +++ b/components/esp_system/crosscore_int.c @@ -31,6 +31,7 @@ #if !CONFIG_IDF_TARGET_ESP32C3 && !CONFIG_IDF_TARGET_ESP32H2 && !IDF_TARGET_ESP32C2 #define REASON_PRINT_BACKTRACE BIT(2) +#define REASON_TWDT_ABORT BIT(4) #endif static portMUX_TYPE reason_spinlock = portMUX_INITIALIZER_UNLOCKED; @@ -90,11 +91,18 @@ static void IRAM_ATTR esp_crosscore_isr(void *arg) { update_breakpoints(); } #endif // !CONFIG_ESP_SYSTEM_GDBSTUB_RUNTIME -#if !CONFIG_IDF_TARGET_ESP32C3 && !CONFIG_IDF_TARGET_ESP32H2 && !CONFIG_IDF_TARGET_ESP32C2 // IDF-2986 +#if CONFIG_IDF_TARGET_ARCH_XTENSA // IDF-2986 if (my_reason_val & REASON_PRINT_BACKTRACE) { esp_backtrace_print(100); } -#endif + + if (my_reason_val & REASON_TWDT_ABORT) { + extern void task_wdt_timeout_abort_xtensa(bool); + /* Called from a crosscore interrupt, thus, we are not the core that received + * the TWDT interrupt, call the function with `false` as a parameter. */ + task_wdt_timeout_abort_xtensa(false); + } +#endif // CONFIG_IDF_TARGET_ARCH_XTENSA } //Initialize the crosscore interrupt on this core. Call this once @@ -162,4 +170,8 @@ void IRAM_ATTR esp_crosscore_int_send_print_backtrace(int core_id) { esp_crosscore_int_send(core_id, REASON_PRINT_BACKTRACE); } + +void IRAM_ATTR esp_crosscore_int_send_twdt_abort(int core_id) { + esp_crosscore_int_send(core_id, REASON_TWDT_ABORT); +} #endif diff --git a/components/esp_system/include/esp_private/crosscore_int.h b/components/esp_system/include/esp_private/crosscore_int.h index 2046a05f7b..0acdbb8159 100644 --- a/components/esp_system/include/esp_private/crosscore_int.h +++ b/components/esp_system/include/esp_private/crosscore_int.h @@ -54,12 +54,24 @@ void esp_crosscore_int_send_gdb_call(int core_id); /** * Send an interrupt to a CPU indicating it should print its current backtrace * - * This is use internally by the Task Watchdog to dump the backtrace of the + * This is used internally by the Task Watchdog to dump the backtrace of the * opposite core and should not be called from application code. * * @param core_id Core that should print its backtrace */ void esp_crosscore_int_send_print_backtrace(int core_id); + +/** + * Send an interrupt to a CPU indicating it call `task_wdt_timeout_abort_xtensa`. + * This will make the CPU abort, using the interrupted task frame. + * + * This is used internally by the Task Watchdog when it should abort after a task, + * running on the other core than the one running the TWDT ISR, failed to reset + * its timer. + * + * @param core_id Core that should abort + */ +void esp_crosscore_int_send_twdt_abort(int core_id); #endif #ifdef __cplusplus diff --git a/components/esp_system/panic.c b/components/esp_system/panic.c index ab001e2c08..3903796a79 100644 --- a/components/esp_system/panic.c +++ b/components/esp_system/panic.c @@ -348,6 +348,10 @@ void esp_panic_handler(panic_info_t *info) } else { disable_all_wdts(); s_dumping_core = true; + /* No matter if we come here from abort or an exception, this variable must be reset. + * Else, any exception/error occuring during the current panic handler would considered + * an abort. */ + g_panic_abort = false; #if CONFIG_ESP_COREDUMP_ENABLE_TO_FLASH esp_core_dump_to_flash(info); #endif diff --git a/components/esp_system/port/panic_handler.c b/components/esp_system/port/panic_handler.c index 96f81a7ab0..07782b6fe5 100644 --- a/components/esp_system/port/panic_handler.c +++ b/components/esp_system/port/panic_handler.c @@ -64,7 +64,8 @@ static void print_state_for_core(const void *f, int core) * Don't print it on abort to reduce clutter. * On other architectures, register values need to be known for backtracing. */ -#if defined(__XTENSA__) && defined(XCHAL_HAVE_WINDOWED) +#if (CONFIG_IDF_TARGET_ARCH_XTENSA && defined(XCHAL_HAVE_WINDOWED)) || \ + (CONFIG_IDF_TARGET_ARCH_RISCV && CONFIG_ESP_SYSTEM_USE_EH_FRAME) if (!g_panic_abort) { #else if (true) { diff --git a/components/esp_system/task_wdt.c b/components/esp_system/task_wdt.c index bf99cea0ed..58d1247bf0 100644 --- a/components/esp_system/task_wdt.c +++ b/components/esp_system/task_wdt.c @@ -23,6 +23,28 @@ #include "esp_private/periph_ctrl.h" #include "esp_private/system_internal.h" #include "esp_private/crosscore_int.h" +#include "freertos/task_snapshot.h" + +#if CONFIG_ESP_SYSTEM_USE_EH_FRAME +#include "esp_private/eh_frame_parser.h" +#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME + +#if CONFIG_IDF_TARGET_ARCH_RISCV && !CONFIG_ESP_SYSTEM_USE_EH_FRAME +/* Function used to print all the registers pointed by the given frame .*/ +extern void panic_print_registers(const void *frame, int core); +#endif // CONFIG_IDF_TARGET_ARCH_RISCV && !CONFIG_ESP_SYSTEM_USE_EH_FRAME + +/* We will use this function in order to simulate an `abort()` occurring in + * a different context than the one it's called from. */ +extern void xt_unhandled_exception(void *frame); + +/* Global flag set to make the `panic` mechanism think a real `abort()` was + * called. This is used in the ISR handler, in case we have to panic when + * a task doesn't feed its timer. */ +extern bool g_panic_abort; + +/* Global flag marking whether the current ISR is a Task Watchdog ISR. */ +bool g_twdt_isr = false; // --------------------------------------------------- Definitions ----------------------------------------------------- @@ -271,15 +293,15 @@ static void unsubscribe_idle(uint32_t core_mask) while (core_mask != 0) { if (core_mask & 0x1) { #if CONFIG_FREERTOS_SMP - assert(core_user_handles[core_num]); - esp_deregister_freertos_idle_hook_for_cpu(idle_hook_cb, core_num); - ESP_ERROR_CHECK(esp_task_wdt_delete_user(core_user_handles[core_num])); - core_user_handles[core_num] = NULL; + assert(core_user_handles[core_num]); + esp_deregister_freertos_idle_hook_for_cpu(idle_hook_cb, core_num); + ESP_ERROR_CHECK(esp_task_wdt_delete_user(core_user_handles[core_num])); + core_user_handles[core_num] = NULL; #else - TaskHandle_t idle_task_handle = xTaskGetIdleTaskHandleForCPU(core_num); - assert(idle_task_handle); - esp_deregister_freertos_idle_hook_for_cpu(idle_hook_cb, core_num); - ESP_ERROR_CHECK(esp_task_wdt_delete(idle_task_handle)); + TaskHandle_t idle_task_handle = xTaskGetIdleTaskHandleForCPU(core_num); + assert(idle_task_handle); + esp_deregister_freertos_idle_hook_for_cpu(idle_hook_cb, core_num); + ESP_ERROR_CHECK(esp_task_wdt_delete(idle_task_handle)); #endif } core_mask >>= 1; @@ -299,14 +321,14 @@ static void subscribe_idle(uint32_t core_mask) while (core_mask != 0) { if (core_mask & 0x1) { #if CONFIG_FREERTOS_SMP - snprintf(core_user_names[core_num], CORE_USER_NAME_LEN, "CPU %d", (uint8_t)core_num); - ESP_ERROR_CHECK(esp_task_wdt_add_user((const char *)core_user_names[core_num], &core_user_handles[core_num])); - ESP_ERROR_CHECK(esp_register_freertos_idle_hook_for_cpu(idle_hook_cb, core_num)); + snprintf(core_user_names[core_num], CORE_USER_NAME_LEN, "CPU %d", (uint8_t)core_num); + ESP_ERROR_CHECK(esp_task_wdt_add_user((const char *)core_user_names[core_num], &core_user_handles[core_num])); + ESP_ERROR_CHECK(esp_register_freertos_idle_hook_for_cpu(idle_hook_cb, core_num)); #else - TaskHandle_t idle_task_handle = xTaskGetIdleTaskHandleForCPU(core_num); - assert(idle_task_handle); - ESP_ERROR_CHECK(esp_task_wdt_add(idle_task_handle)); - ESP_ERROR_CHECK(esp_register_freertos_idle_hook_for_cpu(idle_hook_cb, core_num)); + TaskHandle_t idle_task_handle = xTaskGetIdleTaskHandleForCPU(core_num); + assert(idle_task_handle); + ESP_ERROR_CHECK(esp_task_wdt_add(idle_task_handle)); + ESP_ERROR_CHECK(esp_register_freertos_idle_hook_for_cpu(idle_hook_cb, core_num)); #endif } core_mask >>= 1; @@ -314,6 +336,168 @@ static void subscribe_idle(uint32_t core_mask) } } + +/** + * The behavior of the Task Watchdog depends on the configuration from the `menuconfig`. + * It can be summarized as follow, regardless of the target: + * +------------------------+--------------------------------+--------------------------+ + * | \ Panic configuration | | | + * | +------------------+ | Panic Enabled | Panic Disabled | + * | TWDT triggered on \ | | | + * +------------------------+--------------------------------+--------------------------+ + * | | - Current core backtrace | - Current core backtrace | + * | Both Cores | - Crosscore TWDT abort | - Crosscore backtrace | + * | | - Wait for other core to abort | | + * +------------------------+--------------------------------+--------------------------+ + * | Other Core | - Crosscore TWDT abort | - Crosscore backtrace | + * +------------------------+--------------------------------+--------------------------+ + * | Current Core | - Abort from current CPU | - Current core backtrace | + * +------------------------+--------------------------------+--------------------------+ + * + */ + +#if CONFIG_IDF_TARGET_ARCH_RISCV + +static void task_wdt_timeout_handling(int cores_fail, bool panic) +{ + /* For RISC-V, make sure the cores that fail is only composed of core 0. */ + assert(cores_fail == BIT(0)); + + const int current_core = 0; + TaskSnapshot_t snapshot = { 0 }; + BaseType_t ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot); + + if (p_twdt_obj->panic) { + assert(ret == pdTRUE); + ESP_EARLY_LOGE(TAG, "Aborting."); + esp_reset_reason_set_hint(ESP_RST_TASK_WDT); + /** + * We cannot simply use `abort` here because the `panic` handler would + * interpret it as if the task watchdog ISR aborted and so, print this + * current ISR backtrace/context. We want to trick the `panic` handler + * to think the task itself is aborting. + * To do so, we need to get the interruptee's top of the stack. It contains + * its own context, saved when the interrupt occurred. + * We must also set the global flag that states that an abort occurred + * (and not a panic) + **/ + g_panic_abort = true; + g_twdt_isr = true; + void *frame = (void *) snapshot.pxTopOfStack; +#if CONFIG_ESP_SYSTEM_USE_EH_FRAME + ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core); +#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME + xt_unhandled_exception(frame); + } else { + /* Targets based on a RISC-V CPU cannot perform backtracing that easily. + * We have two options here: + * - Perform backtracing at runtime. + * - Let IDF monitor do the backtracing for us. Used during panic already. + * This could be configurable, choosing one or the other depending on + * CONFIG_ESP_SYSTEM_USE_EH_FRAME configuration option. + * + * In both cases, this takes time, and we are in an ISR, we must + * exit this handler as fast as possible, then we will simply print + * the interruptee's registers. + */ + if (ret == pdTRUE) { + void *frame = (void *) snapshot.pxTopOfStack; +#if CONFIG_ESP_SYSTEM_USE_EH_FRAME + ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core); + esp_eh_frame_print_backtrace(frame); +#else // CONFIG_ESP_SYSTEM_USE_EH_FRAME + ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) registers", current_core); + panic_print_registers(frame, current_core); + esp_rom_printf("\r\n"); +#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME + } + } +} + +#else // CONFIG_IDF_TARGET_ARCH_RISCV + +/** + * Function simulating an abort coming from the interrupted task of the current + * core. + * It is called either by the function right below or by a crosscore interrupt, + * in the case where the other core (than the main one) has to abort because one + * of his tasks didn't reset the TWDT on time. + */ +void task_wdt_timeout_abort_xtensa(bool current_core) +{ + TaskSnapshot_t snapshot = { 0 }; + BaseType_t ret = pdTRUE; + + ESP_EARLY_LOGE(TAG, "Aborting."); + esp_reset_reason_set_hint(ESP_RST_TASK_WDT); + ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot); + assert(ret == pdTRUE); + g_panic_abort = true; + /* For Xtensa, we should set this flag as late as possible, as this function may + * be called after a crosscore interrupt. Indeed, a higher interrupt may occur + * after calling the crosscore interrupt, if its handler fails, this flag + * shall not be set. + * This flag will tell the coredump component (if activated) that yes, we are in + * an ISR context, but it is intended, it is not because an ISR encountered an + * exception. If we don't set such flag, later tested by coredump, the later would + * switch the execution frame/context we are giving it to the interrupt stack. + * For details about this behavior in the TODO task: IDF-5694 + */ + g_twdt_isr = true; + void *frame = (void *) snapshot.pxTopOfStack; + if (current_core) { + ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", xPortGetCoreID()); + } else { + ESP_EARLY_LOGE(TAG, "Print CPU %d backtrace", xPortGetCoreID()); + } + xt_unhandled_exception(frame); +} + +static void task_wdt_timeout_handling(int cores_fail, bool panic) +{ + const int current_core = xPortGetCoreID(); + + if (panic) { +#if !CONFIG_FREERTOS_UNICORE + const int other_core = !current_core; + + if ((cores_fail & BIT(0)) && (cores_fail & BIT(1))) { + /* In the case where both CPUs have failing tasks, print the current CPU backtrace and then let the + * other core fail. */ + ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core); + esp_backtrace_print(100); + /* TODO: the interrupt we send should have the highest priority */ + esp_crosscore_int_send_twdt_abort(other_core); + /* We are going to abort, on the other core, we have nothing to + * do anymore here, just wait until we crash */ + while (1) {} + } else if (cores_fail & BIT(other_core)) { + /* If only the other core is failing, we can tell it to abort. */ + esp_crosscore_int_send_twdt_abort(other_core); + while (1) {} + } +#endif // !CONFIG_FREERTOS_UNICORE + /* Current core is failing, abort right now */ + task_wdt_timeout_abort_xtensa(true); + } else { + /* Print backtrace of the core that failed to reset the watchdog */ + if (cores_fail & BIT(current_core)) { + ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core); + esp_backtrace_print(100); + } +#if !CONFIG_FREERTOS_UNICORE + const int other_core = !current_core; + if (cores_fail & BIT(other_core)) { + ESP_EARLY_LOGE(TAG, "Print CPU %d backtrace", other_core); + esp_crosscore_int_send_print_backtrace(other_core); + } +#endif // !CONFIG_FREERTOS_UNICORE + } +} + +#endif // CONFIG_IDF_TARGET_ARCH_RISCV + + /** * @brief TWDT timeout ISR function * @@ -342,30 +526,48 @@ static void task_wdt_isr(void *arg) */ ESP_EARLY_LOGE(TAG, "Task watchdog got triggered. The following tasks/users did not reset the watchdog in time:"); twdt_entry_t *entry; + /* Keep a bitmap of CPU cores having tasks that have not reset TWDT. + * Bit 0 represents core 0, bit 1 represents core 1, and so on. */ + int cpus_fail = 0; + bool panic = p_twdt_obj->panic; + SLIST_FOREACH(entry, &p_twdt_obj->entries_slist, slist_entry) { if (!entry->has_reset) { if (entry->task_handle) { #if CONFIG_FREERTOS_SMP #if configNUM_CORES > 1 // Log the task's name and its affinity - ESP_EARLY_LOGE(TAG, " - %s (0x%x)", pcTaskGetName(entry->task_handle), vTaskCoreAffinityGet(entry->task_handle)); + const UBaseType_t affinity = vTaskCoreAffinityGet(entry->task_handle); + ESP_EARLY_LOGE(TAG, " - %s (0x%x)", pcTaskGetName(entry->task_handle), affinity); + cpus_fail |= affinity; #else // configNUM_CORES > 1 // Log the task's name ESP_EARLY_LOGE(TAG, " - %s", pcTaskGetName(entry->task_handle)); + cpus_fail |= BIT(0); #endif // configNUM_CORES > 1 #else // CONFIG_FREERTOS_SMP BaseType_t task_affinity = xTaskGetAffinity(entry->task_handle); const char *cpu; if (task_affinity == 0) { cpu = DRAM_STR("CPU 0"); + cpus_fail |= BIT(0); } else if (task_affinity == 1) { cpu = DRAM_STR("CPU 1"); + cpus_fail |= BIT(1); } else { cpu = DRAM_STR("CPU 0/1"); + cpus_fail |= BIT(1) | BIT(0); } ESP_EARLY_LOGE(TAG, " - %s (%s)", pcTaskGetName(entry->task_handle), cpu); #endif // CONFIG_FREERTOS_SMP } else { + /* User entry, we cannot predict on which core it is scheduled to run, + * so let's mark all cores as failing */ +#if configNUM_CORES > 1 + cpus_fail = BIT(1) | BIT(0); +#else // configNUM_CORES > 1 + cpus_fail = BIT(0); +#endif // configNUM_CORES > 1 ESP_EARLY_LOGE(TAG, " - %s", entry->user_name); } } @@ -378,24 +580,10 @@ static void task_wdt_isr(void *arg) // Run user ISR handler esp_task_wdt_isr_user_handler(); + // Trigger configured timeout behavior (e.g., panic or print backtrace) - if (p_twdt_obj->panic) { - ESP_EARLY_LOGE(TAG, "Aborting."); - esp_reset_reason_set_hint(ESP_RST_TASK_WDT); - abort(); - } else { // Print -#if !CONFIG_IDF_TARGET_ESP32C3 && !CONFIG_IDF_TARGET_ESP32H2 && !CONFIG_IDF_TARGET_ESP32C2 // TODO: ESP32-C3 IDF-2986 - int current_core = xPortGetCoreID(); - // Print backtrace of current core - ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core); - esp_backtrace_print(100); -#if !CONFIG_FREERTOS_UNICORE - // Print backtrace of other core - ESP_EARLY_LOGE(TAG, "Print CPU %d backtrace", !current_core); - esp_crosscore_int_send_print_backtrace(!current_core); -#endif -#endif - } + assert(cpus_fail != 0); + task_wdt_timeout_handling(cpus_fail, panic); } // ----------------------------------------------------- Public -------------------------------------------------------- diff --git a/components/espcoredump/src/core_dump_common.c b/components/espcoredump/src/core_dump_common.c index 766afe5090..2ea3502701 100644 --- a/components/espcoredump/src/core_dump_common.c +++ b/components/espcoredump/src/core_dump_common.c @@ -201,7 +201,7 @@ bool esp_core_dump_get_task_snapshot(void *handle, core_dump_task_header_t *task task->stack_start = (uint32_t)rtos_snapshot.pxTopOfStack; task->stack_end = (uint32_t)rtos_snapshot.pxEndOfStack; - if (!xPortInterruptedFromISRContext() && handle == esp_core_dump_get_current_task_handle()) { + if (!esp_core_dump_in_isr_context() && handle == esp_core_dump_get_current_task_handle()) { // Set correct stack top for current task; only modify if we came from the task, // and not an ISR that crashed. task->stack_start = (uint32_t) s_exc_frame; @@ -213,7 +213,7 @@ bool esp_core_dump_get_task_snapshot(void *handle, core_dump_task_header_t *task if (handle == esp_core_dump_get_current_task_handle()) { ESP_COREDUMP_LOG_PROCESS("Crashed task %x", handle); esp_core_dump_port_set_crashed_tcb((uint32_t)handle); - if (xPortInterruptedFromISRContext()) { + if (esp_core_dump_in_isr_context()) { esp_core_dump_switch_task_stack_to_isr(task, interrupted_stack); } } @@ -286,7 +286,15 @@ inline bool esp_core_dump_tcb_addr_is_sane(uint32_t addr) inline bool esp_core_dump_in_isr_context(void) { - return xPortInterruptedFromISRContext(); + /* This function will be used to check whether a panic occurred in an ISR. + * In that case, the execution frame must be switch to the interrupt stack. + * However, in case where the task watchdog ISR calls the panic handler, + * `xPortInterruptedFromISRContext` returns true, BUT, we don't want to + * switch the frame to the ISR context. Thus, check that we are not + * coming from TWDT ISR. This should be refactored. + * TODO: IDF-5694. */ + extern bool g_twdt_isr; + return xPortInterruptedFromISRContext() && !g_twdt_isr; } inline core_dump_task_handle_t esp_core_dump_get_current_task_handle() diff --git a/components/espcoredump/src/port/riscv/core_dump_port.c b/components/espcoredump/src/port/riscv/core_dump_port.c index f5370c2d01..19d9017705 100644 --- a/components/espcoredump/src/port/riscv/core_dump_port.c +++ b/components/espcoredump/src/port/riscv/core_dump_port.c @@ -83,7 +83,7 @@ typedef union { /** * The following structure represents the NOTE section in the coredump. - * Its type must be PR_STATUS as it contains the regsiters values and the + * Its type must be PR_STATUS as it contains the registers value and the * program status. * As our coredump will be used with GDB, we only need to fill the info * it needs. We are going to use the macros taken from GDB's elf32-riscv.c @@ -106,7 +106,7 @@ typedef union { #define PRSTATUS_OFFSET_PR_REG 72 #define ELF_GREGSET_T_SIZE 128 -/* We can determine the padding thank to the previous macros */ +/* We can determine the padding thanks to the previous macros */ #define PRSTATUS_SIG_PADDING (PRSTATUS_OFFSET_PR_CURSIG) #define PRSTATUS_PID_PADDING (PRSTATUS_OFFSET_PR_PID - PRSTATUS_OFFSET_PR_CURSIG - sizeof(uint16_t)) #define PRSTATUS_REG_PADDING (PRSTATUS_OFFSET_PR_REG - PRSTATUS_OFFSET_PR_PID - sizeof(uint32_t)) @@ -172,7 +172,7 @@ inline uint16_t esp_core_dump_get_arch_id() } /** - * Reset fake tasks' stack counter. This lets use reuse the previously allocated + * Reset fake tasks' stack counter. This lets us reuse the previously allocated * fake stacks. */ void esp_core_dump_reset_fake_stacks(void) @@ -358,9 +358,9 @@ void esp_core_dump_port_set_crashed_tcb(uint32_t handle) { } /** - * Function returning the extra info to be written in the dedicated section in - * the core file. - * info must not be NULL, it will be affected to the extra info data. + * Function returning the extra info to be written to the dedicated section in + * the core file. *info must not be NULL, it will be assigned to the extra info + * data. * The size, in bytes, of the data pointed by info is returned. */ uint32_t esp_core_dump_get_extra_info(void **info) diff --git a/components/freertos/Kconfig b/components/freertos/Kconfig index eb55bf2b48..7927e5cf75 100644 --- a/components/freertos/Kconfig +++ b/components/freertos/Kconfig @@ -463,7 +463,7 @@ menu "FreeRTOS" default y help When enabled, the functions related to snapshots, such as vTaskGetSnapshot or uxTaskGetSnapshotAll, are - compiled and linked. Task snapshots are primarily used by GDB Stub and Core dump. + compiled and linked. Task snapshots are used by Task Watchdog (TWDT), GDB Stub and Core dump. endmenu # Port diff --git a/components/freertos/linker.lf b/components/freertos/linker.lf index df6cdefd96..58fda61f49 100644 --- a/components/freertos/linker.lf +++ b/components/freertos/linker.lf @@ -4,7 +4,11 @@ archive: libfreertos.a entries: * (noflash_text) if FREERTOS_PLACE_SNAPSHOT_FUNS_INTO_FLASH = y: - task_snapshot (default) + # vTaskGetSnapshot is omitted on purpose: as it is used to by the Task Watchdog (TWDT) interrupt + # handler, we want to always keep it in IRAM + tasks: pxTaskGetNext (default) + tasks: uxTaskGetSnapshotAll (default) + tasks: pxGetNextTaskList (default) if FREERTOS_PLACE_FUNCTIONS_INTO_FLASH = y: port: pxPortInitialiseStack (default) port: xPortStartScheduler (default) From 2f7bae7a6edb1d57e0452eeba12ad353a21f59f4 Mon Sep 17 00:00:00 2001 From: Omar Chebib Date: Fri, 15 Jul 2022 19:38:23 +0800 Subject: [PATCH 2/2] Task WDT: add a panic test case for to test panic on both CPU cores --- .../esp_system/test/test_reset_reason.c | 2 +- .../system/panic/main/test_panic_main.c | 42 ++++++++- tools/test_apps/system/panic/pytest_panic.py | 89 ++++++++++++++++--- 3 files changed, 119 insertions(+), 14 deletions(-) diff --git a/components/esp_system/test/test_reset_reason.c b/components/esp_system/test/test_reset_reason.c index 793e5e8fc9..9347b8b013 100644 --- a/components/esp_system/test/test_reset_reason.c +++ b/components/esp_system/test/test_reset_reason.c @@ -248,7 +248,7 @@ static void check_reset_reason_task_wdt(void) } TEST_CASE_MULTIPLE_STAGES("reset reason ESP_RST_TASK_WDT after task watchdog", - "[reset_reason][reset=abort,"RESET"]", + "[reset_reason][reset="RESET"]", do_task_wdt, check_reset_reason_task_wdt); diff --git a/tools/test_apps/system/panic/main/test_panic_main.c b/tools/test_apps/system/panic/main/test_panic_main.c index 85e3d5bfbd..2b4c4512ef 100644 --- a/tools/test_apps/system/panic/main/test_panic_main.c +++ b/tools/test_apps/system/panic/main/test_panic_main.c @@ -16,7 +16,11 @@ static const char* get_test_name(void); static void test_abort(void); static void test_abort_cache_disabled(void); static void test_int_wdt(void); -static void test_task_wdt(void); +static void test_task_wdt_cpu0(void); +#if !CONFIG_FREERTOS_UNICORE +static void test_task_wdt_cpu1(void); +static void test_task_wdt_both_cpus(void); +#endif static void test_storeprohibited(void); static void test_cache_error(void); static void test_int_wdt_cache_disabled(void); @@ -50,7 +54,11 @@ void app_main(void) HANDLE_TEST(test_abort); HANDLE_TEST(test_abort_cache_disabled); HANDLE_TEST(test_int_wdt); - HANDLE_TEST(test_task_wdt); + HANDLE_TEST(test_task_wdt_cpu0); +#if !CONFIG_FREERTOS_UNICORE + HANDLE_TEST(test_task_wdt_cpu1); + HANDLE_TEST(test_task_wdt_both_cpus); +#endif HANDLE_TEST(test_storeprohibited); HANDLE_TEST(test_cache_error); HANDLE_TEST(test_int_wdt_cache_disabled); @@ -87,13 +95,41 @@ static void test_int_wdt(void) } } -static void test_task_wdt(void) +static void test_task_wdt_cpu0(void) { while (true) { ; } } +#if !CONFIG_FREERTOS_UNICORE +static void infinite_loop(void* arg) { + (void) arg; + while(1) { + ; + } +} + +static void test_task_wdt_cpu1(void) +{ + xTaskCreatePinnedToCore(infinite_loop, "Infinite loop", 1024, NULL, 1, NULL, 1); + while (true) { + vTaskDelay(1); + } +} + +static void test_task_wdt_both_cpus(void) +{ + xTaskCreatePinnedToCore(infinite_loop, "Infinite loop", 1024, NULL, 4, NULL, 1); + /* Give some time to the task on CPU 1 to be scheduled */ + vTaskDelay(1); + xTaskCreatePinnedToCore(infinite_loop, "Infinite loop", 1024, NULL, 4, NULL, 0); + while (true) { + ; + } +} +#endif + static void __attribute__((no_sanitize_undefined)) test_storeprohibited(void) { *(int*) 0x1 = 0; diff --git a/tools/test_apps/system/panic/pytest_panic.py b/tools/test_apps/system/panic/pytest_panic.py index f53a59a1f1..fe11577e6a 100644 --- a/tools/test_apps/system/panic/pytest_panic.py +++ b/tools/test_apps/system/panic/pytest_panic.py @@ -18,6 +18,16 @@ CONFIGS = [ pytest.param('panic', marks=[pytest.mark.esp32, pytest.mark.esp32s2]), ] +# An ESP32-only config, used for tests requiring two cores +CONFIGS_ESP32 = [ + pytest.param('coredump_flash_bin_crc', marks=[pytest.mark.esp32]), + pytest.param('coredump_flash_elf_sha', marks=[pytest.mark.esp32]), + pytest.param('coredump_uart_bin_crc', marks=[pytest.mark.esp32]), + pytest.param('coredump_uart_elf_crc', marks=[pytest.mark.esp32]), + pytest.param('gdbstub', marks=[pytest.mark.esp32]), + pytest.param('panic', marks=[pytest.mark.esp32]), +] + def get_default_backtrace(config: str) -> List[str]: return [config, 'app_main', 'main_task', 'vPortTaskWrapper'] @@ -28,11 +38,16 @@ def common_test(dut: PanicTestDut, config: str, expected_backtrace: Optional[Lis dut.expect_exact('Entering gdb stub now.') dut.start_gdb() frames = dut.gdb_backtrace() - if not dut.match_backtrace(frames, expected_backtrace): - raise AssertionError( - 'Unexpected backtrace in test {}:\n{}'.format(config, pformat(frames)) - ) - dut.revert_log_level() + # Make sure frames and the expected_backtrace have the same size, else, an exception will occur + if expected_backtrace is not None: + size = min(len(frames), len(expected_backtrace)) + frames = frames[0:size] + expected_backtrace = expected_backtrace[0:size] + if not dut.match_backtrace(frames, expected_backtrace): + raise AssertionError( + 'Unexpected backtrace in test {}:\n{}'.format(config, pformat(frames)) + ) + dut.revert_log_level() return if 'uart' in config: @@ -47,14 +62,14 @@ def common_test(dut: PanicTestDut, config: str, expected_backtrace: Optional[Lis @pytest.mark.parametrize('config', CONFIGS, indirect=True) @pytest.mark.generic -def test_task_wdt(dut: PanicTestDut, config: str, test_func_name: str) -> None: +def test_task_wdt_cpu0(dut: PanicTestDut, config: str, test_func_name: str) -> None: dut.expect_test_func_name(test_func_name) dut.expect_exact( 'Task watchdog got triggered. The following tasks/users did not reset the watchdog in time:' ) dut.expect_exact('CPU 0: main') - dut.expect(r'abort\(\) was called at PC [0-9xa-f]+ on core 0') dut.expect_none('register dump:') + dut.expect_exact('Print CPU 0 (current core) backtrace') dut.expect_backtrace() dut.expect_elf_sha256() dut.expect_none('Guru Meditation') @@ -64,9 +79,63 @@ def test_task_wdt(dut: PanicTestDut, config: str, test_func_name: str) -> None: dut, config, expected_backtrace=[ - # Backtrace interrupted when abort is called, IDF-842 - 'panic_abort', - 'esp_system_abort', + 'test_task_wdt_cpu0', + 'app_main' + ], + ) + else: + common_test(dut, config) + + +@pytest.mark.parametrize('config', CONFIGS_ESP32, indirect=True) +@pytest.mark.generic +def test_task_wdt_cpu1(dut: PanicTestDut, config: str, test_func_name: str) -> None: + dut.expect_test_func_name(test_func_name) + dut.expect_exact( + 'Task watchdog got triggered. The following tasks/users did not reset the watchdog in time:' + ) + dut.expect_exact('CPU 1: Infinite loop') + dut.expect_none('register dump:') + dut.expect_exact('Print CPU 1 backtrace') + dut.expect_backtrace() + dut.expect_elf_sha256() + dut.expect_none('Guru Meditation') + + if config == 'gdbstub': + common_test( + dut, + config, + expected_backtrace=[ + 'infinite_loop' + ], + ) + else: + common_test(dut, config) + + +@pytest.mark.parametrize('config', CONFIGS_ESP32, indirect=True) +@pytest.mark.generic +def test_task_wdt_both_cpus(dut: PanicTestDut, config: str, test_func_name: str) -> None: + dut.expect_test_func_name(test_func_name) + dut.expect_exact( + 'Task watchdog got triggered. The following tasks/users did not reset the watchdog in time:' + ) + dut.expect_exact('CPU 0: Infinite loop') + dut.expect_exact('CPU 1: Infinite loop') + dut.expect_none('register dump:') + dut.expect_exact('Print CPU 0 (current core) backtrace') + dut.expect_backtrace() + dut.expect_exact('Print CPU 1 backtrace') + dut.expect_backtrace() + dut.expect_elf_sha256() + dut.expect_none('Guru Meditation') + + if config == 'gdbstub': + common_test( + dut, + config, + expected_backtrace=[ + 'infinite_loop' ], ) else: