From d18562516268fee7e61715cb4ccdd9dd153e91ea Mon Sep 17 00:00:00 2001 From: Felipe Neves Date: Mon, 7 Oct 2019 17:59:26 -0300 Subject: [PATCH] freertos/xtensa_context: added infrastructure to receive the spill register optimized code --- .../include/freertos/xtensa_context.h | 7 +++ components/freertos/xt_asm_utils.h | 63 +++++++++++++++++++ components/freertos/xtensa_context.S | 5 ++ 3 files changed, 75 insertions(+) create mode 100644 components/freertos/xt_asm_utils.h diff --git a/components/freertos/include/freertos/xtensa_context.h b/components/freertos/include/freertos/xtensa_context.h index 120676dad4..980d79494c 100644 --- a/components/freertos/include/freertos/xtensa_context.h +++ b/components/freertos/include/freertos/xtensa_context.h @@ -129,11 +129,18 @@ STRUCT_FIELD (long, 4, XT_STK_LEND, lend) STRUCT_FIELD (long, 4, XT_STK_LCOUNT, lcount) #endif #ifndef __XTENSA_CALL0_ABI__ +#ifdef CONFIG_FREERTOS_PORT_OPTIMIZE_INTERRUPT_HANDLING +/* TODO: prepare the stack frame to receive all window registers */ +STRUCT_FIELD (long, 4, XT_STK_TMP0, tmp0) +STRUCT_FIELD (long, 4, XT_STK_TMP1, tmp1) +STRUCT_FIELD (long, 4, XT_STK_TMP2, tmp2) +#else /* Temporary space for saving stuff during window spill */ STRUCT_FIELD (long, 4, XT_STK_TMP0, tmp0) STRUCT_FIELD (long, 4, XT_STK_TMP1, tmp1) STRUCT_FIELD (long, 4, XT_STK_TMP2, tmp2) #endif +#endif #ifdef XT_USE_SWPRI /* Storage for virtual priority mask */ STRUCT_FIELD (long, 4, XT_STK_VPRI, vpri) diff --git a/components/freertos/xt_asm_utils.h b/components/freertos/xt_asm_utils.h new file mode 100644 index 0000000000..b3caad1cb6 --- /dev/null +++ b/components/freertos/xt_asm_utils.h @@ -0,0 +1,63 @@ +#ifndef __XT_ASM_UTILS_H +#define __XT_ASM_UTILS_H + +/* + * SPILL_ALL_WINDOWS + * + * Spills all windowed registers (i.e. registers not visible as + * A0-A15) to their ABI-defined spill regions on the stack. 
+ * + * Unlike the Xtensa HAL implementation, this code requires that the + * EXCM and WOE bits be enabled in PS, and relies on repeated hardware + * exception handling to do the register spills. The trick is to do a + * noop write to the high registers, which the hardware will trap + * (into an overflow exception) in the case where those registers are + * already used by an existing call frame. Then it rotates the window + * and repeats until all but the A0-A3 registers of the original frame + * are guaranteed to be spilled, eventually rotating back around into + * the original frame. Advantages: + * + * - Vastly smaller code size + * + * - More easily maintained if changes are needed to window over/underflow + * exception handling. + * + * - Requires no scratch registers to do its work, so can be used safely in any + * context. + * + * - If the WOE bit is not enabled (for example, in code written for + * the CALL0 ABI), this becomes a silent noop and operates compatibly. + * + * - Hilariously it's ACTUALLY FASTER than the HAL routine. And not + * just a little bit, it's MUCH faster. With a mostly full register + * file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill + * registers with this vs. 279 (!) to do it with + * xthal_spill_windows(). 
+ */ + +.macro SPILL_ALL_WINDOWS +#if XCHAL_NUM_AREGS == 64 /* 64-register case: five probe/rotate steps; rotw amounts are 3, 3, 3, 3, 4 (total 16) */ + and a12, a12, a12 /* no-op write to a12; per the header comment, hardware traps into an overflow exception if the register is already used by an existing call frame */ + rotw 3 /* rotate the window on to the next group to probe */ + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 4 /* final rotate; NOTE(review): presumably lands back on the original frame -- confirm against the ISA manual */ +#elif XCHAL_NUM_AREGS == 32 /* 32-register case: three probe/rotate steps; rotw amounts are 3, 3, 2 (total 8) */ + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a4, a4, a4 /* last probe writes a4 rather than a12, paired with the shorter rotw 2 that follows */ + rotw 2 +#else +#error Unrecognized XCHAL_NUM_AREGS +#endif +.endm + + +#endif /* __XT_ASM_UTILS_H */ \ No newline at end of file diff --git a/components/freertos/xtensa_context.S b/components/freertos/xtensa_context.S index a8a19be7e4..f5192e25a7 100644 --- a/components/freertos/xtensa_context.S +++ b/components/freertos/xtensa_context.S @@ -51,6 +51,7 @@ NOERROR: .error "C preprocessor needed for this file: make sure its filename\ #include "xtensa_rtos.h" #include "xtensa_context.h" +#include "xt_asm_utils.h" #ifdef XT_USE_OVLY #include @@ -143,6 +144,9 @@ _xt_context_save: mov a9, a0 /* preserve ret addr */ #endif + #ifdef CONFIG_FREERTOS_PORT_OPTIMIZE_INTERRUPT_HANDLING + SPILL_ALL_WINDOWS /* optimized path: spill the register windows with the SPILL_ALL_WINDOWS macro from xt_asm_utils.h. NOTE(review): the macro's header says it relies on window overflow exceptions being taken; confirm PS allows that at this point */ + #else /* otherwise keep the existing spill sequence */ #ifndef __XTENSA_CALL0_ABI__ /* To spill the reg windows, temp. need pre-interrupt stack ptr and a4-15. @@ -175,6 +179,7 @@ _xt_context_save: l32i a13, sp, XT_STK_TMP1 l32i a9, sp, XT_STK_TMP2 #endif + #endif /* CONFIG_FREERTOS_PORT_OPTIMIZE_INTERRUPT_HANDLING */ #if XCHAL_EXTRA_SA_SIZE > 0 /*