From 45badf864fa9ac9106b65a8c24b3964b8f0eb3b5 Mon Sep 17 00:00:00 2001
From: Darian Leung
Date: Wed, 14 Dec 2022 21:07:48 +0800
Subject: [PATCH] freertos(IDF): Allow cross-core freeing of task memory when
 deleting tasks

Previously, IDF FreeRTOS would restrict the cleanup of task memory (done by
vTaskDelete() or the Idle task) to tasks pinned to the current core or to
unpinned tasks. This was due to the need to clear the task's coprocessor
ownership on the other core (i.e., "_xt_coproc_owner_sa"). But this restriction
can be lifted by simply protecting access to "_xt_coproc_owner_sa" with a
spinlock.

This commit implements a "_xt_coproc_owner_sa_lock" to protect access to
"_xt_coproc_owner_sa". As a result, vTaskDelete() and prvDeleteTCB() can now
delete tasks pinned to the other core, so long as those tasks are not
currently running.

Note: This fix was copied from the Xtensa port of Amazon SMP FreeRTOS.
---
 .../FreeRTOS-Kernel/portable/xtensa/port.c    |  13 ++-
 .../portable/xtensa/xt_asm_utils.h            |  64 ++++++++++-
 .../portable/xtensa/xtensa_context.S          |  31 +++--
 .../portable/xtensa/xtensa_vectors.S          |  47 ++++++--
 components/freertos/FreeRTOS-Kernel/tasks.c  | 106 ++++++++----------
 5 files changed, 176 insertions(+), 85 deletions(-)

diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c b/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c
index 430308ac91..b32af0935d 100644
--- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c
+++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c
@@ -603,14 +603,21 @@ void vPortSetStackWatchpoint( void *pxStackStart )
 // -------------------- Co-Processor -----------------------
 
 #if XCHAL_CP_NUM > 0
-void _xt_coproc_release(volatile void *coproc_sa_base);
+void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xTargetCoreID);
 
 void vPortCleanUpCoprocArea(void *pvTCB)
 {
-    /* Get a pointer to the task's coprocessor save area */
     UBaseType_t uxCoprocArea;
+    BaseType_t xTargetCoreID;
+
+    /* Get a pointer to the task's coprocessor save area */
     uxCoprocArea = ( UBaseType_t ) ( ( ( StaticTask_t * ) pvTCB )->pxDummy8 );  /* Get TCB_t.pxEndOfStack */
     uxCoprocArea = STACKPTR_ALIGN_DOWN(16, uxCoprocArea - XT_CP_SIZE);
-    _xt_coproc_release( ( void * ) uxCoprocArea );
+
+    /* Get xTargetCoreID from the TCB.xCoreID */
+    xTargetCoreID = ( ( StaticTask_t * ) pvTCB )->xDummyCoreID;
+
+    /* If the task has live floating point registers somewhere, release them */
+    _xt_coproc_release( (void *)uxCoprocArea, xTargetCoreID );
 }
 #endif /* XCHAL_CP_NUM > 0 */
diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h
index d4f25b79f7..42a0af6db2 100644
--- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h
+++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h
@@ -72,4 +72,66 @@
 #endif
     .endm
 
-#endif
+/*
+--------------------------------------------------------------------------------
+ Macro spinlock_take
+
+ This macro will repeatedly attempt to atomically set a spinlock variable
+ using the s32c1i instruction. A spinlock is considered free if its value is 0.
+
+ Entry:
+ - "reg_A/B" as scratch registers
+ - "lock_var" spinlock variable's symbol
+ - Interrupts must already be disabled by caller
+ Exit:
+ - Spinlock set to current core's ID (PRID)
+ - "reg_A/B" clobbered
+--------------------------------------------------------------------------------
+*/
+
+#if portNUM_PROCESSORS > 1
+
+    .macro spinlock_take reg_A reg_B lock_var
+
+    movi    \reg_A, \lock_var           /* reg_A = &lock_var */
+.L_spinlock_loop:
+    movi    \reg_B, 0                   /* Load spinlock free value (0) into SCOMPARE1 */
+    wsr     \reg_B, SCOMPARE1
+    rsync                               /* Ensure that SCOMPARE1 is set before s32c1i executes */
+    rsr     \reg_B, PRID                /* Load the current core's ID into reg_B */
+    s32c1i  \reg_B, \reg_A, 0           /* Attempt *lock_var = reg_B */
+    bnez    \reg_B, .L_spinlock_loop    /* If the write was successful (i.e., the lock was free), 0 will have been written back to reg_B */
+
+    .endm
+
+#endif /* portNUM_PROCESSORS > 1 */
+
+/*
+--------------------------------------------------------------------------------
+ Macro spinlock_release
+
+ This macro will release a spinlock variable previously taken by the
+ spinlock_take macro.
+
+ Entry:
+ - "reg_A/B" as scratch registers
+ - "lock_var" spinlock variable's symbol
+ - Interrupts must already be disabled by caller
+ Exit:
+ - "reg_A/B" clobbered
+--------------------------------------------------------------------------------
+*/
+
+#if portNUM_PROCESSORS > 1
+
+    .macro spinlock_release reg_A reg_B lock_var
+
+    movi    \reg_A, \lock_var           /* reg_A = &lock_var */
+    movi    \reg_B, 0
+    s32i    \reg_B, \reg_A, 0           /* Release the spinlock (*lock_var = 0) */
+
+    .endm
+
+#endif /* portNUM_PROCESSORS > 1 */
+
+#endif /* __XT_ASM_UTILS_H */
diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S
index df0b2324d2..cde52ea40e 100644
--- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S
+++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S
@@ -397,17 +397,16 @@
 May be called when a thread terminates or completes but does not delete
 the co-proc save area, to avoid the exception handler having to save the
 thread's co-proc state before another thread can use it (optimization).
 
-Needs to be called on the processor the thread was running on. Unpinned threads
-won't have an entry here because they get pinned as soon they use a coprocessor.
-
 Entry Conditions:
     A2  = Pointer to base of co-processor state save area.
+    A3  = Core ID of the task (must be pinned) whose coproc ownership we are
+          releasing.
 
 Exit conditions:
     None.
 Obeys ABI conventions per prototype:
-    void _xt_coproc_release(void * coproc_sa_base)
+    void _xt_coproc_release(void * coproc_sa_base, BaseType_t xTargetCoreID)
 
*******************************************************************************/

@@ -420,17 +419,21 @@ Obeys ABI conventions per prototype:
     .align  4
 _xt_coproc_release:
     ENTRY0                              /* a2 = base of save area */
+                                        /* a3 = xTargetCoreID */
 
-    getcoreid a5
-    movi  a3, XCHAL_CP_MAX << 2
-    mull  a5, a5, a3
-    movi  a3, _xt_coproc_owner_sa       /* a3 = base of owner array */
-    add   a3, a3, a5
-
-    addi  a4, a3, XCHAL_CP_MAX << 2     /* a4 = top+1 of owner array */
+    movi  a4, XCHAL_CP_MAX << 2         /* a4 = size of an owner array */
+    mull  a4, a3, a4                    /* a4 = offset to the owner array of the target core */
+    movi  a3, _xt_coproc_owner_sa       /* a3 = base of all owner arrays */
+    add   a3, a3, a4                    /* a3 = base of owner array of the target core */
+    addi  a4, a3, XCHAL_CP_MAX << 2     /* a4 = top+1 of owner array of the target core */
     movi  a5, 0                         /* a5 = 0 (unowned) */
 
     rsil  a6, XCHAL_EXCM_LEVEL          /* lock interrupts */
+#if portNUM_PROCESSORS > 1
+    /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock
+     * to ensure thread-safe access to _xt_coproc_owner_sa between cores. */
+    spinlock_take a7 a8 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
 
 1:  l32i  a7, a3, 0                     /* a7 = owner at a3 */
     bne   a2, a7, 2f                    /* if (coproc_sa_base == owner) */
@@ -438,7 +441,11 @@ _xt_coproc_release:
 2:  addi  a3, a3, 1<<2                  /* a3 = next entry in owner array */
     bltu  a3, a4, 1b                    /* repeat until end of array */
 
-3:  wsr   a6, PS                        /* restore interrupts */
+#if portNUM_PROCESSORS > 1
+    /* Release previously taken spinlock */
+    spinlock_release a7 a8 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
+    wsr   a6, PS                        /* restore interrupts */
 
     RET0

diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S
index a6571c6113..2b42dec014 100644
--- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S
+++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S
@@ -102,6 +102,7 @@
 #include "esp_private/panic_reason.h"
 #include "sdkconfig.h"
 #include "soc/soc.h"
+#include "xt_asm_utils.h"
 
 /*
@@ -849,9 +850,25 @@ _xt_coproc_mask:
 _xt_coproc_owner_sa:
     .space  (XCHAL_CP_MAX * portNUM_PROCESSORS) << 2
 
+/* Spinlock protecting the _xt_coproc_owner_sa array
+ *
+ * 0    = Spinlock available
+ * PRID = Spinlock taken by the core with that ID
+ *
+ * The lock provides mutual exclusion for access to the _xt_coproc_owner_sa array.
+ * The array can be modified by multiple cores simultaneously (via _xt_coproc_exc
+ * and _xt_coproc_release), so this single shared spinlock is defined to ensure
+ * thread-safe access to the _xt_coproc_owner_sa array.
+ */
+#if portNUM_PROCESSORS > 1
+    .global _xt_coproc_owner_sa_lock
+    .type   _xt_coproc_owner_sa_lock,@object
+    .align  16 /* minimize crossing cache boundaries */
+_xt_coproc_owner_sa_lock:
+    .space  4
+#endif /* portNUM_PROCESSORS > 1 */
+
     .section .iram1,"ax"
-
-
     .align  4
 .L_goto_invalid:
     j   .L_xt_coproc_invalid    /* not in a thread (invalid) */
@@ -919,17 +936,18 @@ _xt_coproc_exc:
     or    a4, a4, a2            /* a4 = CPENABLE | (1 << n) */
     wsr   a4, CPENABLE
 
-/*
-Keep loading _xt_coproc_owner_sa[n] atomic (=load once, then use that value
-everywhere): _xt_coproc_release assumes it works like this in order not to need
-locking.
-*/ - /* Grab correct xt_coproc_owner_sa for this core */ + /* Grab the xt_coproc_owner_sa owner array for current core */ getcoreid a3 /* a3 = current core ID */ - movi a2, XCHAL_CP_MAX << 2 - mull a2, a2, a3 /* multiply by current processor id */ - movi a3, _xt_coproc_owner_sa /* a3 = base of owner array */ - add a3, a3, a2 /* a3 = owner area needed for this processor */ + movi a2, XCHAL_CP_MAX << 2 /* a2 = size of an owner array */ + mull a2, a2, a3 /* a2 = offset to the owner array of the current core*/ + movi a3, _xt_coproc_owner_sa /* a3 = base of all owner arrays */ + add a3, a3, a2 /* a3 = base of owner array of the current core */ + +#if portNUM_PROCESSORS > 1 + /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock + * to ensure thread safe access of _xt_coproc_owner_sa between cores. */ + spinlock_take a0 a2 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ /* Get old coprocessor owner thread (save area ptr) and assign new one. */ addx4 a3, a5, a3 /* a3 = &_xt_coproc_owner_sa[n] */ @@ -937,6 +955,11 @@ locking. s32i a15, a3, 0 /* _xt_coproc_owner_sa[n] = new */ rsync /* ensure wsr.CPENABLE is complete */ +#if portNUM_PROCESSORS > 1 + /* Release previously taken spinlock */ + spinlock_release a0 a2 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ + /* Only need to context switch if new owner != old owner. */ /* If float is necessary on ISR, we need to remove this check */ /* below, because on restoring from ISR we may have new == old condition used diff --git a/components/freertos/FreeRTOS-Kernel/tasks.c b/components/freertos/FreeRTOS-Kernel/tasks.c index d4a1b48e22..c86cc3b0d4 100644 --- a/components/freertos/FreeRTOS-Kernel/tasks.c +++ b/components/freertos/FreeRTOS-Kernel/tasks.c @@ -1415,18 +1415,8 @@ static void prvAddNewTaskToReadyList( TCB_t * pxNewTCB ) * not return. */ uxTaskNumber++; - /* - * We cannot immediately a task that is - * - Currently running on either core - * - If the task is not currently running but is pinned to the other (due to FPU cleanup) - * Todo: Allow deletion of tasks pinned to other core (IDF-5803) - */ - #if ( configNUM_CORES > 1 ) - xFreeNow = ( taskIS_CURRENTLY_RUNNING( pxTCB ) || ( pxTCB->xCoreID == !xCurCoreID ) ) ? pdFALSE : pdTRUE; - #else - xFreeNow = ( taskIS_CURRENTLY_RUNNING( pxTCB ) ) ? pdFALSE : pdTRUE; - #endif /* configNUM_CORES > 1 */ - + /* We cannot free the task immediately if it is currently running (on either core) */ + xFreeNow = ( taskIS_CURRENTLY_RUNNING( pxTCB ) ) ? pdFALSE : pdTRUE; if( xFreeNow == pdFALSE ) { /* A task is deleting itself. This cannot complete within the @@ -1455,7 +1445,7 @@ static void prvAddNewTaskToReadyList( TCB_t * pxNewTCB ) #if ( configNUM_CORES > 1 ) if( taskIS_CURRENTLY_RUNNING_ON_CORE( pxTCB, !xCurCoreID ) ) { - /* SMP case of deleting a task running on a different core. Same issue + /* SMP case of deleting a task currently running on a different core. Same issue * as a task deleting itself, but we need to send a yield to this task now * before we release xKernelLock. 
* @@ -4505,71 +4495,73 @@ static void prvInitialiseTaskLists( void ) } /*-----------------------------------------------------------*/ + static void prvCheckTasksWaitingTermination( void ) { /** THIS FUNCTION IS CALLED FROM THE RTOS IDLE TASK **/ #if ( INCLUDE_vTaskDelete == 1 ) { - BaseType_t xListIsEmpty; - BaseType_t core = xPortGetCoreID(); + TCB_t * pxTCB; - /* uxDeletedTasksWaitingCleanUp is used to prevent taskENTER_CRITICAL( &xKernelLock ) + /* uxDeletedTasksWaitingCleanUp is used to prevent taskENTER_CRITICAL() * being called too often in the idle task. */ while( uxDeletedTasksWaitingCleanUp > ( UBaseType_t ) 0U ) { - TCB_t * pxTCB = NULL; - - taskENTER_CRITICAL( &xKernelLock ); - { - xListIsEmpty = listLIST_IS_EMPTY( &xTasksWaitingTermination ); - - if( xListIsEmpty == pdFALSE ) + #if ( configNUM_CORES > 1 ) + pxTCB = NULL; + taskENTER_CRITICAL( &xKernelLock ); { - /* We only want to kill tasks that ran on this core because e.g. _xt_coproc_release needs to - * be called on the core the process is pinned on, if any */ - ListItem_t * target = listGET_HEAD_ENTRY( &xTasksWaitingTermination ); - - for( ; target != listGET_END_MARKER( &xTasksWaitingTermination ); target = listGET_NEXT( target ) ) /*Walk the list */ + /* List may have already been cleared by the other core. Check again */ + if ( listLIST_IS_EMPTY( &xTasksWaitingTermination ) == pdFALSE ) { - TCB_t * tgt_tcb = ( TCB_t * ) listGET_LIST_ITEM_OWNER( target ); - int affinity = tgt_tcb->xCoreID; - - /*Self deleting tasks are added to Termination List before they switch context. Ensure they aren't still currently running */ - if( ( pxCurrentTCB[ core ] == tgt_tcb ) || ( ( configNUM_CORES > 1 ) && ( pxCurrentTCB[ !core ] == tgt_tcb ) ) ) + /* We can't delete a task if it is still running on + * the other core. Keep walking the list until we + * find a task we can free, or until we walk the + * entire list. */ + ListItem_t *xEntry; + for ( xEntry = listGET_HEAD_ENTRY( &xTasksWaitingTermination ); xEntry != listGET_END_MARKER( &xTasksWaitingTermination ); xEntry = listGET_NEXT( xEntry ) ) { - continue; /*Can't free memory of task that is still running */ + if ( !taskIS_CURRENTLY_RUNNING( ( ( TCB_t * ) listGET_LIST_ITEM_OWNER( xEntry ) ) ) ) + { + pxTCB = ( TCB_t * ) listGET_LIST_ITEM_OWNER( xEntry ); + ( void ) uxListRemove( &( pxTCB->xStateListItem ) ); + --uxCurrentNumberOfTasks; + --uxDeletedTasksWaitingCleanUp; + break; + } } - - if( ( affinity == core ) || ( affinity == tskNO_AFFINITY ) ) /*Find first item not pinned to other core */ - { - pxTCB = tgt_tcb; - break; - } - } - - if( pxTCB != NULL ) - { - ( void ) uxListRemove( target ); /*Remove list item from list */ - --uxCurrentNumberOfTasks; - --uxDeletedTasksWaitingCleanUp; } } - } - taskEXIT_CRITICAL( &xKernelLock ); /*Need to call deletion callbacks outside critical section */ + taskEXIT_CRITICAL( &xKernelLock ); + + if ( pxTCB != NULL ) + { + #if ( configNUM_THREAD_LOCAL_STORAGE_POINTERS > 0 ) && ( configTHREAD_LOCAL_STORAGE_DELETE_CALLBACKS == 1 ) + prvDeleteTLS( pxTCB ); + #endif + prvDeleteTCB( pxTCB ); + } + else + { + /* No task found to delete, break out of loop */ + break; + } + #else + taskENTER_CRITICAL( &xKernelLock ); + { + pxTCB = listGET_OWNER_OF_HEAD_ENTRY( ( &xTasksWaitingTermination ) ); /*lint !e9079 void * is used as this macro is used with timers and co-routines too. Alignment is known to be fine as the type of the pointer stored and retrieved is the same. 
*/ + ( void ) uxListRemove( &( pxTCB->xStateListItem ) ); + --uxCurrentNumberOfTasks; + --uxDeletedTasksWaitingCleanUp; + } + taskEXIT_CRITICAL( &xKernelLock ); - if( pxTCB != NULL ) /*Call deletion callbacks and free TCB memory */ - { #if ( configNUM_THREAD_LOCAL_STORAGE_POINTERS > 0 ) && ( configTHREAD_LOCAL_STORAGE_DELETE_CALLBACKS == 1 ) prvDeleteTLS( pxTCB ); #endif prvDeleteTCB( pxTCB ); - } - else - { - mtCOVERAGE_TEST_MARKER(); - break; /*No TCB found that could be freed by this core, break out of loop */ - } + #endif /* configNUM_CORES > 1 */ } } #endif /* INCLUDE_vTaskDelete */
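
Reviewer note (not part of the patch): the following is a rough C sketch of the
synchronization scheme the assembly above implements, for readers who don't
speak Xtensa. It is only an approximation under stated assumptions: it uses
GCC/Clang __atomic builtins in place of the s32c1i compare-and-store
instruction, and every name suffixed with _sketch is invented for illustration.
The real code must stay in assembly because it runs inside the coprocessor
exception path.

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

#define CP_MAX_SKETCH     8   /* hypothetical stand-in for XCHAL_CP_MAX */
#define NUM_CORES_SKETCH  2   /* hypothetical stand-in for portNUM_PROCESSORS */

/* 0 = lock free; otherwise holds the PRID (core ID) of the owning core,
 * mirroring the _xt_coproc_owner_sa_lock word defined in xtensa_vectors.S. */
static volatile uint32_t owner_sa_lock_sketch = 0;

/* Per-core coprocessor ownership arrays: one save-area pointer per CP,
 * mirroring _xt_coproc_owner_sa. */
static volatile void *owner_sa_sketch[NUM_CORES_SKETCH][CP_MAX_SKETCH];

static inline void spinlock_take_sketch(uint32_t my_prid)
{
    /* Caller has interrupts disabled, matching the macro's entry condition.
     * Loop until we atomically swap 0 -> PRID, as the s32c1i-based
     * spinlock_take macro does in hardware. */
    uint32_t expected;
    do {
        expected = 0;
    } while (!__atomic_compare_exchange_n(&owner_sa_lock_sketch, &expected,
                                          my_prid, false,
                                          __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
}

static inline void spinlock_release_sketch(void)
{
    /* Plain store of the free value, like the s32i in spinlock_release. */
    __atomic_store_n(&owner_sa_lock_sketch, 0, __ATOMIC_RELEASE);
}

/* What the reworked _xt_coproc_release does: with the lock held, clear any
 * slot in the *target* core's owner array that still points at the dying
 * task's coprocessor save area, so that core can never restore stale state.
 * Because the array is now lock-protected, the caller no longer has to be
 * running on the task's own core, which is what lets vTaskDelete() and the
 * Idle task free cross-core task memory. */
static void coproc_release_sketch(volatile void *coproc_sa_base,
                                  int target_core, uint32_t my_prid)
{
    /* Interrupts are assumed to be disabled here, as in the assembly. */
    spinlock_take_sketch(my_prid);
    for (int cp = 0; cp < CP_MAX_SKETCH; cp++) {
        if (owner_sa_sketch[target_core][cp] == coproc_sa_base) {
            owner_sa_sketch[target_core][cp] = NULL;  /* mark unowned */
        }
    }
    spinlock_release_sketch();
}

Design-wise, hold times stay bounded: the lock exists only when
portNUM_PROCESSORS > 1, is taken only with interrupts already disabled, and
guards a short loop over at most XCHAL_CP_MAX entries. Storing the PRID rather
than a bare 1 also makes the current owner visible when debugging a stuck lock.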