diff --git a/components/esp32/ld/esp32.common.ld b/components/esp32/ld/esp32.common.ld index 79262bb9ac..c6a92356c8 100644 --- a/components/esp32/ld/esp32.common.ld +++ b/components/esp32/ld/esp32.common.ld @@ -194,6 +194,13 @@ SECTIONS *(.gnu.linkonce.lit4.*) _lit4_end = ABSOLUTE(.); . = ALIGN(4); + _thread_local_start = ABSOLUTE(.); + *(.tdata) + *(.tdata.*) + *(.tbss) + *(.tbss.*) + _thread_local_end = ABSOLUTE(.); + . = ALIGN(4); } >drom0_0_seg .flash.text : diff --git a/components/freertos/include/freertos/task.h b/components/freertos/include/freertos/task.h index ab45f1c43e..5af62c69c6 100644 --- a/components/freertos/include/freertos/task.h +++ b/components/freertos/include/freertos/task.h @@ -380,6 +380,9 @@ is used in assert() statements. */ * @return pdPASS if the task was successfully created and added to a ready * list, otherwise an error code defined in the file projdefs.h * + * @note If program uses thread local variables (ones specified with "__thread" keyword) + * then storage for them will be allocated on the task's stack. + * * Example usage: * @code{c} * // Task to be created. @@ -530,6 +533,9 @@ is used in assert() statements. */ * are NULL then the task will not be created and * errCOULD_NOT_ALLOCATE_REQUIRED_MEMORY is returned. * + * @note If program uses thread local variables (ones specified with "__thread" keyword) + * then storage for them will be allocated on the task's stack. 
+ * * Example usage: * @code{c} * diff --git a/components/freertos/port.c b/components/freertos/port.c index 5e2c3c8f8b..f3b514f8d7 100644 --- a/components/freertos/port.c +++ b/components/freertos/port.c @@ -92,6 +92,7 @@ */ #include +#include #include #include "xtensa_rtos.h" @@ -146,9 +147,24 @@ StackType_t *pxPortInitialiseStack( StackType_t *pxTopOfStack, TaskFunction_t px #if XCHAL_CP_NUM > 0 uint32_t *p; #endif + uint32_t *threadptr; + void *task_thread_local_start; + extern int _thread_local_start, _thread_local_end, _rodata_start; + // TODO: check that TLS area fits the stack + uint32_t thread_local_sz = (uint8_t *)&_thread_local_end - (uint8_t *)&_thread_local_start; - /* Create interrupt stack frame aligned to 16 byte boundary */ - sp = (StackType_t *) (((UBaseType_t)(pxTopOfStack + 1) - XT_CP_SIZE - XT_STK_FRMSZ) & ~0xf); + thread_local_sz = ALIGNUP(0x10, thread_local_sz); + + /* Initialize task's stack so that we have the following structure at the top: + + ----LOW ADDRESSES ----------------------------------------HIGH ADDRESSES---------- + task stack | interrupt stack frame | thread local vars | co-processor save area | + ---------------------------------------------------------------------------------- + | | + SP pxTopOfStack + + All parts are aligned to 16 byte boundary. 
*/ + sp = (StackType_t *) (((UBaseType_t)(pxTopOfStack + 1) - XT_CP_SIZE - thread_local_sz - XT_STK_FRMSZ) & ~0xf); /* Clear the entire frame (do not use memset() because we don't depend on C library) */ for (tp = sp; tp <= pxTopOfStack; ++tp) @@ -178,6 +194,14 @@ StackType_t *pxPortInitialiseStack( StackType_t *pxTopOfStack, TaskFunction_t px frame->vpri = 0xFFFFFFFF; #endif + /* Init threadptr reg and TLS vars */ + task_thread_local_start = (void *)(((uint32_t)pxTopOfStack - XT_CP_SIZE - thread_local_sz) & ~0xf); + memcpy(task_thread_local_start, &_thread_local_start, thread_local_sz); + threadptr = (uint32_t *)(sp + XT_STK_EXTRA); + /* shift threadptr by the offset of _thread_local_start from DROM start; + need to take into account extra 16 bytes offset */ + *threadptr = (uint32_t)task_thread_local_start - ((uint32_t)&_thread_local_start - (uint32_t)&_rodata_start) - 0x10; + #if XCHAL_CP_NUM > 0 /* Init the coprocessor save area (see xtensa_context.h) */ /* No access to TCB here, so derive indirectly. Stack growth is top to bottom. diff --git a/components/freertos/test/test_thread_local.c b/components/freertos/test/test_thread_local.c new file mode 100644 index 0000000000..d5f782b43c --- /dev/null +++ b/components/freertos/test/test_thread_local.c @@ -0,0 +1,105 @@ +/* + Test for thread local storage support. 
+*/ + +#include +#include + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "unity.h" +#include "sdkconfig.h" + +static __thread int tl_test_var1; +static __thread uint8_t tl_test_var2 = 55; +static __thread uint16_t tl_test_var3 = 44; +static __thread uint8_t tl_test_arr_var[10]; +static __thread struct test_tls_var { + int f32; + uint8_t f8; + uint16_t f16; + uint8_t farr[10]; +} tl_test_struct_var; + +static void task_test_tls(void *arg) +{ + bool *running = (bool *)arg; + uint32_t tp = (uint32_t)-1; + int test_var1_old = 0; + uint8_t test_var2_old = 0; + uint16_t test_var3_old = 0; + int f32_old = 0; + uint8_t f8_old = 0; + uint16_t f16_old = 0; + + asm volatile ("rur.threadptr %0":"=r"(tp)); + for (int i = 0; i < 5; i++) { + printf("Task[%x]: var = 0x%x 0x%x\n", tp, tl_test_var1, tl_test_var2); + if (i == 0) { + TEST_ASSERT_EQUAL(0, tl_test_var1); + TEST_ASSERT_EQUAL(55, tl_test_var2); + TEST_ASSERT_EQUAL(44, tl_test_var3); + for (int k = 0; k < sizeof(tl_test_arr_var); k++) { + TEST_ASSERT_EQUAL(0, tl_test_arr_var[k]); + } + TEST_ASSERT_EQUAL(0, tl_test_struct_var.f32); + TEST_ASSERT_EQUAL(0, tl_test_struct_var.f8); + TEST_ASSERT_EQUAL(0, tl_test_struct_var.f16); + for (int k = 0; k < sizeof(tl_test_struct_var.farr); k++) { + TEST_ASSERT_EQUAL(0, tl_test_struct_var.farr[k]); + } + } else { + TEST_ASSERT_EQUAL(test_var1_old+1, tl_test_var1); + TEST_ASSERT_EQUAL(test_var2_old+1, tl_test_var2); + TEST_ASSERT_EQUAL(test_var3_old+1, tl_test_var3); + for (int k = 0; k < sizeof(tl_test_arr_var); k++) { + TEST_ASSERT_EQUAL(i-1, tl_test_arr_var[k]); + } + TEST_ASSERT_EQUAL(f32_old+1, tl_test_struct_var.f32); + TEST_ASSERT_EQUAL(f8_old+1, tl_test_struct_var.f8); + TEST_ASSERT_EQUAL(f16_old+1, tl_test_struct_var.f16); + for (int k = 0; k < sizeof(tl_test_struct_var.farr); k++) { + TEST_ASSERT_EQUAL(i-1, tl_test_struct_var.farr[k]); + } + } + test_var1_old = tl_test_var1; + test_var2_old = tl_test_var2; + test_var3_old = tl_test_var3; + f32_old = 
tl_test_struct_var.f32; + f8_old = tl_test_struct_var.f8; + f16_old = tl_test_struct_var.f16; + tl_test_var1++; + tl_test_var2++; + tl_test_var3++; + memset(tl_test_arr_var, i, sizeof(tl_test_arr_var)); + tl_test_struct_var.f32++; + tl_test_struct_var.f8++; + tl_test_struct_var.f16++; + memset(tl_test_struct_var.farr, i, sizeof(tl_test_struct_var.farr)); + vTaskDelay(10); + } + + if (running) { + *running = false; + vTaskDelete(NULL); + } +} + +TEST_CASE("TLS test", "[freertos]") +{ + static StackType_t s_stack[2048]; + StaticTask_t s_task; + bool running[2] = {true, true}; +#if CONFIG_FREERTOS_UNICORE == 0 + int other_core = 1; +#else + int other_core = 0; +#endif + + xTaskCreatePinnedToCore((TaskFunction_t)&task_test_tls, "task_test_tls", 3072, &running[0], 5, NULL, 0); + xTaskCreateStaticPinnedToCore((TaskFunction_t)&task_test_tls, "task_test_tls", sizeof(s_stack), &running[1], + 5, s_stack, &s_task, other_core); + while (running[0] || running[1]) { + vTaskDelay(10); + } +} diff --git a/docs/api-guides/index.rst b/docs/api-guides/index.rst index 662ab0e791..fcacd187d4 100644 --- a/docs/api-guides/index.rst +++ b/docs/api-guides/index.rst @@ -10,6 +10,7 @@ API Guides ESP32 Core Dump Flash Encryption <../security/flash-encryption> FreeRTOS SMP Changes + Thread Local Storage High Level Interrupts JTAG Debugging Partition Tables diff --git a/docs/api-guides/thread-local-storage.rst b/docs/api-guides/thread-local-storage.rst new file mode 100644 index 0000000000..3c380db4b4 --- /dev/null +++ b/docs/api-guides/thread-local-storage.rst @@ -0,0 +1,65 @@ +Thread Local Storage +==================== + +Overview +-------- + +Thread-local storage (TLS) is a mechanism by which variables are allocated such that there +is one instance of the variable per extant thread. ESP-IDF provides three ways to make use +of such variables: + + - :ref:`freertos-native`: ESP-IDF FreeRTOS native API. + - :ref:`pthread-api`: ESP-IDF's pthread API. 
+ - :ref:`c11-std`: C11 standard introduces special keyword to declare variables as thread local. + +.. _freertos-native: + +FreeRTOS Native API +-------------------- + +The ESP-IDF FreeRTOS provides the following API to manage thread local variables: + + - :cpp:func:`vTaskSetThreadLocalStoragePointer` + - :cpp:func:`pvTaskGetThreadLocalStoragePointer` + - :cpp:func:`vTaskSetThreadLocalStoragePointerAndDelCallback` + +In this case maximum number of variables that can be allocated is limited by +``configNUM_THREAD_LOCAL_STORAGE_POINTERS`` macro. Variables are kept in the task control block (TCB) +and accessed by their index. Note that index 0 is reserved for ESP-IDF internal uses. +Using that API user can allocate thread local variables of an arbitrary size and assign them to any number of tasks. +Different tasks can have different sets of TLS variables. +If size of the variable is more than 4 bytes then user is responsible for allocating/deallocating memory for it. +Variable's deallocation is initiated by FreeRTOS when task is deleted, but user must provide function (callback) +to do proper cleanup. + +.. _pthread-api: + +Pthread API +---------------- + +The ESP-IDF provides the following pthread API to manage thread local variables: + + - :cpp:func:`pthread_key_create` + - :cpp:func:`pthread_key_delete` + - :cpp:func:`pthread_getspecific` + - :cpp:func:`pthread_setspecific` + +This API has all benefits of the one above, but eliminates some of its limits. The number of variables is +limited only by size of available memory on the heap. +Due to the dynamic nature this API introduces additional performance overhead compared to the native one. + +.. _c11-std: + +C11 Standard +------------ + +The ESP-IDF FreeRTOS supports thread local variables according to C11 standard (ones specified with ``__thread`` keyword). +For details on this GCC feature please see https://gcc.gnu.org/onlinedocs/gcc-5.5.0/gcc/Thread-Local.html#Thread-Local. 
+Storage for such variables is allocated on the task's stack. +Note that area for all such variables in the program will be allocated on the stack of +every task in the system even if that task does not use such variables at all. For example +ESP-IDF system tasks (like ``ipc``, ``timer`` tasks etc.) will also have that extra stack space allocated. +So this feature should be used with care. There is a tradeoff: C11 thread local variables are quite handy +to use in programming and can be accessed using just a few Xtensa instructions, but this benefit comes +at the cost of additional stack usage for all tasks in the system. +Due to the static nature of variable allocation, all tasks in the system have the same sets of C11 thread local variables.