diff --git a/components/idf_test/include/esp32s2/idf_performance_target.h b/components/idf_test/include/esp32s2/idf_performance_target.h index f37c2a9e00..dc8a060f54 100644 --- a/components/idf_test/include/esp32s2/idf_performance_target.h +++ b/components/idf_test/include/esp32s2/idf_performance_target.h @@ -1,16 +1,8 @@ -// Copyright 2020 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +/* + * SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ #pragma once diff --git a/components/idf_test/include/esp32s3/idf_performance_target.h b/components/idf_test/include/esp32s3/idf_performance_target.h index 62f996fc65..b744cda8fa 100644 --- a/components/idf_test/include/esp32s3/idf_performance_target.h +++ b/components/idf_test/include/esp32s3/idf_performance_target.h @@ -1,23 +1,15 @@ -// Copyright 2020 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. +/* + * SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ #pragma once #define IDF_PERFORMANCE_MIN_AES_CBC_THROUGHPUT_MBSEC 43 // SHA256 hardware throughput at 240MHz, threshold set lower than worst case -#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 19.8 +#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 90 // esp_sha() time to process 32KB of input data from RAM #define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB 1000 #define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 900 diff --git a/components/mbedtls/port/aes/dma/esp_aes.c b/components/mbedtls/port/aes/dma/esp_aes.c index c3b19d45b2..0e85efbd60 100644 --- a/components/mbedtls/port/aes/dma/esp_aes.c +++ b/components/mbedtls/port/aes/dma/esp_aes.c @@ -31,6 +31,7 @@ #include "esp_intr_alloc.h" #include "esp_private/periph_ctrl.h" #include "esp_log.h" +#include "esp_attr.h" #include "soc/lldesc.h" #include "esp_heap_caps.h" #include "sys/param.h" @@ -81,6 +82,14 @@ static esp_pm_lock_handle_t s_pm_sleep_lock; static const char *TAG = "esp-aes"; +/* These are static due to: + * * Must be in DMA capable memory, so stack is not a safe place to put them + * * To avoid having to malloc/free them for every DMA operation + */ +static DRAM_ATTR lldesc_t s_stream_in_desc; +static DRAM_ATTR lldesc_t s_stream_out_desc; +static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES]; +static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES]; static inline void esp_aes_wait_dma_done(lldesc_t *output) { @@ -289,15 +298,12 @@ cleanup: /* Encrypt/decrypt the input using DMA */ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out) { - lldesc_t stream_in_desc, stream_out_desc; lldesc_t *in_desc_head = NULL, *out_desc_head = NULL; lldesc_t *out_desc_tail = NULL; /* pointer to the final output 
descriptor */ lldesc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; size_t lldesc_num; - uint8_t stream_in[16] = {}; unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block unsigned block_bytes = len - stream_bytes; // bytes which are in a full block - unsigned char *non_icache_input = NULL; unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0); bool use_intr = false; bool input_needs_realloc = false; @@ -321,7 +327,7 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, /* Flush cache if input in external ram */ #if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC) if (esp_ptr_external_ram(input)) { - Cache_WriteBack_All(); + Cache_WriteBack_Addr((uint32_t)input, len); } if (esp_ptr_external_ram(output)) { if (((intptr_t)(output) & 0xF) != 0) { @@ -348,7 +354,7 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, lldesc_num = lldesc_get_required_num(block_bytes); /* Allocate both in and out descriptors to save a malloc/free per function call */ - block_desc = heap_caps_malloc(sizeof(lldesc_t) * lldesc_num * 2, MALLOC_CAP_DMA); + block_desc = heap_caps_calloc(lldesc_num * 2, sizeof(lldesc_t), MALLOC_CAP_DMA); if (block_desc == NULL) { ESP_LOGE(TAG, "Failed to allocate memory"); ret = -1; @@ -367,23 +373,30 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, /* Any leftover bytes which are appended as an additional DMA list */ if (stream_bytes > 0) { - memcpy(stream_in, input + block_bytes, stream_bytes); - lldesc_setup_link(&stream_in_desc, stream_in, AES_BLOCK_BYTES, 0); - lldesc_setup_link(&stream_out_desc, stream_out, AES_BLOCK_BYTES, 0); + memset(&s_stream_in_desc, 0, sizeof(lldesc_t)); + memset(&s_stream_out_desc, 0, sizeof(lldesc_t)); + + memset(s_stream_in, 0, AES_BLOCK_BYTES); + memset(s_stream_out, 0, AES_BLOCK_BYTES); + + memcpy(s_stream_in, input + block_bytes, stream_bytes); + 
+ lldesc_setup_link(&s_stream_in_desc, s_stream_in, AES_BLOCK_BYTES, 0); + lldesc_setup_link(&s_stream_out_desc, s_stream_out, AES_BLOCK_BYTES, 0); if (block_bytes > 0) { /* Link with block descriptors*/ - block_in_desc[lldesc_num - 1].empty = (uint32_t)&stream_in_desc; - block_out_desc[lldesc_num - 1].empty = (uint32_t)&stream_out_desc; + block_in_desc[lldesc_num - 1].empty = (uint32_t)&s_stream_in_desc; + block_out_desc[lldesc_num - 1].empty = (uint32_t)&s_stream_out_desc; } - out_desc_tail = &stream_out_desc; + out_desc_tail = &s_stream_out_desc; } // block buffers are sent to DMA first, unless there aren't any - in_desc_head = (block_bytes > 0) ? block_in_desc : &stream_in_desc; - out_desc_head = (block_bytes > 0) ? block_out_desc : &stream_out_desc; + in_desc_head = (block_bytes > 0) ? block_in_desc : &s_stream_in_desc; + out_desc_head = (block_bytes > 0) ? block_out_desc : &s_stream_out_desc; #if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) @@ -412,18 +425,18 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, #if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC) if (block_bytes > 0) { if (esp_ptr_external_ram(output)) { - Cache_Invalidate_DCache_All(); + Cache_Invalidate_Addr((uint32_t)output, block_bytes); } } #endif aes_hal_transform_dma_finish(); if (stream_bytes > 0) { - memcpy(output + block_bytes, stream_out, stream_bytes); + memcpy(output + block_bytes, s_stream_out, stream_bytes); + memcpy(stream_out, s_stream_out, AES_BLOCK_BYTES); } cleanup: - free(non_icache_input); free(block_desc); return ret; } diff --git a/components/mbedtls/port/aes/esp_aes_gcm.c b/components/mbedtls/port/aes/esp_aes_gcm.c index dcbfac4ce9..81a5bfebd6 100644 --- a/components/mbedtls/port/aes/esp_aes_gcm.c +++ b/components/mbedtls/port/aes/esp_aes_gcm.c @@ -490,7 +490,7 @@ int esp_aes_gcm_finish( esp_gcm_context *ctx, /* Due to restrictions in the hardware (e.g. 
need to do the whole conversion in one go), some combinations of inputs are not supported */ static bool esp_aes_gcm_input_support_hw_accel(size_t length, const unsigned char *aad, size_t aad_len, - const unsigned char *input, unsigned char *output) + const unsigned char *input, unsigned char *output, uint8_t *stream_in) { bool support_hw_accel = true; @@ -505,10 +505,15 @@ static bool esp_aes_gcm_input_support_hw_accel(size_t length, const unsigned cha } else if (!esp_ptr_dma_capable(output) && length > 0) { /* output in non internal DMA memory */ support_hw_accel = false; + } else if (!esp_ptr_dma_capable(stream_in)) { + /* Stream in (and therefore other descriptors and buffers that come from the stack) + in non internal DMA memory */ + support_hw_accel = false; } else if (length == 0) { support_hw_accel = false; } + return support_hw_accel; } @@ -562,7 +567,7 @@ int esp_aes_gcm_crypt_and_tag( esp_gcm_context *ctx, unsigned block_bytes = aad_len - stream_bytes; // bytes which are in a full block /* Due to hardware limition only certain cases are fully supported in HW */ - if (!esp_aes_gcm_input_support_hw_accel(length, aad, aad_len, input, output)) { + if (!esp_aes_gcm_input_support_hw_accel(length, aad, aad_len, input, output, stream_in)) { return esp_aes_gcm_crypt_and_tag_partial_hw(ctx, mode, length, iv, iv_len, aad, aad_len, input, output, tag_len, tag); } diff --git a/components/mbedtls/port/sha/dma/sha.c b/components/mbedtls/port/sha/dma/sha.c index c8f77f00f5..af9f0aa99b 100644 --- a/components/mbedtls/port/sha/dma/sha.c +++ b/components/mbedtls/port/sha/dma/sha.c @@ -31,6 +31,7 @@ #include "esp_log.h" #include "esp_crypto_lock.h" +#include "esp_attr.h" #include "soc/lldesc.h" #include "soc/cache_memory.h" #include "soc/periph_defs.h" @@ -68,6 +69,12 @@ const static char *TAG = "esp-sha"; +/* These are static due to: + * * Must be in DMA capable memory, so stack is not a safe place to put them + * * To avoid having to malloc/free them for every DMA 
operation + */ +static DRAM_ATTR lldesc_t s_dma_descr_input; +static DRAM_ATTR lldesc_t s_dma_descr_buf; void esp_sha_write_digest_state(esp_sha_type sha_type, void *digest_state) { @@ -277,34 +284,35 @@ static esp_err_t esp_sha_dma_process(esp_sha_type sha_type, const void *input, u const void *buf, uint32_t buf_len, bool is_first_block) { int ret = 0; - lldesc_t dma_descr_input = {}; - lldesc_t dma_descr_buf = {}; lldesc_t *dma_descr_head; size_t num_blks = (ilen + buf_len) / block_length(sha_type); + memset(&s_dma_descr_input, 0, sizeof(lldesc_t)); + memset(&s_dma_descr_buf, 0, sizeof(lldesc_t)); + /* DMA descriptor for Memory to DMA-SHA transfer */ if (ilen) { - dma_descr_input.length = ilen; - dma_descr_input.size = ilen; - dma_descr_input.owner = 1; - dma_descr_input.eof = 1; - dma_descr_input.buf = (uint8_t *)input; - dma_descr_head = &dma_descr_input; + s_dma_descr_input.length = ilen; + s_dma_descr_input.size = ilen; + s_dma_descr_input.owner = 1; + s_dma_descr_input.eof = 1; + s_dma_descr_input.buf = (uint8_t *)input; + dma_descr_head = &s_dma_descr_input; } /* Check after input to overide head if there is any buf*/ if (buf_len) { - dma_descr_buf.length = buf_len; - dma_descr_buf.size = buf_len; - dma_descr_buf.owner = 1; - dma_descr_buf.eof = 1; - dma_descr_buf.buf = (uint8_t *)buf; - dma_descr_head = &dma_descr_buf; + s_dma_descr_buf.length = buf_len; + s_dma_descr_buf.size = buf_len; + s_dma_descr_buf.owner = 1; + s_dma_descr_buf.eof = 1; + s_dma_descr_buf.buf = (uint8_t *)buf; + dma_descr_head = &s_dma_descr_buf; } /* Link DMA lists */ if (buf_len && ilen) { - dma_descr_buf.eof = 0; - dma_descr_buf.empty = (uint32_t)(&dma_descr_input); + s_dma_descr_buf.eof = 0; + s_dma_descr_buf.empty = (uint32_t)(&s_dma_descr_input); } if (esp_sha_dma_start(dma_descr_head) != ESP_OK) { diff --git a/components/mbedtls/test/test_aes.c b/components/mbedtls/test/test_aes.c index d66cef22fd..013431e25d 100644 --- a/components/mbedtls/test/test_aes.c +++ 
b/components/mbedtls/test/test_aes.c @@ -12,6 +12,9 @@ #include "esp_timer.h" #include "esp_heap_caps.h" #include "test_utils.h" +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/semphr.h" static const uint8_t key_256[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -296,7 +299,7 @@ TEST_CASE("mbedtls CFB-128 AES-256 test", "[aes]") free(decryptedtext); } -TEST_CASE("mbedtls CTR stream test", "[aes]") +static void aes_ctr_stream_test(void) { const unsigned SZ = 100; mbedtls_aes_context ctx; @@ -396,6 +399,11 @@ TEST_CASE("mbedtls CTR stream test", "[aes]") free(decryptedtext); } +TEST_CASE("mbedtls CTR stream test", "[aes]") +{ + aes_ctr_stream_test(); +} + TEST_CASE("mbedtls OFB stream test", "[aes]") { @@ -1464,3 +1472,32 @@ TEST_CASE("mbedtls AES external flash tests", "[aes]") aes_ext_flash_ctr_test(MALLOC_CAP_DMA | MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); } #endif // CONFIG_SPIRAM_USE_MALLOC + + +#if CONFIG_ESP_SYSTEM_RTC_FAST_MEM_AS_HEAP_DEPCHECK + +RTC_FAST_ATTR uint8_t rtc_stack[4096]; +static xSemaphoreHandle done_sem; + +static void aes_ctr_stream_test_task(void *pv) +{ + aes_ctr_stream_test(); + xSemaphoreGive(done_sem); + vTaskDelete(NULL); +} + +TEST_CASE("mbedtls AES stack in RTC RAM", "[mbedtls]") +{ + done_sem = xSemaphoreCreateBinary(); + static StaticTask_t rtc_task; + memset(rtc_stack, 0, sizeof(rtc_stack)); + + TEST_ASSERT(esp_ptr_in_rtc_dram_fast(rtc_stack)); + + TEST_ASSERT_NOT_NULL(xTaskCreateStatic(aes_ctr_stream_test_task, "aes_ctr_task", sizeof(rtc_stack), NULL, + 3, rtc_stack, &rtc_task)); + TEST_ASSERT_TRUE(xSemaphoreTake(done_sem, 10000 / portTICK_PERIOD_MS)); + vSemaphoreDelete(done_sem); +} + +#endif //CONFIG_ESP_SYSTEM_RTC_FAST_MEM_AS_HEAP_DEPCHECK diff --git a/components/mbedtls/test/test_mbedtls_sha.c b/components/mbedtls/test/test_mbedtls_sha.c index 005d33c1e4..fba5419b24 100644 --- a/components/mbedtls/test/test_mbedtls_sha.c +++ b/components/mbedtls/test/test_mbedtls_sha.c @@ -518,3 +518,25 
@@ TEST_CASE("mbedtls SHA256 PSRAM DMA", "[mbedtls]") } #endif //CONFIG_SPIRAM_USE_MALLOC + +#if CONFIG_ESP_SYSTEM_RTC_FAST_MEM_AS_HEAP_DEPCHECK + +extern RTC_FAST_ATTR uint8_t rtc_stack[4096]; + +static xSemaphoreHandle done_sem; + +TEST_CASE("mbedtls SHA stack in RTC RAM", "[mbedtls]") +{ + done_sem = xSemaphoreCreateBinary(); + static StaticTask_t rtc_task; + memset(rtc_stack, 0, sizeof(rtc_stack)); + + TEST_ASSERT(esp_ptr_in_rtc_dram_fast(rtc_stack)); + + TEST_ASSERT_NOT_NULL(xTaskCreateStatic(tskRunSHA256Test, "tskRunSHA256Test_task", sizeof(rtc_stack), NULL, + 3, rtc_stack, &rtc_task)); + TEST_ASSERT_TRUE(xSemaphoreTake(done_sem, 10000 / portTICK_PERIOD_MS)); + vSemaphoreDelete(done_sem); +} + +#endif //CONFIG_ESP_SYSTEM_RTC_FAST_MEM_AS_HEAP_DEPCHECK diff --git a/tools/ci/check_copyright_ignore.txt b/tools/ci/check_copyright_ignore.txt index 35e8efcbaf..4464031adc 100644 --- a/tools/ci/check_copyright_ignore.txt +++ b/tools/ci/check_copyright_ignore.txt @@ -1376,8 +1376,6 @@ components/heap/test_multi_heap_host/test_multi_heap.cpp components/idf_test/include/esp32/idf_performance_target.h components/idf_test/include/esp32c3/idf_performance_target.h components/idf_test/include/esp32h2/idf_performance_target.h -components/idf_test/include/esp32s2/idf_performance_target.h -components/idf_test/include/esp32s3/idf_performance_target.h components/idf_test/include/idf_performance.h components/linux/include/sys/queue.h components/log/esp_log_private.h