crypto: allocate all DMA descriptors to DMA capable memory.

These were previously placed on the stack, but the stack could be placed in
RTC RAM which is not DMA capable.
pull/8057/head
Marius Vikhammer 2021-11-17 17:43:22 +08:00 zatwierdzone przez bot
rodzic ba355f924e
commit 7fc7c49e5d
8 zmienionych plików z 131 dodań i 64 usunięć

Wyświetl plik

@ -1,16 +1,8 @@
// Copyright 2020 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

Wyświetl plik

@ -1,23 +1,15 @@
// Copyright 2020 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#define IDF_PERFORMANCE_MIN_AES_CBC_THROUGHPUT_MBSEC 43
// SHA256 hardware throughput at 240MHz, threshold set lower than worst case
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 19.8
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 90
// esp_sha() time to process 32KB of input data from RAM
#define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB 1000
#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 900

Wyświetl plik

@ -31,6 +31,7 @@
#include "esp_intr_alloc.h"
#include "esp_private/periph_ctrl.h"
#include "esp_log.h"
#include "esp_attr.h"
#include "soc/lldesc.h"
#include "esp_heap_caps.h"
#include "sys/param.h"
@ -81,6 +82,14 @@ static esp_pm_lock_handle_t s_pm_sleep_lock;
static const char *TAG = "esp-aes";
/* DMA descriptors and bounce buffers for the trailing partial AES block
* (the "stream" bytes, i.e. len % AES_BLOCK_BYTES).
*
* These are static due to:
* * Must be in DMA capable memory, so stack is not a safe place to put them
*   (the stack could be placed in RTC RAM, which is not DMA capable)
* * To avoid having to malloc/free them for every DMA operation
*
* NOTE(review): being file-scope statics, they are shared by every DMA
* operation in this file; presumably callers serialize access through the
* AES peripheral lock - confirm.
*/
/* Descriptor pointing at s_stream_in (input side of the partial block) */
static DRAM_ATTR lldesc_t s_stream_in_desc;
/* Descriptor pointing at s_stream_out (output side of the partial block) */
static DRAM_ATTR lldesc_t s_stream_out_desc;
/* Zero-padded copy of the leftover input bytes, one AES block in size */
static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES];
/* DMA destination for the processed partial block, one AES block in size */
static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES];
static inline void esp_aes_wait_dma_done(lldesc_t *output)
{
@ -289,15 +298,12 @@ cleanup:
/* Encrypt/decrypt the input using DMA */
static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out)
{
lldesc_t stream_in_desc, stream_out_desc;
lldesc_t *in_desc_head = NULL, *out_desc_head = NULL;
lldesc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */
lldesc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL;
size_t lldesc_num;
uint8_t stream_in[16] = {};
unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block
unsigned block_bytes = len - stream_bytes; // bytes which are in a full block
unsigned char *non_icache_input = NULL;
unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0);
bool use_intr = false;
bool input_needs_realloc = false;
@ -321,7 +327,7 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input,
/* Flush cache if input in external ram */
#if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC)
if (esp_ptr_external_ram(input)) {
Cache_WriteBack_All();
Cache_WriteBack_Addr((uint32_t)input, len);
}
if (esp_ptr_external_ram(output)) {
if (((intptr_t)(output) & 0xF) != 0) {
@ -348,7 +354,7 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input,
lldesc_num = lldesc_get_required_num(block_bytes);
/* Allocate both in and out descriptors to save a malloc/free per function call */
block_desc = heap_caps_malloc(sizeof(lldesc_t) * lldesc_num * 2, MALLOC_CAP_DMA);
block_desc = heap_caps_calloc(lldesc_num * 2, sizeof(lldesc_t), MALLOC_CAP_DMA);
if (block_desc == NULL) {
ESP_LOGE(TAG, "Failed to allocate memory");
ret = -1;
@ -367,23 +373,30 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input,
/* Any leftover bytes which are appended as an additional DMA list */
if (stream_bytes > 0) {
memcpy(stream_in, input + block_bytes, stream_bytes);
lldesc_setup_link(&stream_in_desc, stream_in, AES_BLOCK_BYTES, 0);
lldesc_setup_link(&stream_out_desc, stream_out, AES_BLOCK_BYTES, 0);
memset(&s_stream_in_desc, 0, sizeof(lldesc_t));
memset(&s_stream_out_desc, 0, sizeof(lldesc_t));
memset(s_stream_in, 0, AES_BLOCK_BYTES);
memset(s_stream_out, 0, AES_BLOCK_BYTES);
memcpy(s_stream_in, input + block_bytes, stream_bytes);
lldesc_setup_link(&s_stream_in_desc, s_stream_in, AES_BLOCK_BYTES, 0);
lldesc_setup_link(&s_stream_out_desc, s_stream_out, AES_BLOCK_BYTES, 0);
if (block_bytes > 0) {
/* Link with block descriptors*/
block_in_desc[lldesc_num - 1].empty = (uint32_t)&stream_in_desc;
block_out_desc[lldesc_num - 1].empty = (uint32_t)&stream_out_desc;
block_in_desc[lldesc_num - 1].empty = (uint32_t)&s_stream_in_desc;
block_out_desc[lldesc_num - 1].empty = (uint32_t)&s_stream_out_desc;
}
out_desc_tail = &stream_out_desc;
out_desc_tail = &s_stream_out_desc;
}
// block buffers are sent to DMA first, unless there aren't any
in_desc_head = (block_bytes > 0) ? block_in_desc : &stream_in_desc;
out_desc_head = (block_bytes > 0) ? block_out_desc : &stream_out_desc;
in_desc_head = (block_bytes > 0) ? block_in_desc : &s_stream_in_desc;
out_desc_head = (block_bytes > 0) ? block_out_desc : &s_stream_out_desc;
#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT)
@ -412,18 +425,18 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input,
#if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC)
if (block_bytes > 0) {
if (esp_ptr_external_ram(output)) {
Cache_Invalidate_DCache_All();
Cache_Invalidate_Addr((uint32_t)output, block_bytes);
}
}
#endif
aes_hal_transform_dma_finish();
if (stream_bytes > 0) {
memcpy(output + block_bytes, stream_out, stream_bytes);
memcpy(output + block_bytes, s_stream_out, stream_bytes);
memcpy(stream_out, s_stream_out, AES_BLOCK_BYTES);
}
cleanup:
free(non_icache_input);
free(block_desc);
return ret;
}

Wyświetl plik

@ -490,7 +490,7 @@ int esp_aes_gcm_finish( esp_gcm_context *ctx,
/* Due to restrictions in the hardware (e.g. need to do the whole conversion in one go),
some combinations of inputs are not supported */
static bool esp_aes_gcm_input_support_hw_accel(size_t length, const unsigned char *aad, size_t aad_len,
const unsigned char *input, unsigned char *output)
const unsigned char *input, unsigned char *output, uint8_t *stream_in)
{
bool support_hw_accel = true;
@ -505,10 +505,15 @@ static bool esp_aes_gcm_input_support_hw_accel(size_t length, const unsigned cha
} else if (!esp_ptr_dma_capable(output) && length > 0) {
/* output in non internal DMA memory */
support_hw_accel = false;
} else if (!esp_ptr_dma_capable(stream_in)) {
/* stream_in (and therefore the other descriptors and buffers that come from the stack)
is not in internal DMA capable memory */
support_hw_accel = false;
} else if (length == 0) {
support_hw_accel = false;
}
return support_hw_accel;
}
@ -562,7 +567,7 @@ int esp_aes_gcm_crypt_and_tag( esp_gcm_context *ctx,
unsigned block_bytes = aad_len - stream_bytes; // bytes which are in a full block
/* Due to hardware limitations, only certain cases are fully supported in HW */
if (!esp_aes_gcm_input_support_hw_accel(length, aad, aad_len, input, output)) {
if (!esp_aes_gcm_input_support_hw_accel(length, aad, aad_len, input, output, stream_in)) {
return esp_aes_gcm_crypt_and_tag_partial_hw(ctx, mode, length, iv, iv_len, aad, aad_len, input, output, tag_len, tag);
}

Wyświetl plik

@ -31,6 +31,7 @@
#include "esp_log.h"
#include "esp_crypto_lock.h"
#include "esp_attr.h"
#include "soc/lldesc.h"
#include "soc/cache_memory.h"
#include "soc/periph_defs.h"
@ -68,6 +69,12 @@
const static char *TAG = "esp-sha";
/* DMA descriptors used for SHA memory-to-peripheral transfers.
*
* These are static due to:
* * Must be in DMA capable memory, so stack is not a safe place to put them
*   (the stack could be placed in RTC RAM, which is not DMA capable)
* * To avoid having to malloc/free them for every DMA operation
*
* NOTE(review): being file-scope statics, they are shared by every SHA DMA
* operation; presumably callers hold the SHA peripheral lock - confirm.
*/
/* Describes the caller's `input` data; used when ilen is non-zero */
static DRAM_ATTR lldesc_t s_dma_descr_input;
/* Describes the `buf` data; when both are present it is the list head and links to s_dma_descr_input */
static DRAM_ATTR lldesc_t s_dma_descr_buf;
void esp_sha_write_digest_state(esp_sha_type sha_type, void *digest_state)
{
@ -277,34 +284,35 @@ static esp_err_t esp_sha_dma_process(esp_sha_type sha_type, const void *input, u
const void *buf, uint32_t buf_len, bool is_first_block)
{
int ret = 0;
lldesc_t dma_descr_input = {};
lldesc_t dma_descr_buf = {};
lldesc_t *dma_descr_head;
size_t num_blks = (ilen + buf_len) / block_length(sha_type);
memset(&s_dma_descr_input, 0, sizeof(lldesc_t));
memset(&s_dma_descr_buf, 0, sizeof(lldesc_t));
/* DMA descriptor for Memory to DMA-SHA transfer */
if (ilen) {
dma_descr_input.length = ilen;
dma_descr_input.size = ilen;
dma_descr_input.owner = 1;
dma_descr_input.eof = 1;
dma_descr_input.buf = (uint8_t *)input;
dma_descr_head = &dma_descr_input;
s_dma_descr_input.length = ilen;
s_dma_descr_input.size = ilen;
s_dma_descr_input.owner = 1;
s_dma_descr_input.eof = 1;
s_dma_descr_input.buf = (uint8_t *)input;
dma_descr_head = &s_dma_descr_input;
}
/* Checked after input so that buf, if there is any, overrides the head */
if (buf_len) {
dma_descr_buf.length = buf_len;
dma_descr_buf.size = buf_len;
dma_descr_buf.owner = 1;
dma_descr_buf.eof = 1;
dma_descr_buf.buf = (uint8_t *)buf;
dma_descr_head = &dma_descr_buf;
s_dma_descr_buf.length = buf_len;
s_dma_descr_buf.size = buf_len;
s_dma_descr_buf.owner = 1;
s_dma_descr_buf.eof = 1;
s_dma_descr_buf.buf = (uint8_t *)buf;
dma_descr_head = &s_dma_descr_buf;
}
/* Link DMA lists */
if (buf_len && ilen) {
dma_descr_buf.eof = 0;
dma_descr_buf.empty = (uint32_t)(&dma_descr_input);
s_dma_descr_buf.eof = 0;
s_dma_descr_buf.empty = (uint32_t)(&s_dma_descr_input);
}
if (esp_sha_dma_start(dma_descr_head) != ESP_OK) {

Wyświetl plik

@ -12,6 +12,9 @@
#include "esp_timer.h"
#include "esp_heap_caps.h"
#include "test_utils.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/semphr.h"
static const uint8_t key_256[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
@ -296,7 +299,7 @@ TEST_CASE("mbedtls CFB-128 AES-256 test", "[aes]")
free(decryptedtext);
}
TEST_CASE("mbedtls CTR stream test", "[aes]")
static void aes_ctr_stream_test(void)
{
const unsigned SZ = 100;
mbedtls_aes_context ctx;
@ -396,6 +399,11 @@ TEST_CASE("mbedtls CTR stream test", "[aes]")
free(decryptedtext);
}
/* Unity entry point for the CTR stream test. The test body lives in
 * aes_ctr_stream_test() so that it can also be run from other contexts,
 * e.g. from a task created with a custom stack. */
TEST_CASE("mbedtls CTR stream test", "[aes]")
{
aes_ctr_stream_test();
}
TEST_CASE("mbedtls OFB stream test", "[aes]")
{
@ -1464,3 +1472,32 @@ TEST_CASE("mbedtls AES external flash tests", "[aes]")
aes_ext_flash_ctr_test(MALLOC_CAP_DMA | MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL);
}
#endif // CONFIG_SPIRAM_USE_MALLOC
#if CONFIG_ESP_SYSTEM_RTC_FAST_MEM_AS_HEAP_DEPCHECK
/* 4 KB buffer tagged RTC_FAST_ATTR; used as the task stack for the
 * "stack in RTC RAM" test so that the code under test runs with its stack
 * outside of DMA capable memory. Not static: it has external linkage. */
RTC_FAST_ATTR uint8_t rtc_stack[4096];
/* Given by the test task when it has finished; taken by the test case */
static xSemaphoreHandle done_sem;
/* Task body: runs the AES CTR stream test on this task's own stack, signals
 * completion through done_sem and then deletes itself.
 *
 * pv: task parameter, unused. */
static void aes_ctr_stream_test_task(void *pv)
{
aes_ctr_stream_test();
/* Wake up the test case that is blocked waiting for the result */
xSemaphoreGive(done_sem);
/* Passing NULL deletes the calling task */
vTaskDelete(NULL);
}
/* Runs the AES CTR stream test in a task whose stack is rtc_stack, i.e. a
 * stack that is not in DMA capable memory, and waits up to 10 s for the task
 * to report completion through done_sem.
 *
 * The precondition on rtc_stack is checked before the semaphore is created so
 * that a failing precondition does not leak the semaphore, and the semaphore
 * handle itself is checked before anything can block on it. */
TEST_CASE("mbedtls AES stack in RTC RAM", "[mbedtls]")
{
    /* Task control block must outlive this function's stack frame */
    static StaticTask_t rtc_task;

    memset(rtc_stack, 0, sizeof(rtc_stack));
    TEST_ASSERT(esp_ptr_in_rtc_dram_fast(rtc_stack));

    /* xSemaphoreCreateBinary() returns NULL when the allocation fails */
    done_sem = xSemaphoreCreateBinary();
    TEST_ASSERT_NOT_NULL(done_sem);

    TEST_ASSERT_NOT_NULL(xTaskCreateStatic(aes_ctr_stream_test_task, "aes_ctr_task", sizeof(rtc_stack), NULL,
                                           3, rtc_stack, &rtc_task));

    /* On timeout the semaphore is intentionally not deleted: the task may
     * still be running and would give a freed semaphore. */
    TEST_ASSERT_TRUE(xSemaphoreTake(done_sem, 10000 / portTICK_PERIOD_MS));
    vSemaphoreDelete(done_sem);
}
#endif //CONFIG_ESP_SYSTEM_RTC_FAST_MEM_AS_HEAP_DEPCHECK

Wyświetl plik

@ -518,3 +518,25 @@ TEST_CASE("mbedtls SHA256 PSRAM DMA", "[mbedtls]")
}
#endif //CONFIG_SPIRAM_USE_MALLOC
#if CONFIG_ESP_SYSTEM_RTC_FAST_MEM_AS_HEAP_DEPCHECK
/* Declaration only: the 4 KB RTC_FAST_ATTR buffer is defined in another
 * translation unit and reused here as the test task's stack. */
extern RTC_FAST_ATTR uint8_t rtc_stack[4096];
/* Taken by the test case below to wait for the test task.
 * NOTE(review): presumably given by tskRunSHA256Test on completion; its body
 * is not visible here - confirm. */
static xSemaphoreHandle done_sem;
/* Runs tskRunSHA256Test in a task whose stack is rtc_stack, i.e. a stack
 * that is not in DMA capable memory, and waits up to 10 s on done_sem.
 *
 * The precondition on rtc_stack is checked before the semaphore is created so
 * that a failing precondition does not leak the semaphore, and the semaphore
 * handle itself is checked before anything can block on it.
 *
 * NOTE(review): relies on tskRunSHA256Test giving done_sem when it finishes;
 * that function is defined outside this view - confirm. */
TEST_CASE("mbedtls SHA stack in RTC RAM", "[mbedtls]")
{
    /* Task control block must outlive this function's stack frame */
    static StaticTask_t rtc_task;

    memset(rtc_stack, 0, sizeof(rtc_stack));
    TEST_ASSERT(esp_ptr_in_rtc_dram_fast(rtc_stack));

    /* xSemaphoreCreateBinary() returns NULL when the allocation fails */
    done_sem = xSemaphoreCreateBinary();
    TEST_ASSERT_NOT_NULL(done_sem);

    TEST_ASSERT_NOT_NULL(xTaskCreateStatic(tskRunSHA256Test, "tskRunSHA256Test_task", sizeof(rtc_stack), NULL,
                                           3, rtc_stack, &rtc_task));

    /* On timeout the semaphore is intentionally not deleted: the task may
     * still be running and would give a freed semaphore. */
    TEST_ASSERT_TRUE(xSemaphoreTake(done_sem, 10000 / portTICK_PERIOD_MS));
    vSemaphoreDelete(done_sem);
}
#endif //CONFIG_ESP_SYSTEM_RTC_FAST_MEM_AS_HEAP_DEPCHECK

Wyświetl plik

@ -1376,8 +1376,6 @@ components/heap/test_multi_heap_host/test_multi_heap.cpp
components/idf_test/include/esp32/idf_performance_target.h
components/idf_test/include/esp32c3/idf_performance_target.h
components/idf_test/include/esp32h2/idf_performance_target.h
components/idf_test/include/esp32s2/idf_performance_target.h
components/idf_test/include/esp32s3/idf_performance_target.h
components/idf_test/include/idf_performance.h
components/linux/include/sys/queue.h
components/log/esp_log_private.h