From 595c3fe6a27bcae8a2924d6b11934b22075db128 Mon Sep 17 00:00:00 2001 From: morris Date: Tue, 15 Aug 2023 17:05:59 +0800 Subject: [PATCH] fix(async_memcpy): destination alignment check against cache line size On ESP32P4, becasue we need to invalidate the destination buffer, if the buffer is not aligned to cache line, then it might break other date structure, randomly. --- .../esp_hw_support/dma/async_memcpy_gdma.c | 16 ++++++++++---- .../test_apps/dma/main/test_async_memcpy.c | 22 ++++++++++++++----- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/components/esp_hw_support/dma/async_memcpy_gdma.c b/components/esp_hw_support/dma/async_memcpy_gdma.c index 7376f8fd3b..2538ece16c 100644 --- a/components/esp_hw_support/dma/async_memcpy_gdma.c +++ b/components/esp_hw_support/dma/async_memcpy_gdma.c @@ -321,17 +321,25 @@ static async_memcpy_transaction_t *try_pop_trans_from_idle_queue(async_memcpy_gd static bool check_buffer_aligned(async_memcpy_gdma_context_t *mcp_gdma, void *src, void *dst, size_t n) { bool valid = true; + uint32_t align_mask = 0; if (esp_ptr_external_ram(dst)) { if (mcp_gdma->psram_trans_align) { - valid = valid && (((intptr_t)dst & (mcp_gdma->psram_trans_align - 1)) == 0); - valid = valid && ((n & (mcp_gdma->psram_trans_align - 1)) == 0); + align_mask = mcp_gdma->psram_trans_align - 1; } } else { if (mcp_gdma->sram_trans_align) { - valid = valid && (((intptr_t)dst & (mcp_gdma->sram_trans_align - 1)) == 0); - valid = valid && ((n & (mcp_gdma->sram_trans_align - 1)) == 0); + align_mask = mcp_gdma->sram_trans_align - 1; } } +#if CONFIG_IDF_TARGET_ESP32P4 + uint32_t data_cache_line_mask = cache_hal_get_cache_line_size(CACHE_TYPE_DATA) - 1; + if (data_cache_line_mask > align_mask) { + align_mask = data_cache_line_mask; + } +#endif + // destination address must be cache line aligned + valid = valid && (((uint32_t)dst & align_mask) == 0); + valid = valid && ((n & align_mask) == 0); return valid; } diff --git a/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c b/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c index affd3c6279..fa750b2012 100644 --- a/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c +++ b/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c @@ -25,6 +25,12 @@ #define ALIGN_UP(addr, align) (((addr) + (align)-1) & ~((align)-1)) #define ALIGN_DOWN(size, align) ((size) & ~((align) - 1)) +#if CONFIG_IDF_TARGET_ESP32P4 +#define TEST_MEMCPY_DST_BASE_ALIGN 64 +#else +#define TEST_MEMCPY_DST_BASE_ALIGN 4 +#endif + typedef struct { uint32_t seed; size_t buffer_size; @@ -68,8 +74,9 @@ static void async_memcpy_setup_testbench(memcpy_testbench_context_t *test_contex TEST_ASSERT_NOT_NULL_MESSAGE(dst_buf, "allocate destination buffer failed"); // adding extra offset from_addr = src_buf + test_context->offset; - to_addr = dst_buf + test_context->offset; + to_addr = dst_buf; copy_size -= test_context->offset; + copy_size &= ~(test_context->align - 1); printf("...to copy size %zu Bytes, from @%p, to @%p\r\n", copy_size, from_addr, to_addr); printf("fill src buffer with random data\r\n"); @@ -106,7 +113,7 @@ TEST_CASE("memory copy the same buffer with different content", "[async mcp]") async_memcpy_handle_t driver = NULL; TEST_ESP_OK(esp_async_memcpy_install(&config, &driver)); uint8_t *sbuf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); - uint8_t *dbuf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); + uint8_t *dbuf = heap_caps_aligned_alloc(TEST_MEMCPY_DST_BASE_ALIGN, 256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); for (int j = 0; j < 20; j++) { TEST_ESP_OK(esp_async_memcpy(driver, dbuf, sbuf, 256, NULL, NULL)); vTaskDelay(pdMS_TO_TICKS(10)); @@ -128,7 +135,7 @@ static void test_memory_copy_one_by_one(async_memcpy_handle_t driver) { uint32_t test_buffer_len[] = {256, 512, 1024, 2048, 4096, 5011}; memcpy_testbench_context_t test_context = { - .align = 4, + .align = TEST_MEMCPY_DST_BASE_ALIGN, }; for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) { @@ -194,7 +201,8 @@ TEST_CASE("memory copy done callback", "[async mcp]") TEST_ESP_OK(esp_async_memcpy_install(&config, &driver)); uint8_t *src_buf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); - uint8_t *dst_buf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); + // destination address should aligned to data cache line + uint8_t *dst_buf = heap_caps_aligned_alloc(TEST_MEMCPY_DST_BASE_ALIGN, 256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); SemaphoreHandle_t sem = xSemaphoreCreateBinary(); TEST_ESP_OK(esp_async_memcpy(driver, dst_buf, src_buf, 256, test_async_memcpy_cb_v1, sem)); @@ -212,8 +220,10 @@ TEST_CASE("memory copy by DMA on the fly", "[async mcp]") TEST_ESP_OK(esp_async_memcpy_install(&config, &driver)); uint32_t test_buffer_len[] = {512, 1024, 2048, 4096, 5011}; - memcpy_testbench_context_t test_context[] = { - {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4}, + memcpy_testbench_context_t test_context[5] = { + [0 ... 4] = { + .align = TEST_MEMCPY_DST_BASE_ALIGN, + } }; // Aligned case