gdma: set transfer ability

pull/7307/head
morris 2021-06-23 14:10:07 +08:00
parent d31b1f79e6
commit d9819bc7ae
10 changed files with 364 additions and 100 deletions


@@ -74,6 +74,8 @@ struct gdma_channel_t {
intr_handle_t intr; // per-channel interrupt handle
gdma_channel_direction_t direction; // channel direction
int periph_id; // Peripheral instance ID, indicates which peripheral is connected to this GDMA channel
size_t sram_alignment; // alignment for memory in SRAM
size_t psram_alignment; // alignment for memory in PSRAM
esp_err_t (*del)(gdma_channel_t *channel); // channel deletion function, it's polymorphic, see `gdma_del_tx_channel` or `gdma_del_rx_channel`
};
@@ -271,6 +273,67 @@ err:
return ret;
}
esp_err_t gdma_set_transfer_ability(gdma_channel_handle_t dma_chan, const gdma_transfer_ability_t *ability)
{
esp_err_t ret = ESP_OK;
gdma_pair_t *pair = NULL;
gdma_group_t *group = NULL;
bool en_burst = true;
ESP_GOTO_ON_FALSE(dma_chan && ability, ESP_ERR_INVALID_ARG, err, TAG, "invalid argument");
pair = dma_chan->pair;
group = pair->group;
size_t sram_alignment = ability->sram_trans_align;
size_t psram_alignment = ability->psram_trans_align;
// alignment should be 2^n
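// note: (x & (x - 1)) == 0 is the usual power-of-two test; it also accepts 0, which here means "no alignment requirement"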
ESP_GOTO_ON_FALSE((sram_alignment & (sram_alignment - 1)) == 0, ESP_ERR_INVALID_ARG, err, TAG, "invalid sram alignment: %zu", sram_alignment);
#if SOC_GDMA_SUPPORT_PSRAM
int block_size_index = 0;
switch (psram_alignment) {
case 64: // 64 Bytes alignment
block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_64B;
break;
case 32: // 32 Bytes alignment
block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_32B;
break;
case 16: // 16 Bytes alignment
block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_16B;
break;
case 0: // no alignment requirement
block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_16B;
psram_alignment = SOC_GDMA_PSRAM_MIN_ALIGN; // fall back to minimal alignment
break;
default:
ESP_GOTO_ON_FALSE(false, ESP_ERR_INVALID_ARG, err, TAG, "invalid psram alignment: %zu", psram_alignment);
break;
}
#endif // #if SOC_GDMA_SUPPORT_PSRAM
if (dma_chan->direction == GDMA_CHANNEL_DIRECTION_TX) {
// TX channel can always enable burst mode, no matter data alignment
gdma_ll_tx_enable_data_burst(group->hal.dev, pair->pair_id, true);
gdma_ll_tx_enable_descriptor_burst(group->hal.dev, pair->pair_id, true);
#if SOC_GDMA_SUPPORT_PSRAM
gdma_ll_tx_set_block_size_psram(group->hal.dev, pair->pair_id, block_size_index);
#endif // #if SOC_GDMA_SUPPORT_PSRAM
} else {
// RX channel burst mode depends on specific data alignment
en_burst = sram_alignment >= 4;
gdma_ll_rx_enable_data_burst(group->hal.dev, pair->pair_id, en_burst);
gdma_ll_rx_enable_descriptor_burst(group->hal.dev, pair->pair_id, en_burst);
#if SOC_GDMA_SUPPORT_PSRAM
gdma_ll_rx_set_block_size_psram(group->hal.dev, pair->pair_id, block_size_index);
#endif // #if SOC_GDMA_SUPPORT_PSRAM
}
dma_chan->sram_alignment = sram_alignment;
dma_chan->psram_alignment = psram_alignment;
ESP_LOGD(TAG, "%s channel (%d,%d), (%zu:%zu) bytes aligned, burst %s", dma_chan->direction == GDMA_CHANNEL_DIRECTION_TX ? "tx" : "rx",
group->group_id, pair->pair_id, sram_alignment, psram_alignment, en_burst ? "enabled" : "disabled");
err:
return ret;
}
esp_err_t gdma_apply_strategy(gdma_channel_handle_t dma_chan, const gdma_strategy_config_t *config)
{
esp_err_t ret = ESP_OK;


@@ -59,10 +59,23 @@ typedef struct {
gdma_channel_handle_t sibling_chan; /*!< DMA sibling channel handle (NULL means having sibling is not necessary) */
gdma_channel_direction_t direction; /*!< DMA channel direction */
struct {
int reserve_sibling: 1; /*!< If set, DMA channel allocator would prefer to allocate new channel in a new pair, and reserve sibling channel for future use */
} flags;
} gdma_channel_alloc_config_t;
/**
* @brief GDMA transfer ability
*
* @note The alignment set in this structure is **not** a guarantee that the GDMA driver will handle non-aligned cases.
* In fact, the GDMA driver has no knowledge of the DMA buffer (address and size) used by the upper layer,
* so it is the responsibility of the **upper layer** to take care of the buffer address and size.
*
*/
typedef struct {
size_t sram_trans_align; /*!< DMA transfer alignment for memory in SRAM, in bytes. The driver enables/disables burst mode based on this value. 0 means no alignment is required */
size_t psram_trans_align; /*!< DMA transfer alignment for memory in PSRAM, in bytes. The driver sets proper burst block size based on the alignment value. 0 means no alignment is required */
} gdma_transfer_ability_t;
/**
* @brief Type of GDMA event data
*
@@ -80,6 +93,9 @@ typedef struct {
* @param event_data GDMA event data
* @param user_data User registered data from `gdma_register_tx_event_callbacks` or `gdma_register_rx_event_callbacks`
*
* @return Whether a task switch is needed after the callback function returns;
* this is usually because the callback wakes up a high-priority task.
*
*/
typedef bool (*gdma_event_callback_t)(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data);
@@ -172,6 +188,18 @@ esp_err_t gdma_connect(gdma_channel_handle_t dma_chan, gdma_trigger_t trig_periph
*/
esp_err_t gdma_disconnect(gdma_channel_handle_t dma_chan);
/**
* @brief Set DMA channel transfer ability
*
* @param[in] dma_chan GDMA channel handle, allocated by `gdma_new_channel`
* @param[in] ability Transfer ability, e.g. alignment
* @return
* - ESP_OK: Set DMA channel transfer ability successfully
* - ESP_ERR_INVALID_ARG: Set DMA channel transfer ability failed because of invalid argument
* - ESP_FAIL: Set DMA channel transfer ability failed because of other error
*/
esp_err_t gdma_set_transfer_ability(gdma_channel_handle_t dma_chan, const gdma_transfer_ability_t *ability);
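/*
 * Usage sketch (illustrative, not part of this header): the 4/64-byte values and the
 * `tx_chan` handle are assumptions, and the caller still has to allocate buffers that
 * honor the declared alignment, e.g. with heap_caps_aligned_alloc():
 *
 *     gdma_transfer_ability_t ability = {
 *         .sram_trans_align = 4,   // burst mode can be enabled for 4-byte aligned SRAM data
 *         .psram_trans_align = 64, // 64-byte block size for PSRAM access
 *     };
 *     ESP_ERROR_CHECK(gdma_set_transfer_ability(tx_chan, &ability));
 *     uint8_t *buf = heap_caps_aligned_alloc(64, 4096, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
 *     assert(buf); // the GDMA driver does not validate buffer alignment for us
 */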
/**
* @brief Apply channel strategy for GDMA channel
*


@@ -11,6 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sys/param.h>
#include "freertos/FreeRTOS.h"
#include "freertos/semphr.h"
#include "hal/dma_types.h"
@@ -22,6 +24,8 @@
static const char *TAG = "async_memcpy";
#define ALIGN_DOWN(val, align) ((val) & ~((align) - 1))
/**
* @brief Type of async mcp stream
* mcp stream inherits from the DMA descriptor; in addition, it carries a callback function member
@@ -43,7 +47,8 @@ typedef struct async_memcpy_context_t {
dma_descriptor_t *tx_desc; // pointer to the next free TX descriptor
dma_descriptor_t *rx_desc; // pointer to the next free RX descriptor
dma_descriptor_t *next_rx_desc_to_check; // pointer to the next RX descriptor to recycle
uint32_t max_stream_num; // maximum number of streams
size_t max_dma_buffer_size; // maximum buffer size carried by one DMA descriptor
async_memcpy_stream_t *out_streams; // pointer to the first TX stream
async_memcpy_stream_t *in_streams; // pointer to the first RX stream
async_memcpy_stream_t streams_pool[0]; // stream pool (TX + RX), the size is configured during driver installation
@@ -82,9 +87,14 @@ esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_me
mcp_hdl->rx_desc = &mcp_hdl->in_streams[0].desc;
mcp_hdl->next_rx_desc_to_check = &mcp_hdl->in_streams[0].desc;
mcp_hdl->spinlock = (portMUX_TYPE)portMUX_INITIALIZER_UNLOCKED;
mcp_hdl->mcp_impl.sram_trans_align = config->sram_trans_align;
mcp_hdl->mcp_impl.psram_trans_align = config->psram_trans_align;
size_t trans_align = MAX(config->sram_trans_align, config->psram_trans_align);
mcp_hdl->max_dma_buffer_size = trans_align ? ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align) : DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
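// worked example (illustrative numbers): DMA_DESCRIPTOR_BUFFER_MAX_SIZE is 4095, so with a
// 64-byte trans_align, ALIGN_DOWN(4095, 64) = 4032 keeps every full descriptor's size aligned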
// initialize implementation layer
async_memcpy_impl_init(&mcp_hdl->mcp_impl);
ret = async_memcpy_impl_init(&mcp_hdl->mcp_impl);
ESP_GOTO_ON_ERROR(ret, err, TAG, "DMA M2M init failed");
*asmcp = mcp_hdl;
@@ -121,14 +131,14 @@ static int async_memcpy_prepare_receive(async_memcpy_t asmcp, void *buffer, size
dma_descriptor_t *start = desc;
dma_descriptor_t *end = desc;
while (size > DMA_DESCRIPTOR_BUFFER_MAX_SIZE) {
while (size > asmcp->max_dma_buffer_size) {
if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) {
desc->dw0.suc_eof = 0;
desc->dw0.size = DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
desc->dw0.size = asmcp->max_dma_buffer_size;
desc->buffer = &buf[prepared_length];
desc = desc->next; // move to next descriptor
prepared_length += DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
size -= DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
prepared_length += asmcp->max_dma_buffer_size;
size -= asmcp->max_dma_buffer_size;
} else {
// out of RX descriptors
goto _exit;
@@ -162,15 +172,15 @@ static int async_memcpy_prepare_transmit(async_memcpy_t asmcp, void *buffer, siz
dma_descriptor_t *start = desc;
dma_descriptor_t *end = desc;
while (len > DMA_DESCRIPTOR_BUFFER_MAX_SIZE) {
while (len > asmcp->max_dma_buffer_size) {
if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) {
desc->dw0.suc_eof = 0; // not the end of the transaction
desc->dw0.size = DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
desc->dw0.length = DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
desc->dw0.size = asmcp->max_dma_buffer_size;
desc->dw0.length = asmcp->max_dma_buffer_size;
desc->buffer = &buf[prepared_length];
desc = desc->next; // move to next descriptor
prepared_length += DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
len -= DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
prepared_length += asmcp->max_dma_buffer_size;
len -= asmcp->max_dma_buffer_size;
} else {
// out of TX descriptors
goto _exit;
@@ -222,14 +232,20 @@ esp_err_t esp_async_memcpy(async_memcpy_t asmcp, void *dst, void *src, size_t n,
size_t rx_prepared_size = 0;
size_t tx_prepared_size = 0;
ESP_GOTO_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, err, TAG, "mcp handle can't be null");
ESP_GOTO_ON_FALSE(async_memcpy_impl_is_buffer_address_valid(&asmcp->mcp_impl, src, dst), ESP_ERR_INVALID_ARG, err, TAG, "buffer address not valid");
ESP_GOTO_ON_FALSE(n <= DMA_DESCRIPTOR_BUFFER_MAX_SIZE * asmcp->max_stream_num, ESP_ERR_INVALID_ARG, err, TAG, "buffer size too large");
ESP_GOTO_ON_FALSE(async_memcpy_impl_is_buffer_address_valid(&asmcp->mcp_impl, src, dst), ESP_ERR_INVALID_ARG, err, TAG, "buffer address not valid: %p -> %p", src, dst);
ESP_GOTO_ON_FALSE(n <= asmcp->max_dma_buffer_size * asmcp->max_stream_num, ESP_ERR_INVALID_ARG, err, TAG, "buffer size too large");
if (asmcp->mcp_impl.sram_trans_align) {
ESP_GOTO_ON_FALSE(((n & (asmcp->mcp_impl.sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, err, TAG, "copy size should align to %zu bytes", asmcp->mcp_impl.sram_trans_align);
}
if (asmcp->mcp_impl.psram_trans_align) {
ESP_GOTO_ON_FALSE(((n & (asmcp->mcp_impl.psram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, err, TAG, "copy size should align to %zu bytes", asmcp->mcp_impl.psram_trans_align);
}
// Prepare TX and RX descriptor
portENTER_CRITICAL_SAFE(&asmcp->spinlock);
rx_prepared_size = async_memcpy_prepare_receive(asmcp, dst, n, &rx_start_desc, &rx_end_desc);
tx_prepared_size = async_memcpy_prepare_transmit(asmcp, src, n, &tx_start_desc, &tx_end_desc);
if ((rx_prepared_size == n) && (tx_prepared_size == n)) {
if (rx_start_desc && tx_start_desc && (rx_prepared_size == n) && (tx_prepared_size == n)) {
// register user callback to the last descriptor
async_memcpy_stream_t *mcp_stream = __containerof(rx_end_desc, async_memcpy_stream_t, desc);
mcp_stream->cb = cb_isr;


@@ -54,8 +54,10 @@ typedef bool (*async_memcpy_isr_cb_t)(async_memcpy_t mcp_hdl, async_memcpy_event
*
*/
typedef struct {
uint32_t backlog; /*!< Maximum number of streams that can be handled simultaneously */
uint32_t flags; /*!< Extra flags to control async memcpy feature */
uint32_t backlog; /*!< Maximum number of streams that can be handled simultaneously */
size_t sram_trans_align; /*!< DMA transfer alignment (both in size and address) for SRAM memory */
size_t psram_trans_align; /*!< DMA transfer alignment (both in size and address) for PSRAM memory */
uint32_t flags; /*!< Extra flags to control async memcpy feature */
} async_memcpy_config_t;
/**
@@ -63,9 +65,11 @@ typedef struct {
*
*/
#define ASYNC_MEMCPY_DEFAULT_CONFIG() \
{ \
.backlog = 8, \
.flags = 0, \
{ \
.backlog = 8, \
.sram_trans_align = 0, \
.psram_trans_align = 0, \
.flags = 0, \
}
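/*
 * Sketch (illustrative) of overriding these defaults before installing the driver:
 *
 *     async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
 *     config.sram_trans_align = 4; // 4-byte alignment lets the driver enable burst mode
 *     async_memcpy_t mcp = NULL;
 *     ESP_ERROR_CHECK(esp_async_memcpy_install(&config, &mcp));
 */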
/**


@@ -61,9 +61,21 @@ esp_err_t async_memcpy_impl_init(async_memcpy_impl_t *impl)
gdma_strategy_config_t strategy_config = {
.auto_update_desc = true,
.owner_check = true
.owner_check = true,
};
gdma_transfer_ability_t transfer_ability = {
.sram_trans_align = impl->sram_trans_align,
.psram_trans_align = impl->psram_trans_align,
};
ret = gdma_set_transfer_ability(impl->tx_channel, &transfer_ability);
if (ret != ESP_OK) {
goto err;
}
ret = gdma_set_transfer_ability(impl->rx_channel, &transfer_ability);
if (ret != ESP_OK) {
goto err;
}
gdma_apply_strategy(impl->tx_channel, &strategy_config);
gdma_apply_strategy(impl->rx_channel, &strategy_config);
@@ -108,5 +120,15 @@ esp_err_t async_memcpy_impl_restart(async_memcpy_impl_t *impl)
bool async_memcpy_impl_is_buffer_address_valid(async_memcpy_impl_t *impl, void *src, void *dst)
{
return true;
bool valid = true;
if (esp_ptr_external_ram(dst)) {
if (impl->psram_trans_align) {
valid = valid && (((intptr_t)dst & (impl->psram_trans_align - 1)) == 0);
}
} else {
if (impl->sram_trans_align) {
valid = valid && (((intptr_t)dst & (impl->sram_trans_align - 1)) == 0);
}
}
return valid;
}


@@ -46,6 +46,8 @@ typedef struct {
gdma_channel_handle_t rx_channel;
#endif
intptr_t rx_eof_addr;
size_t sram_trans_align;
size_t psram_trans_align;
bool isr_need_yield; // if current isr needs a yield for higher priority task
} async_memcpy_impl_t;


@@ -12,37 +12,75 @@
#include "ccomp_timer.h"
#include "esp_async_memcpy.h"
#include "soc/soc_caps.h"
#include "hal/dma_types.h"
#if SOC_CP_DMA_SUPPORTED || SOC_GDMA_SUPPORTED
#define ALIGN_UP(addr, align) (((addr) + (align)-1) & ~((align)-1))
#define ALIGN_DOWN(size, align) ((size) & ~((align) - 1))
static void async_memcpy_setup_testbench(uint32_t seed, uint32_t *buffer_size, uint8_t **src_buf, uint8_t **dst_buf, uint8_t **from_addr, uint8_t **to_addr, uint32_t align)
typedef struct {
uint32_t seed;
uint32_t buffer_size;
uint8_t *src_buf;
uint8_t *dst_buf;
uint8_t *from_addr;
uint8_t *to_addr;
uint32_t align;
uint32_t offset;
bool src_in_psram;
bool dst_in_psram;
} memcpy_testbench_context_t;
static void async_memcpy_setup_testbench(memcpy_testbench_context_t *test_context)
{
srand(seed);
srand(test_context->seed);
printf("allocating memory buffer...\r\n");
// memory copy from/to PSRAM is not allowed
*src_buf = heap_caps_malloc(*buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
*dst_buf = heap_caps_calloc(1, *buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
TEST_ASSERT_NOT_NULL_MESSAGE(*src_buf, "allocate source buffer failed");
TEST_ASSERT_NOT_NULL_MESSAGE(*dst_buf, "allocate destination buffer failed");
*from_addr = (uint8_t *)ALIGN_UP((uint32_t)(*src_buf), 4);
*to_addr = (uint8_t *)ALIGN_UP((uint32_t)(*dst_buf), 4);
uint8_t gap = MAX(*from_addr - *src_buf, *to_addr - *dst_buf);
*buffer_size -= gap;
*from_addr += align;
*to_addr += align;
*buffer_size -= align;
printf("...size %d Bytes, src@%p, dst@%p\r\n", *buffer_size, *from_addr, *to_addr);
printf("fill src buffer with random data\r\n");
for (int i = 0; i < *buffer_size; i++) {
(*from_addr)[i] = rand() % 256;
uint32_t buffer_size = test_context->buffer_size;
uint8_t *src_buf = NULL;
uint8_t *dst_buf = NULL;
uint8_t *from_addr = NULL;
uint8_t *to_addr = NULL;
#if CONFIG_SPIRAM && SOC_GDMA_SUPPORT_PSRAM
if (test_context->src_in_psram) {
src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_SPIRAM);
} else {
src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
}
if (test_context->dst_in_psram) {
dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_SPIRAM);
} else {
dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
}
#else
src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
#endif
TEST_ASSERT_NOT_NULL_MESSAGE(src_buf, "allocate source buffer failed");
TEST_ASSERT_NOT_NULL_MESSAGE(dst_buf, "allocate destination buffer failed");
// address alignment
from_addr = (uint8_t *)ALIGN_UP((uint32_t)(src_buf), test_context->align);
to_addr = (uint8_t *)ALIGN_UP((uint32_t)(dst_buf), test_context->align);
uint8_t gap = MAX(from_addr - src_buf, to_addr - dst_buf);
buffer_size -= gap;
// size alignment
buffer_size = ALIGN_DOWN(buffer_size, test_context->align);
// adding extra offset
from_addr += test_context->offset;
to_addr += test_context->offset;
buffer_size -= test_context->offset;
printf("...size %d Bytes, src@%p, dst@%p\r\n", buffer_size, from_addr, to_addr);
printf("fill src buffer with random data\r\n");
for (int i = 0; i < buffer_size; i++) {
from_addr[i] = rand() % 256;
}
// pass results back through the test context
test_context->buffer_size = buffer_size;
test_context->src_buf = src_buf;
test_context->dst_buf = dst_buf;
test_context->from_addr = from_addr;
test_context->to_addr = to_addr;
}
static void async_memcpy_verify_and_clear_testbench(uint32_t seed, uint32_t buffer_size, uint8_t *src_buf, uint8_t *dst_buf, uint8_t *from_addr, uint8_t *to_addr)
@@ -91,18 +129,18 @@ TEST_CASE("memory copy by DMA one by one", "[async mcp]")
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
uint32_t test_buffer_len[] = {256, 512, 1024, 2048, 4096, 5011};
uint8_t *sbuf = NULL;
uint8_t *dbuf = NULL;
uint8_t *from = NULL;
uint8_t *to = NULL;
memcpy_testbench_context_t test_context = {
.align = 4,
};
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
// Test different alignment offsets
for (int align = 0; align < 4; align++) {
async_memcpy_setup_testbench(i, &test_buffer_len[i], &sbuf, &dbuf, &from, &to, align);
TEST_ESP_OK(esp_async_memcpy(driver, to, from, test_buffer_len[i], NULL, NULL));
async_memcpy_verify_and_clear_testbench(i, test_buffer_len[i], sbuf, dbuf, from, to);
for (int off = 0; off < 4; off++) {
test_context.buffer_size = test_buffer_len[i];
test_context.seed = i;
async_memcpy_setup_testbench(&test_context);
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, NULL, NULL));
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
vTaskDelay(pdMS_TO_TICKS(100));
}
}
@@ -117,86 +155,177 @@ TEST_CASE("memory copy by DMA on the fly", "[async mcp]")
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
uint32_t test_buffer_len[] = {512, 1024, 2048, 4096, 5011};
uint8_t *sbufs[] = {0, 0, 0, 0, 0};
uint8_t *dbufs[] = {0, 0, 0, 0, 0};
uint8_t *froms[] = {0, 0, 0, 0, 0};
uint8_t *tos[] = {0, 0, 0, 0, 0};
memcpy_testbench_context_t test_context[] = {
{.align = 4}, {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4},
};
// Aligned case
for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) {
async_memcpy_setup_testbench(i, &test_buffer_len[i], &sbufs[i], &dbufs[i], &froms[i], &tos[i], 0);
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
test_context[i].seed = i;
test_context[i].buffer_size = test_buffer_len[i];
async_memcpy_setup_testbench(&test_context[i]);
}
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
TEST_ESP_OK(esp_async_memcpy(driver, tos[i], froms[i], test_buffer_len[i], NULL, NULL));
TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL));
}
for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) {
async_memcpy_verify_and_clear_testbench(i, test_buffer_len[i], sbufs[i], dbufs[i], froms[i], tos[i]);
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
}
// Non-aligned case
for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) {
async_memcpy_setup_testbench(i, &test_buffer_len[i], &sbufs[i], &dbufs[i], &froms[i], &tos[i], 3);
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
test_context[i].seed = i;
test_context[i].buffer_size = test_buffer_len[i];
test_context[i].offset = 3;
async_memcpy_setup_testbench(&test_context[i]);
}
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
TEST_ESP_OK(esp_async_memcpy(driver, tos[i], froms[i], test_buffer_len[i], NULL, NULL));
TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL));
}
for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) {
async_memcpy_verify_and_clear_testbench(i, test_buffer_len[i], sbufs[i], dbufs[i], froms[i], tos[i]);
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
}
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
}
#define TEST_ASYNC_MEMCPY_BENCH_COUNTS (16)
static uint32_t test_async_memcpy_bench_len = 4095;
static int count = 0;
#define TEST_ASYNC_MEMCPY_BENCH_COUNTS (16)
static int s_count = 0;
static IRAM_ATTR bool test_async_memcpy_isr_cb(async_memcpy_t mcp_hdl, async_memcpy_event_t *event, void *cb_args)
{
SemaphoreHandle_t sem = (SemaphoreHandle_t)cb_args;
BaseType_t high_task_wakeup = pdFALSE;
count++;
if (count == TEST_ASYNC_MEMCPY_BENCH_COUNTS) {
s_count++;
if (s_count == TEST_ASYNC_MEMCPY_BENCH_COUNTS) {
xSemaphoreGiveFromISR(sem, &high_task_wakeup);
}
return high_task_wakeup == pdTRUE;
}
TEST_CASE("memory copy by DMA with callback", "[async mcp]")
static void memcpy_performance_test(uint32_t buffer_size)
{
SemaphoreHandle_t sem = xSemaphoreCreateBinary();
async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
config.backlog = TEST_ASYNC_MEMCPY_BENCH_COUNTS;
config.backlog = (buffer_size / DMA_DESCRIPTOR_BUFFER_MAX_SIZE + 1) * TEST_ASYNC_MEMCPY_BENCH_COUNTS;
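// the backlog must cover every in-flight copy: each copy of buffer_size bytes occupies
// roughly buffer_size / DMA_DESCRIPTOR_BUFFER_MAX_SIZE + 1 descriptors, and
// TEST_ASYNC_MEMCPY_BENCH_COUNTS copies are queued at once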
config.sram_trans_align = 4; // at least 4 bytes aligned for SRAM transfer
config.psram_trans_align = 64; // at least 64 bytes aligned for PSRAM transfer
async_memcpy_t driver = NULL;
int64_t elapse_us = 0;
float throughput = 0.0;
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
uint8_t *sbuf = NULL;
uint8_t *dbuf = NULL;
uint8_t *from = NULL;
uint8_t *to = NULL;
async_memcpy_setup_testbench(0, &test_async_memcpy_bench_len, &sbuf, &dbuf, &from, &to, 0);
count = 0;
// 1. SRAM->SRAM
memcpy_testbench_context_t test_context = {
.align = config.psram_trans_align,
.buffer_size = buffer_size,
.src_in_psram = false,
.dst_in_psram = false,
};
async_memcpy_setup_testbench(&test_context);
s_count = 0;
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
TEST_ESP_OK(esp_async_memcpy(driver, to, from, test_async_memcpy_bench_len, test_async_memcpy_isr_cb, sem));
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
}
// wait for done semaphore
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
esp_rom_printf("memcpy %d Bytes data by HW costs %lldus\r\n", test_async_memcpy_bench_len, ccomp_timer_stop() / TEST_ASYNC_MEMCPY_BENCH_COUNTS);
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
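// MB/s = bytes per round * rounds / 2^20, divided by the elapsed seconds (elapse_us / 1e6);
// with hypothetical numbers, 16 copies of 40 KB in 2000 us give ~312.5 MB/s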
IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: SRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
memcpy(to, from, test_async_memcpy_bench_len);
memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
}
esp_rom_printf("memcpy %d Bytes data by SW costs %lldus\r\n", test_async_memcpy_bench_len, ccomp_timer_stop() / TEST_ASYNC_MEMCPY_BENCH_COUNTS);
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
async_memcpy_verify_and_clear_testbench(0, test_async_memcpy_bench_len, sbuf, dbuf, from, to);
#if CONFIG_SPIRAM && SOC_GDMA_SUPPORT_PSRAM
// 2. PSRAM->PSRAM
test_context.src_in_psram = true;
test_context.dst_in_psram = true;
async_memcpy_setup_testbench(&test_context);
s_count = 0;
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
}
// wait for done semaphore
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: PSRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
}
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
// 3. PSRAM->SRAM
test_context.src_in_psram = true;
test_context.dst_in_psram = false;
async_memcpy_setup_testbench(&test_context);
s_count = 0;
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
}
// wait for done semaphore
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: PSRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
}
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
// 4. SRAM->PSRAM
test_context.src_in_psram = false;
test_context.dst_in_psram = true;
async_memcpy_setup_testbench(&test_context);
s_count = 0;
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
}
// wait for done semaphore
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: SRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
}
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
#endif
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
vSemaphoreDelete(sem);
}
TEST_CASE("memory copy performance test 40KB", "[async mcp]")
{
memcpy_performance_test(40 * 1024);
}
TEST_CASE("memory copy performance test 4KB", "[async mcp]")
{
memcpy_performance_test(4 * 1024);
}
#endif //SOC_CP_DMA_SUPPORTED || SOC_GDMA_SUPPORTED


@@ -37,7 +37,7 @@ static inline esp_err_t crypto_shared_gdma_new_channel(gdma_channel_alloc_config
esp_err_t ret;
int time_waited_ms = 0;
while(1) {
while (1) {
ret = gdma_new_channel(channel_config, channel);
if (ret == ESP_OK) {
@@ -58,14 +58,12 @@ static inline esp_err_t crypto_shared_gdma_new_channel(gdma_channel_alloc_config
/* Initialize external memory specific DMA configs */
static void esp_crypto_shared_dma_init_extmem(void)
{
int tx_ch_id = 0;
int rx_ch_id = 0;
gdma_get_channel_id(tx_channel, &tx_ch_id);
gdma_get_channel_id(rx_channel, &rx_ch_id);
gdma_ll_tx_set_block_size_psram(&GDMA, tx_ch_id, GDMA_LL_EXT_MEM_BK_SIZE_16B);
gdma_ll_rx_set_block_size_psram(&GDMA, rx_ch_id, GDMA_LL_EXT_MEM_BK_SIZE_16B);
gdma_transfer_ability_t transfer_ability = {
.sram_trans_align = 4,
.psram_trans_align = 16,
};
gdma_set_transfer_ability(tx_channel, &transfer_ability);
gdma_set_transfer_ability(rx_channel, &transfer_ability);
}
#endif //SOC_GDMA_SUPPORT_PSRAM
@@ -137,7 +135,7 @@ esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *ou
return ESP_ERR_INVALID_ARG;
}
/* tx channel is reset by gdma_connect(), also reset rx to ensure a known state */
gdma_get_channel_id(rx_channel, &rx_ch_id);
gdma_ll_rx_reset_channel(&GDMA, rx_ch_id);


@@ -22,6 +22,8 @@ Configure and Install driver
Driver configuration is described in :cpp:type:`async_memcpy_config_t`:
:cpp:member:`backlog`: This is used to configure the maximum number of DMA operations that can be working in the background at the same time.
:cpp:member:`sram_trans_align`: Declare SRAM alignment for both data address and copy size; set to zero if the data has no alignment restriction. If set to a multiple of four (i.e. 4x), the driver will enable burst mode internally, which is helpful for performance-sensitive applications.
:cpp:member:`psram_trans_align`: Declare PSRAM alignment for both data address and copy size. The user has to set a valid value (only 16, 32 and 64 are supported) if the memcpy destination is located in PSRAM. If it is set to zero, the default alignment (i.e. 16) is applied. Internally, the driver configures the block size used by the DMA to access PSRAM according to this alignment.
:cpp:member:`flags`: This is used to enable some special driver features.
:c:macro:`ASYNC_MEMCPY_DEFAULT_CONFIG` provides a default configuration, which sets the backlog to 8.
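For example, installing the driver for copies whose destination lives in PSRAM could look like this (a sketch; the buffer names ``to_psram``/``from_sram`` and the alignment value are illustrative)::

    async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
    config.psram_trans_align = 64; // destination buffers in PSRAM are 64-byte aligned
    async_memcpy_t driver = NULL;
    ESP_ERROR_CHECK(esp_async_memcpy_install(&config, &driver));
    // both the buffer addresses and the copy size must honor the declared alignment
    ESP_ERROR_CHECK(esp_async_memcpy(driver, to_psram, from_sram, 4096, NULL, NULL));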


@@ -73,7 +73,7 @@ extern "C" {
/* @brief macro to print IDF performance
* @param mode : performance item name. a string pointer.
* @param item : performance item name. a string pointer.
* @param value_fmt: print format and unit of the value, for example: "%02fms", "%dKB"
* @param value : the performance value.
*/