Merge branch 'feature/heap-trace-hash-map' into 'master'

heap: Add hash map mechanism to lower the access time to trace records

Closes IDFGH-9425

See merge request espressif/esp-idf!22564
pull/11189/head
Guillaume Souchere 2023-04-05 16:46:23 +08:00
commit f94bbe1eff
3 changed files with 229 additions and 116 deletions

View file

@@ -70,6 +70,30 @@ menu "Heap memory debugging"
This function depends on heap poisoning being enabled and adds four more bytes of overhead for each block
allocated.
config HEAP_TRACE_HASH_MAP
bool "Use hash map mechanism to access heap trace records"
depends on HEAP_TRACING_STANDALONE
default n
help
Enable this option to use a hash map to speed up the handling of
heap trace records.
Keeping this set to "n" saves RAM and heap memory, but lowers the performance
of the heap tracer when adding, retrieving and removing trace records.
Setting it to "y" uses additional RAM and heap memory, but improves the
performance of the heap tracer when adding, retrieving and removing trace
records.
config HEAP_TRACE_HASH_MAP_SIZE
int "The number of entries in the hash map"
depends on HEAP_TRACE_HASH_MAP
range 1 10000
default 10
help
Defines the number of entries in the heap trace hash map. The larger this number,
the more memory the hash map uses. When the tracing mode is set to
HEAP_TRACE_ALL, a larger hash map gives better lookup performance.
config HEAP_ABORT_WHEN_ALLOCATION_FAILS
bool "Abort if memory allocation fails"
default n
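
For illustration, a minimal sketch of how an application might drive the standalone tracer once the new hash map options are enabled, assuming CONFIG_HEAP_TRACE_HASH_MAP=y and a suitable CONFIG_HEAP_TRACE_HASH_MAP_SIZE in sdkconfig; the record count, allocation size and function name below are illustrative, not part of this change.

#include "esp_err.h"
#include "esp_heap_caps.h"
#include "esp_heap_trace.h"

#define EXAMPLE_NUM_RECORDS 100  // illustrative record buffer size
static heap_trace_record_t example_trace_records[EXAMPLE_NUM_RECORDS];

void example_trace_leaks(void)
{
    // Register the record buffer once, before the first heap_trace_start().
    ESP_ERROR_CHECK(heap_trace_init_standalone(example_trace_records, EXAMPLE_NUM_RECORDS));

    // With the hash map enabled, adding/removing/looking up records is faster,
    // which mainly matters for allocation-heavy code under HEAP_TRACE_ALL/LEAKS.
    ESP_ERROR_CHECK(heap_trace_start(HEAP_TRACE_LEAKS));

    void *p = heap_caps_malloc(64, MALLOC_CAP_DEFAULT);  // traced allocation
    heap_caps_free(p);  // traced free; in leak mode this removes the record

    ESP_ERROR_CHECK(heap_trace_stop());
    heap_trace_dump();  // records still present here are potential leaks
}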

View file

@@ -5,6 +5,7 @@
*/
#include <string.h>
#include <sdkconfig.h>
#include <inttypes.h>
#define HEAP_TRACE_SRCFILE /* don't warn on inclusion here */
#include "esp_heap_trace.h"
@@ -58,12 +59,12 @@ typedef struct {
// Forward Defines
static void heap_trace_dump_base(bool internal_ram, bool psram);
static void record_deep_copy(heap_trace_record_t *rDest, const heap_trace_record_t* r_src);
static void record_deep_copy(heap_trace_record_t *r_dest, const heap_trace_record_t *r_src);
static void list_setup(void);
static void list_remove(heap_trace_record_t* r_remove);
static bool list_add(const heap_trace_record_t* r_append);
static void list_remove(heap_trace_record_t *r_remove);
static heap_trace_record_t* list_add(const heap_trace_record_t *r_append);
static heap_trace_record_t* list_pop_unused(void);
static heap_trace_record_t* list_find_address_reverse(void* p);
static heap_trace_record_t* list_find_address_reverse(void *p);
/* The actual records. */
static records_t records;
@@ -78,6 +79,54 @@ static size_t total_frees;
static heap_trace_record_t* r_get;
static size_t r_get_idx;
#if CONFIG_HEAP_TRACE_HASH_MAP
/* Define struct: linked list of records used in hash map */
TAILQ_HEAD(heap_trace_hash_list_struct_t, heap_trace_record_t);
typedef struct heap_trace_hash_list_struct_t heap_trace_hash_list_t;
static heap_trace_hash_list_t hash_map[(size_t)CONFIG_HEAP_TRACE_HASH_MAP_SIZE]; // Buffer used for hashmap entries
static size_t total_hashmap_hits;
static size_t total_hashmap_miss;
static size_t hash_idx(void* p)
{
static const uint32_t fnv_prime = 16777619UL; // 32-bit FNV prime: 2^24 + 2^8 + 0x93
// Since all addresses are 4-byte aligned, address * fnv_prime is always a multiple
// of 4. The bit shifts distribute the hashes more evenly over the range
// [0, CONFIG_HEAP_TRACE_HASH_MAP_SIZE - 1].
return ((((uint32_t)p >> 3) +
((uint32_t)p >> 5) +
((uint32_t)p >> 7)) * fnv_prime) % (uint32_t)CONFIG_HEAP_TRACE_HASH_MAP_SIZE;
}
static void map_add(heap_trace_record_t *r_add)
{
size_t idx = hash_idx(r_add->address);
TAILQ_INSERT_TAIL(&hash_map[idx], r_add, tailq_hashmap);
}
static void map_remove(heap_trace_record_t *r_remove)
{
size_t idx = hash_idx(r_remove->address);
TAILQ_REMOVE(&hash_map[idx], r_remove, tailq_hashmap);
}
static heap_trace_record_t* map_find(void *p)
{
size_t idx = hash_idx(p);
heap_trace_record_t *r_cur = NULL;
TAILQ_FOREACH(r_cur, &hash_map[idx], tailq_hashmap) {
if (r_cur->address == p) {
total_hashmap_hits++;
return r_cur;
}
}
total_hashmap_miss++;
return NULL;
}
#endif // CONFIG_HEAP_TRACE_HASH_MAP
esp_err_t heap_trace_init_standalone(heap_trace_record_t *record_buffer, size_t num_records)
{
if (tracing) {
@@ -94,32 +143,6 @@ esp_err_t heap_trace_init_standalone(heap_trace_record_t *record_buffer, size_t
return ESP_OK;
}
esp_err_t heap_trace_start(heap_trace_mode_t mode_param)
{
if (records.buffer == NULL || records.capacity == 0) {
return ESP_ERR_INVALID_STATE;
}
portENTER_CRITICAL(&trace_mux);
tracing = false;
mode = mode_param;
// clear buffer
memset(records.buffer, 0, sizeof(heap_trace_record_t) * records.capacity);
records.count = 0;
records.has_overflowed = false;
list_setup();
total_allocations = 0;
total_frees = 0;
heap_trace_resume();
portEXIT_CRITICAL(&trace_mux);
return ESP_OK;
}
static esp_err_t set_tracing(bool enable)
{
if (tracing == enable) {
@@ -129,14 +152,56 @@ static esp_err_t set_tracing(bool enable)
return ESP_OK;
}
esp_err_t heap_trace_start(heap_trace_mode_t mode_param)
{
if (records.buffer == NULL || records.capacity == 0) {
return ESP_ERR_INVALID_STATE;
}
portENTER_CRITICAL(&trace_mux);
set_tracing(false);
mode = mode_param;
// clear buffers
memset(records.buffer, 0, sizeof(heap_trace_record_t) * records.capacity);
#if CONFIG_HEAP_TRACE_HASH_MAP
for (size_t i = 0; i < (size_t)CONFIG_HEAP_TRACE_HASH_MAP_SIZE; i++) {
TAILQ_INIT(&hash_map[i]);
}
total_hashmap_hits = 0;
total_hashmap_miss = 0;
#endif // CONFIG_HEAP_TRACE_HASH_MAP
records.count = 0;
records.has_overflowed = false;
list_setup();
total_allocations = 0;
total_frees = 0;
const esp_err_t ret_val = set_tracing(true);
portEXIT_CRITICAL(&trace_mux);
return ret_val;
}
esp_err_t heap_trace_stop(void)
{
return set_tracing(false);
portENTER_CRITICAL(&trace_mux);
const esp_err_t ret_val = set_tracing(false);
portEXIT_CRITICAL(&trace_mux);
return ret_val;
}
esp_err_t heap_trace_resume(void)
{
return set_tracing(true);
portENTER_CRITICAL(&trace_mux);
const esp_err_t ret_val = set_tracing(true);
portEXIT_CRITICAL(&trace_mux);
return ret_val;
}
size_t heap_trace_get_count(void)
@@ -163,7 +228,7 @@ esp_err_t heap_trace_get(size_t index, heap_trace_record_t *r_out)
// Perf: speed up sequential access
if (r_get && r_get_idx == index - 1) {
r_get = TAILQ_NEXT(r_get, tailq);
r_get = TAILQ_NEXT(r_get, tailq_list);
r_get_idx = index;
} else {
@@ -178,21 +243,15 @@ esp_err_t heap_trace_get(size_t index, heap_trace_record_t *r_out)
break;
}
r_get = TAILQ_NEXT(r_get, tailq);
r_get = TAILQ_NEXT(r_get, tailq_list);
r_get_idx = i + 1;
}
}
// copy to destination
if (r_get) {
memcpy(r_out, r_get, sizeof(heap_trace_record_t));
} else {
// this should not happen since we already
// checked that index < records.count,
// but could be indicative of memory corruption
result = ESP_ERR_INVALID_STATE;
memset(r_out, 0, sizeof(heap_trace_record_t));
}
// We already checked that index < records.count, so if r_get is NULL
// here, it could be indicative of memory corruption.
assert(r_get != NULL);
memcpy(r_out, r_get, sizeof(heap_trace_record_t));
}
portEXIT_CRITICAL(&trace_mux);
@@ -213,6 +272,10 @@ esp_err_t heap_trace_summary(heap_trace_summary_t *summary)
summary->capacity = records.capacity;
summary->high_water_mark = records.high_water_mark;
summary->has_overflowed = records.has_overflowed;
#if CONFIG_HEAP_TRACE_HASH_MAP
summary->total_hashmap_hits = total_hashmap_hits;
summary->total_hashmap_miss = total_hashmap_miss;
#endif // CONFIG_HEAP_TRACE_HASH_MAP
portEXIT_CRITICAL(&trace_mux);
return ESP_OK;
@@ -234,76 +297,81 @@ static void heap_trace_dump_base(bool internal_ram, bool psram)
size_t delta_allocs = 0;
size_t start_count = records.count;
esp_rom_printf("====== Heap Trace: %u records (%u capacity) ======\n",
esp_rom_printf("====== Heap Trace: %"PRIu32" records (%"PRIu32" capacity) ======\n",
records.count, records.capacity);
// Iterate through the linked list
heap_trace_record_t *rCur = TAILQ_FIRST(&records.list);
heap_trace_record_t *r_cur = TAILQ_FIRST(&records.list);
for (int i = 0; i < records.count; i++) {
// check corruption
if (rCur == NULL) {
if (r_cur == NULL) {
esp_rom_printf("\nError: heap trace linked list is corrupt. expected more records.\n");
break;
}
bool should_print = rCur->address != NULL &&
bool should_print = r_cur->address != NULL &&
((psram && internal_ram) ||
(internal_ram && esp_ptr_internal(rCur->address)) ||
(psram && esp_ptr_external_ram(rCur->address)));
(internal_ram && esp_ptr_internal(r_cur->address)) ||
(psram && esp_ptr_external_ram(r_cur->address)));
if (should_print) {
const char* label = "";
if (esp_ptr_internal(rCur->address)) {
if (esp_ptr_internal(r_cur->address)) {
label = ", Internal";
}
if (esp_ptr_external_ram(rCur->address)) {
if (esp_ptr_external_ram(r_cur->address)) {
label = ", PSRAM";
}
esp_rom_printf("%6d bytes (@ %p%s) allocated CPU %d ccount 0x%08x caller ",
rCur->size, rCur->address, label, rCur->ccount & 1, rCur->ccount & ~3);
r_cur->size, r_cur->address, label, r_cur->ccount & 1, r_cur->ccount & ~3);
for (int j = 0; j < STACK_DEPTH && rCur->alloced_by[j] != 0; j++) {
esp_rom_printf("%p%s", rCur->alloced_by[j],
for (int j = 0; j < STACK_DEPTH && r_cur->alloced_by[j] != 0; j++) {
esp_rom_printf("%p%s", r_cur->alloced_by[j],
(j < STACK_DEPTH - 1) ? ":" : "");
}
if (mode != HEAP_TRACE_ALL || STACK_DEPTH == 0 || rCur->freed_by[0] == NULL) {
delta_size += rCur->size;
if (mode != HEAP_TRACE_ALL || STACK_DEPTH == 0 || r_cur->freed_by[0] == NULL) {
delta_size += r_cur->size;
delta_allocs++;
esp_rom_printf("\n");
} else {
esp_rom_printf("\nfreed by ");
for (int j = 0; j < STACK_DEPTH; j++) {
esp_rom_printf("%p%s", rCur->freed_by[j],
esp_rom_printf("%p%s", r_cur->freed_by[j],
(j < STACK_DEPTH - 1) ? ":" : "\n");
}
}
}
rCur = TAILQ_NEXT(rCur, tailq);
r_cur = TAILQ_NEXT(r_cur, tailq_list);
}
esp_rom_printf("====== Heap Trace Summary ======\n");
if (mode == HEAP_TRACE_ALL) {
esp_rom_printf("Mode: Heap Trace All\n");
esp_rom_printf("%u bytes alive in trace (%u/%u allocations)\n",
esp_rom_printf("%"PRIu32" bytes alive in trace (%"PRIu32"/%"PRIu32" allocations)\n",
delta_size, delta_allocs, heap_trace_get_count());
} else {
esp_rom_printf("Mode: Heap Trace Leaks\n");
esp_rom_printf("%u bytes 'leaked' in trace (%u allocations)\n", delta_size, delta_allocs);
esp_rom_printf("%"PRIu32" bytes 'leaked' in trace (%"PRIu32" allocations)\n", delta_size, delta_allocs);
}
esp_rom_printf("records: %u (%u capacity, %u high water mark)\n",
esp_rom_printf("records: %"PRIu32" (%"PRIu32" capacity, %"PRIu32" high water mark)\n",
records.count, records.capacity, records.high_water_mark);
esp_rom_printf("total allocations: %u\n", total_allocations);
esp_rom_printf("total frees: %u\n", total_frees);
#if CONFIG_HEAP_TRACE_HASH_MAP
esp_rom_printf("hashmap: %"PRIu32" capacity (%"PRIu32" hits, %"PRIu32" misses)\n",
(size_t)CONFIG_HEAP_TRACE_HASH_MAP_SIZE, total_hashmap_hits, total_hashmap_miss);
#endif // CONFIG_HEAP_TRACE_HASH_MAP
esp_rom_printf("total allocations: %"PRIu32"\n", total_allocations);
esp_rom_printf("total frees: %"PRIu32"\n", total_frees);
if (start_count != records.count) { // only a problem if trace isn't stopped before dumping
esp_rom_printf("(NB: New entries were traced while dumping, so trace dump may have duplicate entries.)\n");
@@ -317,30 +385,27 @@ static void heap_trace_dump_base(bool internal_ram, bool psram)
}
/* Add a new allocation to the heap trace records */
static IRAM_ATTR void record_allocation(const heap_trace_record_t *rAllocation)
static IRAM_ATTR void record_allocation(const heap_trace_record_t *r_allocation)
{
if (!tracing || rAllocation->address == NULL) {
if (!tracing || r_allocation->address == NULL) {
return;
}
portENTER_CRITICAL(&trace_mux);
if (tracing) {
// If buffer is full, pop off the oldest
// record to make more space
if (records.count == records.capacity) {
records.has_overflowed = true;
heap_trace_record_t *rFirst = TAILQ_FIRST(&records.list);
heap_trace_record_t *r_first = TAILQ_FIRST(&records.list);
list_remove(rFirst);
list_remove(r_first);
}
// push onto end of list
list_add(rAllocation);
list_add(r_allocation);
total_allocations++;
}
@@ -357,30 +422,34 @@ static IRAM_ATTR void record_allocation(const heap_trace_record_t *rAllocation)
*/
static IRAM_ATTR void record_free(void *p, void **callers)
{
if (!tracing || p == NULL) {
if (!tracing || p == NULL) {
return;
}
portENTER_CRITICAL(&trace_mux);
// Return directly if records.count == 0. When the hashmap is used, this
// prevents the hashmap from returning an item that is no longer in the
// records list.
if (records.count == 0) {
portEXIT_CRITICAL(&trace_mux);
return;
}
if (tracing) {
total_frees++;
// search backwards for the allocation record matching this free
heap_trace_record_t* rFound = list_find_address_reverse(p);
if (rFound) {
heap_trace_record_t *r_found = list_find_address_reverse(p);
if (r_found) {
if (mode == HEAP_TRACE_ALL) {
// add 'freed_by' info to the record
memcpy(rFound->freed_by, callers, sizeof(void *) * STACK_DEPTH);
memcpy(r_found->freed_by, callers, sizeof(void *) * STACK_DEPTH);
} else { // HEAP_TRACE_LEAKS
// Leak trace mode, once an allocation is freed
// we remove it from the list
list_remove(rFound);
// we remove it from the list & hashmap
list_remove(r_found);
}
}
}
@@ -396,27 +465,31 @@ static void list_setup(void)
for (int i = 0; i < records.capacity; i++) {
heap_trace_record_t* rCur = &records.buffer[i];
heap_trace_record_t *r_cur = &records.buffer[i];
TAILQ_INSERT_TAIL(&records.unused, rCur, tailq);
TAILQ_INSERT_TAIL(&records.unused, r_cur, tailq_list);
}
}
/* 1. removes record r_remove from records.list,
2. places it into records.unused */
static IRAM_ATTR void list_remove(heap_trace_record_t* r_remove)
static IRAM_ATTR void list_remove(heap_trace_record_t *r_remove)
{
assert(records.count > 0);
#if CONFIG_HEAP_TRACE_HASH_MAP
map_remove(r_remove);
#endif
// remove from records.list
TAILQ_REMOVE(&records.list, r_remove, tailq);
TAILQ_REMOVE(&records.list, r_remove, tailq_list);
// set as unused
r_remove->address = 0;
r_remove->size = 0;
// add to records.unused
TAILQ_INSERT_HEAD(&records.unused, r_remove, tailq);
TAILQ_INSERT_HEAD(&records.unused, r_remove, tailq_list);
// decrement
records.count--;
@@ -432,45 +505,45 @@ static IRAM_ATTR heap_trace_record_t* list_pop_unused(void)
}
// get from records.unused
heap_trace_record_t* rUnused = TAILQ_FIRST(&records.unused);
assert(rUnused->address == NULL);
assert(rUnused->size == 0);
heap_trace_record_t *r_unused = TAILQ_FIRST(&records.unused);
assert(r_unused->address == NULL);
assert(r_unused->size == 0);
// remove from records.unused
TAILQ_REMOVE(&records.unused, rUnused, tailq);
TAILQ_REMOVE(&records.unused, r_unused, tailq_list);
return rUnused;
return r_unused;
}
// deep copy a record.
// Note: only copies the *allocation data*, not the next & prev ptrs
static IRAM_ATTR void record_deep_copy(heap_trace_record_t *rDest, const heap_trace_record_t *r_src)
static IRAM_ATTR void record_deep_copy(heap_trace_record_t *r_dest, const heap_trace_record_t *r_src)
{
rDest->ccount = r_src->ccount;
rDest->address = r_src->address;
rDest->size = r_src->size;
memcpy(rDest->freed_by, r_src->freed_by, sizeof(void *) * STACK_DEPTH);
memcpy(rDest->alloced_by, r_src->alloced_by, sizeof(void *) * STACK_DEPTH);
r_dest->ccount = r_src->ccount;
r_dest->address = r_src->address;
r_dest->size = r_src->size;
memcpy(r_dest->freed_by, r_src->freed_by, sizeof(void *) * STACK_DEPTH);
memcpy(r_dest->alloced_by, r_src->alloced_by, sizeof(void *) * STACK_DEPTH);
}
// Append a record to records.list
// Note: This deep copies r_append
static IRAM_ATTR bool list_add(const heap_trace_record_t *r_append)
static IRAM_ATTR heap_trace_record_t* list_add(const heap_trace_record_t *r_append)
{
if (records.count < records.capacity) {
// get unused record
heap_trace_record_t* rDest = list_pop_unused();
heap_trace_record_t *r_dest = list_pop_unused();
// we checked that there is capacity, so this
// should never be null.
assert(rDest != NULL);
assert(r_dest != NULL);
// copy allocation data
record_deep_copy(rDest, r_append);
record_deep_copy(r_dest, r_append);
// append to records.list
TAILQ_INSERT_TAIL(&records.list, rDest, tailq);
TAILQ_INSERT_TAIL(&records.list, r_dest, tailq_list);
// increment
records.count++;
@@ -480,34 +553,43 @@ static IRAM_ATTR bool list_add(const heap_trace_record_t *r_append)
records.high_water_mark = records.count;
}
return true;
#if CONFIG_HEAP_TRACE_HASH_MAP
map_add(r_dest);
#endif
return r_dest;
} else {
records.has_overflowed = true;
return false;
return NULL;
}
}
// search records.list backwards for the allocation record matching this address
static IRAM_ATTR heap_trace_record_t* list_find_address_reverse(void* p)
static IRAM_ATTR heap_trace_record_t* list_find_address_reverse(void *p)
{
if (records.count == 0) {
return NULL;
}
heap_trace_record_t *r_found = NULL;
heap_trace_record_t* rFound = NULL;
#if CONFIG_HEAP_TRACE_HASH_MAP
// check the hashmap
r_found = map_find(p);
if (r_found != NULL) {
return r_found;
}
#endif
// Perf: We search backwards because new allocations are appended
// to the end of the list and most allocations are short lived.
heap_trace_record_t *rCur = NULL;
TAILQ_FOREACH_REVERSE(rCur, &records.list, heap_trace_record_list_struct_t, tailq) {
if (rCur->address == p) {
rFound = rCur;
heap_trace_record_t *r_cur = NULL;
TAILQ_FOREACH(r_cur, &records.list, tailq_list) {
if (r_cur->address == p) {
r_found = r_cur;
break;
}
}
return rFound;
return r_found;
}
#include "heap_trace.inc"

View file

@@ -36,8 +36,11 @@ typedef struct heap_trace_record_t {
size_t size; ///< Size of the allocation
void *alloced_by[CONFIG_HEAP_TRACING_STACK_DEPTH]; ///< Call stack of the caller which allocated the memory.
void *freed_by[CONFIG_HEAP_TRACING_STACK_DEPTH]; ///< Call stack of the caller which freed the memory (all zero if not freed.)
#ifdef CONFIG_HEAP_TRACING_STANDALONE
TAILQ_ENTRY(heap_trace_record_t) tailq; ///< Linked list: prev & next records
#if CONFIG_HEAP_TRACING_STANDALONE
TAILQ_ENTRY(heap_trace_record_t) tailq_list; ///< Linked list: prev & next records
#if CONFIG_HEAP_TRACE_HASH_MAP
TAILQ_ENTRY(heap_trace_record_t) tailq_hashmap; ///< Linked list: prev & next in hashmap entry list
#endif // CONFIG_HEAP_TRACE_HASH_MAP
#endif // CONFIG_HEAP_TRACING_STANDALONE
} heap_trace_record_t;
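
Because each record now participates in two linked lists at once, the struct carries two independent TAILQ_ENTRY fields. A minimal host-side sketch of that pattern, with illustrative names (demo_node, by_list, by_bucket):

#include <sys/queue.h>

struct demo_node {
    int value;
    TAILQ_ENTRY(demo_node) by_list;    // linkage in the "all records" queue (like tailq_list)
    TAILQ_ENTRY(demo_node) by_bucket;  // linkage in one hash bucket's queue (like tailq_hashmap)
};

TAILQ_HEAD(demo_head, demo_node);

void demo_link_both(void)
{
    struct demo_head all;
    struct demo_head bucket;
    TAILQ_INIT(&all);
    TAILQ_INIT(&bucket);

    struct demo_node n = { .value = 42 };
    TAILQ_INSERT_TAIL(&all, &n, by_list);       // same node on the first queue
    TAILQ_INSERT_TAIL(&bucket, &n, by_bucket);  // and simultaneously on the second

    // Removing the node from one queue leaves its membership in the other intact.
    TAILQ_REMOVE(&bucket, &n, by_bucket);
    TAILQ_REMOVE(&all, &n, by_list);
}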
@@ -52,6 +55,10 @@ typedef struct {
size_t capacity; ///< The capacity of the internal buffer
size_t high_water_mark; ///< The maximum value that 'count' got to
size_t has_overflowed; ///< True if the internal buffer overflowed at some point
#if CONFIG_HEAP_TRACE_HASH_MAP
size_t total_hashmap_hits; ///< If hashmap is used, the total number of hits
size_t total_hashmap_miss; ///< If hashmap is used, the total number of misses (possibly due to overflow)
#endif
} heap_trace_summary_t;
/**
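
The new counters can be read back through the existing heap_trace_summary() call; a minimal sketch, assuming tracing has already been initialised and started elsewhere (the helper name is illustrative, the field names come from this change):

#include "sdkconfig.h"
#include "esp_heap_trace.h"
#include "esp_rom_sys.h"

static void example_report_trace_stats(void)
{
    heap_trace_summary_t summary;
    if (heap_trace_summary(&summary) != ESP_OK) {
        return;  // summary not available (e.g. tracing never initialised)
    }
    esp_rom_printf("records in use: %u of %u (high water mark %u)\n",
                   summary.count, summary.capacity, summary.high_water_mark);
#if CONFIG_HEAP_TRACE_HASH_MAP
    // A high miss count relative to hits suggests lookups for records that are no
    // longer in the map, e.g. after the record buffer overflowed.
    esp_rom_printf("hashmap hits: %u, misses: %u\n",
                   summary.total_hashmap_hits, summary.total_hashmap_miss);
#endif
}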