Merge branch 'feature/use_cpu_time_for_tests' into 'master'

Cache compensated timer

See merge request espressif/esp-idf!6087
pull/4494/head
Angus Gratton 2019-11-20 08:33:27 +08:00
commit bc9267aa24
33 zmienionych plików z 1151 dodań i 35 usunięć

Wyświetl plik

@ -42,7 +42,7 @@ else()
# driver is a public requirement because esp_sleep.h uses gpio_num_t & touch_pad_t
# app_update is added here because cpu_start.c uses esp_ota_get_app_description() function.
set(priv_requires app_trace app_update bootloader_support log mbedtls nvs_flash pthread
spi_flash vfs wpa_supplicant espcoredump esp_common esp_wifi)
spi_flash vfs wpa_supplicant espcoredump esp_common esp_wifi perfmon)
set(fragments linker.lf ld/esp32_fragments.lf)
idf_component_register(SRCS "${srcs}"

Wyświetl plik

@ -188,6 +188,7 @@ SECTIONS
*(.gnu.linkonce.s2.*)
*(.jcr)
mapping[dram0_data]
_data_end = ABSOLUTE(.);

Wyświetl plik

@ -45,7 +45,7 @@ typedef struct xtensa_perfmon_masks {
} xtensa_perfmon_masks_t;
// Maximum amount of performance counter events
#define MAX_PERFMON_EVENTS 118
#define MAX_PERFMON_EVENTS 119
/**
* @brief Select value description table
@ -71,4 +71,4 @@ extern const uint32_t xtensa_perfmon_select_mask_all[MAX_PERFMON_EVENTS * 2];
#ifdef __cplusplus
}
#endif
#endif // _xtensa_perfmon_masks_H_
#endif // _xtensa_perfmon_masks_H_

Wyświetl plik

@ -192,6 +192,7 @@ const uint32_t xtensa_perfmon_select_mask_all[MAX_PERFMON_EVENTS * 2] = {
XTPERF_CNT_INSN, XTPERF_MASK_INSN_LOOP_BEG,
XTPERF_CNT_INSN, XTPERF_MASK_INSN_LOOP_END,
XTPERF_CNT_INSN, XTPERF_MASK_INSN_NON_BRANCH,
XTPERF_CNT_INSN, XTPERF_MASK_INSN_ALL,
XTPERF_CNT_D_STALL, XTPERF_MASK_D_STALL_STORE_BUF_FULL,
XTPERF_CNT_D_STALL, XTPERF_MASK_D_STALL_STORE_BUF_CONFLICT,
XTPERF_CNT_D_STALL, XTPERF_MASK_D_STALL_CACHE_MISS,

Wyświetl plik

@ -1,7 +1,3 @@
set(srcdirs ".")
idf_component_register(SRC_DIRS ${srcdirs}
idf_component_register(SRC_DIRS "."
INCLUDE_DIRS .
REQUIRES unity test_utils vfs fatfs spiffs
LDFRAGMENTS linker.lf
)
REQUIRES unity test_utils vfs fatfs spiffs)

Wyświetl plik

@ -1,3 +1 @@
COMPONENT_ADD_LDFLAGS = -Wl,--whole-archive -l$(COMPONENT_NAME) -Wl,--no-whole-archive
COMPONENT_ADD_LDFRAGMENTS += linker.lf

Wyświetl plik

@ -1,9 +0,0 @@
[mapping:vfs]
archive: libvfs.a
entries:
vfs:esp_vfs_open (noflash)
vfs:esp_vfs_write (noflash)
vfs:esp_vfs_close (noflash)
vfs:get_vfs_for_fd (noflash)
vfs:get_vfs_for_path (noflash)
vfs:translate_path (noflash)

Wyświetl plik

@ -24,6 +24,7 @@
#include "unity.h"
#include "esp_log.h"
#include "test_utils.h"
#include "ccomp_timer.h"
#define VFS_PREF1 "/vfs1"
#define VFS_PREF2 "/vfs2"
@ -243,7 +244,7 @@ TEST_CASE("Open & write & close through VFS passes performance test", "[vfs]")
TEST_ESP_OK( esp_vfs_register(VFS_PREF1, &desc, NULL) );
const int64_t begin = esp_timer_get_time();
ccomp_timer_start();
const int iter_count = 5000;
for (int i = 0; i < iter_count; ++i) {
@ -255,9 +256,7 @@ TEST_CASE("Open & write & close through VFS passes performance test", "[vfs]")
TEST_ASSERT_NOT_EQUAL(close(fd), -1);
}
// esp_vfs_open, esp_vfs_write and esp_vfs_close need to be in IRAM for performance test to pass
const int64_t time_diff_us = esp_timer_get_time() - begin;
const int64_t time_diff_us = ccomp_timer_stop();
const int ns_per_iter = (int) (time_diff_us * 1000 / iter_count);
TEST_ESP_OK( esp_vfs_unregister(VFS_PREF1) );
#ifdef CONFIG_SPIRAM

Wyświetl plik

@ -205,3 +205,37 @@ Similar to multiple devices test cases, multiple stages test cases will also pri
First time you execute this case, input ``1`` to run first stage (trigger deepsleep).
After DUT is rebooted and able to run test cases, select this case again and input ``2`` to run the second stage.
The case only passes if the last stage passes and all previous stages trigger reset.
Timing Code with Cache Compensated Timer
-----------------------------------------
Instructions and data stored in external memory (e.g. SPI Flash and SPI RAM) are accessed through the CPU's unified instruction and data cache. When code or data is in cache, access is very fast (i.e., a cache hit).
However, if the instruction or data is not in cache, it needs to be fetched from external memory (i.e., a cache miss). Access to external memory is significantly slower, as the CPU must execute stall cycles whilst waiting for the instruction or data to be retrieved from external memory. This can cause the overall code execution speed to vary depending on the number of cache hits or misses.
Code and data placements can vary between builds, and some arrangements may be more favorable with regards to cache access (i.e., minimizing cache misses). This can technically affect execution speed, however these factors are usually irrelevant as their effects 'average out' over the device's operation.
The effect of the cache on execution speed, however, can be relevant in benchmarking scenarios (especially microbenchmarks). There might be some variability in measured time
between runs and between different builds. A technique for eliminating some of the
variability is to place code and data in instruction or data RAM (IRAM/DRAM), respectively. The CPU can access IRAM and DRAM directly, taking the cache out of the equation.
However, this might not always be viable as the size of IRAM and DRAM is limited.
The cache compensated timer is an alternative to placing the code/data to be benchmarked in IRAM/DRAM. This timer uses the processor's internal event counters in order to determine the amount
of time spent on waiting for code/data in case of a cache miss, then subtracts that from the recorded wall time.
.. code-block:: c
// Start the timer
ccomp_timer_start();
// Function to time
func_code_to_time();
// Stop the timer, and return the elapsed time in microseconds relative to
// ccomp_timer_start
int64_t t = ccomp_timer_stop();
One limitation of the cache compensated timer is that the task running the benchmarked functions should be pinned to a core. This is due to each core having its own event counters that are independent of each other. For example, if ``ccomp_timer_start`` gets called on one core and the task is then put to sleep by the scheduler, wakes up, and gets rescheduled on the other core, then the corresponding ``ccomp_timer_stop`` will be invalid.

Wyświetl plik

@ -139,6 +139,8 @@ To set version in your project manually you need to set ``PROJECT_VER`` variable
If ``PROJECT_VER`` variable is not set in the project then it will be retrieved from either ``$(PROJECT_PATH)/version.txt`` file (if present) else using git command ``git describe``. If neither is available then ``PROJECT_VER`` will be set to "1". Application can make use of this by calling :cpp:func:`esp_ota_get_app_description` or :cpp:func:`esp_ota_get_partition_description` functions.
API Reference
-------------

Wyświetl plik

@ -476,7 +476,7 @@ UT_034:
UT_035:
extends: .unit_test_template
parallel: 32
parallel: 34
tags:
- ESP32S2BETA_IDF
- UT_T1_1

Wyświetl plik

@ -1,7 +1,11 @@
idf_component_register(SRCS "ref_clock.c"
idf_component_register(SRCS "ccomp_timer.c"
"ref_clock.c"
"test_runner.c"
"test_utils.c"
"ccomp_timer_impl.c"
INCLUDE_DIRS include
REQUIRES spi_flash idf_test unity)
PRIV_INCLUDE_DIRS private_include
REQUIRES spi_flash idf_test unity
PRIV_REQUIRES perfmon)

Wyświetl plik

@ -0,0 +1,106 @@
// Copyright 2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ccomp_timer.h"
#include "ccomp_timer_impl.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/semphr.h"
#include "esp_log.h"
#include "esp_ipc.h"
#include "esp_intr_alloc.h"
#if CONFIG_IDF_TARGET_ESP32
#include "esp32/clk.h"
#elif CONFIG_IDF_TARGET_ESP32S2BETA
#include "esp32s2beta/clk.h"
#endif
static const char TAG[] = "ccomp_timer";
/*
 * Start the cache compensated timer on the calling core.
 *
 * The implementation is initialized on first use. If it is already
 * initialized and currently active, ESP_ERR_INVALID_STATE is returned.
 * Otherwise the counters are reset and timing begins. Any failure is logged
 * and its error code is returned to the caller.
 */
esp_err_t ccomp_timer_start(void)
{
    esp_err_t result;

    // Inspect (and lazily initialize) the timer state inside the critical section.
    ccomp_timer_impl_lock();
    if (!ccomp_timer_impl_is_init()) {
        result = ccomp_timer_impl_init();
    } else {
        result = ccomp_timer_impl_is_active() ? ESP_ERR_INVALID_STATE : ESP_OK;
    }
    ccomp_timer_impl_unlock();

    // Each step only runs if everything before it succeeded.
    if (result == ESP_OK) {
        result = ccomp_timer_impl_reset();
    }
    if (result == ESP_OK) {
        result = ccomp_timer_impl_start();
    }

    if (result != ESP_OK) {
        ESP_LOGE(TAG, "Unable to start performance timer");
    }
    return result;
}
/*
 * Stop the cache compensated timer on the calling core.
 *
 * Returns the value reported by ccomp_timer_get_time() after the timer was
 * stopped, or -1 (after logging an error) if the timer was not active, could
 * not be stopped or deinitialized, or the reported time itself was -1.
 */
int64_t IRAM_ATTR ccomp_timer_stop(void)
{
    // Sample the active flag inside the critical section.
    ccomp_timer_impl_lock();
    const bool was_active = ccomp_timer_impl_is_active();
    ccomp_timer_impl_unlock();

    int64_t elapsed = -1;
    if (was_active && ccomp_timer_impl_stop() == ESP_OK) {
        // Read the time before deinitializing the implementation.
        const int64_t measured = ccomp_timer_get_time();
        if (ccomp_timer_impl_deinit() == ESP_OK) {
            elapsed = measured;
        }
    }

    if (elapsed == -1) {
        ESP_LOGE(TAG, "Unable to stop performance timer");
    }
    return elapsed;
}
// Thin public wrapper that forwards to ccomp_timer_impl_get_time() and returns
// its result unchanged.
int64_t IRAM_ATTR ccomp_timer_get_time(void)
{
return ccomp_timer_impl_get_time();
}

Wyświetl plik

@ -0,0 +1,232 @@
// Copyright 2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <string.h>
#include "ccomp_timer_impl.h"
#include "esp_intr_alloc.h"
#include "esp_log.h"
#include "esp_attr.h"
#include "eri.h"
#include "freertos/FreeRTOS.h"
#include "freertos/portmacro.h"
#include "esp_freertos_hooks.h"
#include "perfmon.h"
#include "xtensa/core-macros.h"
#include "xtensa/xt_perf_consts.h"
#include "xtensa-debug-module.h"
#include "esp_ipc.h"
#if CONFIG_IDF_TARGET_ESP32
#include "esp32/clk.h"
#elif CONFIG_IDF_TARGET_ESP32S2BETA
#include "esp32s2beta/clk.h"
#endif
#define D_STALL_COUNTER_ID 0
#define I_STALL_COUNTER_ID 1
typedef enum
{
PERF_TIMER_UNINIT = 0, // timer has not been initialized yet
PERF_TIMER_IDLE, // timer has been initialized but is not tracking elapsed time
PERF_TIMER_ACTIVE // timer is tracking elapsed time
} ccomp_timer_state_t;
typedef struct
{
int i_ovfl; // number of times instruction stall counter has overflowed
int d_ovfl; // number of times data stall counter has overflowed
uint32_t last_ccount; // last CCOUNT value, updated every os tick
ccomp_timer_state_t state; // state of the timer
intr_handle_t intr_handle; // handle to allocated handler for perfmon counter overflows, so that it can be freed during deinit
int64_t ccount; // accumulated processors cycles during the time when timer is active
} ccomp_timer_status_t;
// Each core has its independent timer
ccomp_timer_status_t s_status[] = {
(ccomp_timer_status_t){
.i_ovfl = 0,
.d_ovfl = 0,
.ccount = 0,
.last_ccount = 0,
.state = PERF_TIMER_UNINIT,
.intr_handle = NULL,
},
(ccomp_timer_status_t){
.i_ovfl = 0,
.d_ovfl = 0,
.ccount = 0,
.last_ccount = 0,
.state = PERF_TIMER_UNINIT,
.intr_handle = NULL
}
};
static portMUX_TYPE s_lock = portMUX_INITIALIZER_UNLOCKED;
/*
 * Accumulate the CPU cycles elapsed since the previous sample into the 64-bit
 * cycle total of the calling core. Does nothing unless that core's timer is
 * in the PERF_TIMER_ACTIVE state.
 *
 * Registered as a FreeRTOS tick hook while the timer runs, and also called
 * directly when the timer is stopped or read.
 */
static void IRAM_ATTR update_ccount(void)
{
    ccomp_timer_status_t *status = &s_status[xPortGetCoreID()];

    if (status->state == PERF_TIMER_ACTIVE) {
        uint32_t new_ccount = xthal_get_ccount();
        // Unsigned 32-bit subtraction is performed modulo 2^32, so it yields the
        // correct delta across a single CCOUNT wrap-around (and zero when the two
        // samples are equal) without a special case.
        uint32_t elapsed = new_ccount - status->last_ccount;
        status->ccount += elapsed;
        status->last_ccount = new_ccount;
    }
}
/*
 * If performance counter `id` has its overflow flag set, increment *cnt and
 * clear the flag.
 *
 * id  - index of the performance counter whose PMSTAT register is checked
 * cnt - overflow tally to increment
 */
static inline void update_overflow(int id, int *cnt)
{
    // Use one address for both the read and the write so that the register that
    // reported the overflow is the one that gets cleared.
    const int pmstat_addr = ERI_PERFMON_PMSTAT0 + id * sizeof(int32_t);

    uint32_t pmstat = eri_read(pmstat_addr);
    if (pmstat & PMSTAT_OVFL) {
        *cnt += 1;
        // Clear overflow and PerfMonInt asserted bits. The only valid bits in PMSTAT are the ones we're trying to clear, so it should be
        // ok to just modify the whole register.
        eri_write(pmstat_addr, ~0x0);
    }
}
// Interrupt handler passed to esp_intr_alloc() in intr_alloc(). Checks both the
// data-stall and instruction-stall counters and updates the overflow tallies of
// the core the handler is running on. `args` is not used.
static void IRAM_ATTR perf_counter_overflow_handler(void *args)
{
update_overflow(D_STALL_COUNTER_ID, &s_status[xPortGetCoreID()].d_ovfl);
update_overflow(I_STALL_COUNTER_ID, &s_status[xPortGetCoreID()].i_ovfl);
}
/*
 * Set (enable == true) or clear (enable == false) the PMCTRL_INTEN bit in the
 * control registers of both stall counters, leaving all other bits untouched.
 */
static void set_perfmon_interrupt(bool enable)
{
    const int d_ctrl_addr = ERI_PERFMON_PMCTRL0 + D_STALL_COUNTER_ID * sizeof(int32_t);
    const int i_ctrl_addr = ERI_PERFMON_PMCTRL0 + I_STALL_COUNTER_ID * sizeof(int32_t);

    // Read both control registers first, then write both back.
    uint32_t d_ctrl = eri_read(d_ctrl_addr);
    uint32_t i_ctrl = eri_read(i_ctrl_addr);

    d_ctrl = enable ? (d_ctrl | PMCTRL_INTEN) : (d_ctrl & ~PMCTRL_INTEN);
    i_ctrl = enable ? (i_ctrl | PMCTRL_INTEN) : (i_ctrl & ~PMCTRL_INTEN);

    eri_write(d_ctrl_addr, d_ctrl);
    eri_write(i_ctrl_addr, i_ctrl);
}
// Allocates the profiling interrupt with perf_counter_overflow_handler as its
// handler and stores the resulting handle in s_status[*id].intr_handle.
// `params` must point to an int holding the index into s_status.
// NOTE(review): the return value of esp_intr_alloc() is discarded, so an
// allocation failure goes unnoticed here.
static void intr_alloc(void* params)
{
int *id = (int*) params;
// Keep track of how many times each counter has overflowed.
esp_intr_alloc(ETS_INTERNAL_PROFILING_INTR_SOURCE, 0,
perf_counter_overflow_handler, NULL, &s_status[*id].intr_handle);
}
// Linker seems to be smart enough to drop this if ccomp_timer APIs are not used.
// Declared as a constructor. When FreeRTOS is not built for a single core, it
// calls intr_alloc() on every core through esp_ipc_call_blocking(), passing the
// core index; otherwise it calls intr_alloc() directly with index 0.
static __attribute__((constructor)) void ccomp_timer_impl_start_init(void)
{
#if !CONFIG_FREERTOS_UNICORE
for (int i = 0; i < portNUM_PROCESSORS; i++) {
esp_ipc_call_blocking(i, intr_alloc, &i);
}
#else
int i = 0;
intr_alloc(&i);
#endif
}
// Configures the two performance counters (data stalls and instruction stalls),
// enables their interrupts and moves the calling core's timer to
// PERF_TIMER_IDLE. Always returns ESP_OK.
// NOTE(review): the meaning of the trailing `0, -1` arguments to
// xtensa_perfmon_init() is not visible here; check the perfmon API.
esp_err_t ccomp_timer_impl_init(void)
{
xtensa_perfmon_init(D_STALL_COUNTER_ID,
XTPERF_CNT_D_STALL,
XTPERF_MASK_D_STALL_BUSY, 0, -1);
xtensa_perfmon_init(I_STALL_COUNTER_ID,
XTPERF_CNT_I_STALL,
XTPERF_MASK_I_STALL_BUSY, 0, -1);
set_perfmon_interrupt(true);
s_status[xPortGetCoreID()].state = PERF_TIMER_IDLE;
return ESP_OK;
}
// Disables the stall counter interrupts and marks the calling core's timer as
// PERF_TIMER_UNINIT. Always returns ESP_OK.
// NOTE(review): intr_handle is overwritten with NULL without being freed, and
// ccomp_timer_impl_init() does not allocate a new one. Confirm whether the
// handle should be kept, or freed here and re-allocated on init.
esp_err_t ccomp_timer_impl_deinit(void)
{
set_perfmon_interrupt(false);
s_status[xPortGetCoreID()].intr_handle = NULL;
s_status[xPortGetCoreID()].state = PERF_TIMER_UNINIT;
return ESP_OK;
}
// Marks the calling core's timer as active, records the current CCOUNT as the
// baseline, registers update_ccount() as a tick hook for this core and starts
// the performance counters. Always returns ESP_OK.
// NOTE(review): the result of esp_register_freertos_tick_hook_for_cpu() is not
// checked.
esp_err_t ccomp_timer_impl_start(void)
{
s_status[xPortGetCoreID()].state = PERF_TIMER_ACTIVE;
s_status[xPortGetCoreID()].last_ccount = xthal_get_ccount();
// Update elapsed cycles every OS tick
esp_register_freertos_tick_hook_for_cpu(update_ccount, xPortGetCoreID());
xtensa_perfmon_start();
return ESP_OK;
}
// Stops the performance counters, removes the tick hook for this core, folds
// the cycles elapsed since the last sample into the total, and moves the
// calling core's timer to PERF_TIMER_IDLE. Always returns ESP_OK.
esp_err_t IRAM_ATTR ccomp_timer_impl_stop(void)
{
xtensa_perfmon_stop();
esp_deregister_freertos_tick_hook_for_cpu(update_ccount, xPortGetCoreID());
// Must run while the state is still ACTIVE, otherwise update_ccount() is a no-op.
update_ccount();
s_status[xPortGetCoreID()].state = PERF_TIMER_IDLE;
return ESP_OK;
}
/*
 * Return the cache compensated time of the calling core in microseconds:
 * accumulated CPU cycles minus data and instruction stall cycles, converted
 * using the current CPU frequency.
 *
 * While the timer is active the cycle total is refreshed first. When it is
 * not active, update_ccount() does nothing and the stored total is used.
 */
int64_t IRAM_ATTR ccomp_timer_impl_get_time(void)
{
    update_ccount();

    const ccomp_timer_status_t *status = &s_status[xPortGetCoreID()];

    // Every recorded overflow of a 32-bit counter stands for 2^32 stall cycles.
    const int64_t cycles_per_overflow = (int64_t)1 << (sizeof(uint32_t) * 8);

    int64_t d_stalls = (int64_t)xtensa_perfmon_value(D_STALL_COUNTER_ID) +
                       (int64_t)status->d_ovfl * cycles_per_overflow;
    int64_t i_stalls = (int64_t)xtensa_perfmon_value(I_STALL_COUNTER_ID) +
                       (int64_t)status->i_ovfl * cycles_per_overflow;

    int64_t stalls = d_stalls + i_stalls;
    int64_t cycles = status->ccount;

    return ((cycles - stalls) * 1000000) / esp_clk_cpu_freq();
}
// Resets both stall counters and zeroes the overflow tallies, the accumulated
// cycle total and the last CCOUNT sample of the calling core. The timer state
// is left unchanged. Always returns ESP_OK.
esp_err_t ccomp_timer_impl_reset(void)
{
xtensa_perfmon_reset(D_STALL_COUNTER_ID);
xtensa_perfmon_reset(I_STALL_COUNTER_ID);
s_status[xPortGetCoreID()].d_ovfl = 0;
s_status[xPortGetCoreID()].i_ovfl = 0;
s_status[xPortGetCoreID()].ccount = 0;
s_status[xPortGetCoreID()].last_ccount = 0;
return ESP_OK;
}
bool ccomp_timer_impl_is_init(void)
{
return s_status[xPortGetCoreID()].state != PERF_TIMER_UNINIT;
}
bool IRAM_ATTR ccomp_timer_impl_is_active(void)
{
return s_status[xPortGetCoreID()].state == PERF_TIMER_ACTIVE;
}
void IRAM_ATTR ccomp_timer_impl_lock(void)
{
portENTER_CRITICAL(&s_lock);
}
void IRAM_ATTR ccomp_timer_impl_unlock(void)
{
portEXIT_CRITICAL(&s_lock);
}

Wyświetl plik

@ -0,0 +1,3 @@
# Headers visible only to this component's own sources.
COMPONENT_PRIV_INCLUDEDIRS := private_include
# Headers exported to components that depend on this one.
# This must be an assignment (":="); a bare ":" makes GNU Make parse the line as
# a rule with target COMPONENT_ADD_INCLUDEDIRS and prerequisite "include".
COMPONENT_ADD_INCLUDEDIRS := include
# NOTE(review): the CMake build lists sources from the component root only;
# confirm that an "esp32" source directory exists.
COMPONENT_SRCDIRS := . esp32

Wyświetl plik

@ -0,0 +1,54 @@
// Copyright 2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include "esp_err.h"
/**
* @brief Start the timer on the current core.
*
* @return
* - ESP_OK: Success
* - ESP_ERR_INVALID_STATE: The timer has already been started previously.
* - Others: Fail
*/
esp_err_t ccomp_timer_start(void);
/**
* @brief Stop the timer on the current core.
*
* @note Returns -1 if an error has occurred and stopping the timer failed.
*
* @return The time elapsed from the last ccomp_timer_start call on the current
* core.
*/
int64_t ccomp_timer_stop(void);
/**
* Return the current timer value on the current core without stopping the timer.
*
* @note Returns -1 if an error has occurred and the time could not be obtained.
*
* @note If called while timer is active i.e. between ccomp_timer_start and ccomp_timer_stop,
* this function returns the elapsed time from ccomp_timer_start. Once ccomp_timer_stop
* has been called, the timer becomes inactive and stops keeping time. As a result, if this function gets
* called after ccomp_timer_stop, this function will return the same value as when the timer was stopped.
*
* @return The elapsed time from the last ccomp_timer_start call on the current
* core.
*/
int64_t ccomp_timer_get_time(void);

Wyświetl plik

@ -0,0 +1,102 @@
// Copyright 2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdbool.h>
#include "esp_err.h"
/**
* @brief Initialize the underlying implementation for cache compensated timer. This might involve
* setting up architecture-specific event counters and or allocating interrupts that handle events for those counters.
* @return
* - ESP_OK: Success
* - ESP_ERR_INVALID_STATE: The timer has already been started previously.
* - Others: Fail
*/
esp_err_t ccomp_timer_impl_init(void);
/**
* @brief Deinitialize the underlying implementation for cache compensated timer. This should restore
* the state of the program to before ccomp_timer_impl_init.
* @return
* - ESP_OK: Success
* - ESP_ERR_INVALID_STATE: The timer has already been started previously.
* - Others: Fail
*/
esp_err_t ccomp_timer_impl_deinit(void);
/**
* @brief Make the underlying implementation start keeping time.
*
* @return
* - ESP_OK: Success
* - Others: Fail
*/
esp_err_t ccomp_timer_impl_start(void);
/**
* @brief Make the underlying implementation stop keeping time.
*
* @return
* - ESP_OK: Success
* - Others: Fail
*/
esp_err_t ccomp_timer_impl_stop(void);
/**
* @brief Reset the timer to its initial state.
*
* @return
* - ESP_OK: Success
* - Others: Fail
*/
esp_err_t ccomp_timer_impl_reset(void);
/**
* @brief Get the elapsed time kept track of by the underlying implementation in microseconds.
*
* @return The elapsed time in microseconds. Set to -1 if the operation is unsuccessful.
*/
int64_t ccomp_timer_impl_get_time(void);
/**
* @brief Obtain an internal critical section used in the implementation. Should be treated
* as a spinlock.
*/
void ccomp_timer_impl_lock(void);
/**
* @brief Release the internal critical section obtained with ccomp_timer_impl_lock.
*/
void ccomp_timer_impl_unlock(void);
/**
* @brief Check if timer has been initialized.
*
* @return
* - true: the timer has been initialized using ccomp_timer_impl_init
* - false: the timer has not been initialized, or ccomp_timer_impl_deinit has been called recently
*/
bool ccomp_timer_impl_is_init(void);
/**
* @brief Check if timer is keeping time.
*
* @return
* - true: the timer is keeping track of elapsed time from ccomp_timer_impl_start
* - false: the timer is not keeping track of elapsed time since ccomp_timer_impl_start has not yet been called or ccomp_timer_impl_stop has been called recently
*/
bool ccomp_timer_impl_is_active(void);

Wyświetl plik

@ -0,0 +1,3 @@
idf_component_register(SRC_DIRS "."
PRIV_INCLUDE_DIRS "."
REQUIRES unity test_utils perfmon)

Wyświetl plik

@ -0,0 +1,166 @@
#include <stdlib.h>
#include <stdint.h>
#include "esp_timer.h"
#include "esp_log.h"
#include "esp_attr.h"
#include "ccomp_timer.h"
#include "eri.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_ipc.h"
#include "unity.h"
#ifndef CONFIG_FREERTOS_UNICORE
static void start_timer(void *param)
{
esp_err_t *err = (esp_err_t *)param;
*err = ccomp_timer_start();
}
static void stop_timer(void *param)
{
int64_t *t = (int64_t *)param;
*t = ccomp_timer_stop();
}
#endif
static void computation(void *param)
{
int *l = (int *)param;
for (volatile int i = 0, a = 0; i < *l; i++)
{
a += i;
}
}
TEST_CASE("starting and stopping works", "[test_utils][ccomp_timer]")
{
esp_err_t err;
int64_t t;
/*
* Test on the same task
*/
err = ccomp_timer_start();
TEST_ASSERT_EQUAL(ESP_OK, err);
// Start an already started timer
err = ccomp_timer_start();
TEST_ASSERT_EQUAL(ESP_ERR_INVALID_STATE, err);
t = ccomp_timer_stop();
TEST_ASSERT_GREATER_OR_EQUAL(0, t);
// Stopping a non started timer
t = ccomp_timer_stop();
TEST_ASSERT_EQUAL(-1, t);
#ifndef CONFIG_FREERTOS_UNICORE
/*
* Test on different task on same core
*/
err = ccomp_timer_start();
TEST_ASSERT_EQUAL(ESP_OK, err);
esp_ipc_call_blocking(xPortGetCoreID(), start_timer, &err);
TEST_ASSERT_EQUAL(ESP_ERR_INVALID_STATE, err);
t = ccomp_timer_stop();
TEST_ASSERT_GREATER_OR_EQUAL(0, t);
esp_ipc_call_blocking(xPortGetCoreID(), stop_timer, &t);
TEST_ASSERT_EQUAL(-1, t);
/*
* Timer being stopped from another task on the same core
*/
err = ccomp_timer_start();
TEST_ASSERT_EQUAL(ESP_OK, err);
esp_ipc_call_blocking(xPortGetCoreID(), stop_timer, &t);
TEST_ASSERT_GREATER_OR_EQUAL(0, t);
/*
* Test on different task on the other core
*/
err = ccomp_timer_start();
TEST_ASSERT_EQUAL(ESP_OK, err);
esp_ipc_call_blocking(xPortGetCoreID() == 0 ? 1 : 0, start_timer, &err);
TEST_ASSERT_EQUAL(ESP_OK, err);
t = ccomp_timer_stop();
TEST_ASSERT_GREATER_OR_EQUAL(0, t);
esp_ipc_call_blocking(xPortGetCoreID() == 0 ? 1 : 0, stop_timer, &t);
TEST_ASSERT_GREATER_OR_EQUAL(0, t);
#endif
}
/*
 * Checks that ccomp_timer_get_time() advances while the timer is running,
 * that the compensated time is comparable to wall time, and that reading the
 * timer after it was stopped returns the value reported by ccomp_timer_stop().
 */
TEST_CASE("getting the time works", "[test_utils][ccomp_timer]")
{
    // Get wall time and start ccomp timer
    int64_t start = esp_timer_get_time();
    ccomp_timer_start();

    int64_t t_a = ccomp_timer_get_time();

    int temp = 10000;
    computation(&temp);

    int64_t t_b = ccomp_timer_get_time();

    // Check that ccomp time after computation is more than
    // ccomp time before computation.
    TEST_ASSERT_LESS_THAN(t_b, t_a);

    // Get time diff between wall time and ccomp time
    int64_t t_1 = ccomp_timer_stop();
    int64_t t_2 = esp_timer_get_time() - start;

    // The times should at least be in the same ballpark. The operands are
    // 64-bit, so llabs() is required; abs() would truncate them to int.
    // NOTE(review): diff is a ratio, so 10.0f accepts a deviation of up to
    // 1000% of wall time. Confirm whether 0.1f (10%) was intended.
    float diff = llabs(t_1 - t_2) / ((float)t_2);
    TEST_ASSERT(diff <= 10.0f);

    // Since the timer was already stopped, test that ccomp_timer_get_time
    // returns the same time as ccomp_timer_stop
    int64_t t_c = ccomp_timer_get_time();
    TEST_ASSERT_EQUAL(t_1, t_c);
}
#ifndef CONFIG_FREERTOS_UNICORE
TEST_CASE("timers for each core counts independently", "[test_utils][ccomp_timer]")
{
esp_err_t err;
// Start a timer on this core
err = ccomp_timer_start();
TEST_ASSERT_EQUAL(ESP_OK, err);
// Do some work on this core
int temp = 10000;
computation(&temp);
// Start a timer on the other core
esp_ipc_call_blocking(xPortGetCoreID() == 0 ? 1 : 0, start_timer, &err);
TEST_ASSERT_EQUAL(ESP_OK, err);
// Do some work on other core (less work than this core did)
temp = 5000;
esp_ipc_call_blocking(xPortGetCoreID() == 0 ? 1 : 0, computation, &temp);
// Stop timers from both cores
int64_t t_1 = ccomp_timer_stop();
TEST_ASSERT_GREATER_OR_EQUAL(0, t_1);
int64_t t_2;
esp_ipc_call_blocking(xPortGetCoreID() == 0 ? 1 : 0, stop_timer, &t_2);
TEST_ASSERT_GREATER_OR_EQUAL(0, t_2);
// Since this core did more work, it probably has longer measured time
TEST_ASSERT_GREATER_THAN(t_2, t_1);
}
#endif

Wyświetl plik

@ -0,0 +1,175 @@
#include <stdlib.h>
#include <stdint.h>
#include "esp_timer.h"
#include "esp_log.h"
#include "esp_attr.h"
#include "ccomp_timer.h"
#include "eri.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_ipc.h"
#if CONFIG_IDF_TARGET_ESP32
#include "esp32/clk.h"
#elif CONFIG_IDF_TARGET_ESP32S2BETA
#include "esp32s2beta/clk.h"
#endif
#include "unity.h"
#include "sdkconfig.h"
static const char* TAG = "test_ccomp_timer";
#if CONFIG_IDF_TARGET_ESP32
#define CACHE_WAYS 2
#define CACHE_LINE_SIZE 32
#define CACHE_SIZE (1 << 15)
// Only test half due to lack of memory
#define TEST_SIZE (CACHE_SIZE / 2)
#elif CONFIG_IDF_TARGET_ESP32S2BETA
// Default cache configuration - no override specified on
// test_utils config
#define CACHE_WAYS 8
#define CACHE_LINE_SIZE 32
#define CACHE_SIZE (1 << 13)
#define TEST_SIZE (CACHE_SIZE)
#endif
typedef struct {
uint8_t **accesses;
size_t len;
} ccomp_test_access_t;
typedef struct {
int64_t wall;
int64_t ccomp;
} ccomp_test_time_t;
#if CONFIG_ESP32_SPIRAM_SUPPORT
static uint8_t *flash_mem;
#else
static const uint8_t flash_mem[2 * CACHE_SIZE] = {0};
#endif
static IRAM_ATTR void perform_accesses(ccomp_test_access_t *access)
{
volatile int a = 0;
for (int i = 0; i < access->len; i++) {
a += (int)(*(access->accesses[i]));
}
}
static void prepare_cache(const uint8_t *to_cache)
{
volatile int a = 0;
for (int i = 0; i < CACHE_SIZE; i++) {
a += to_cache[i];
}
}
static void prepare_access_pattern(int hit_rate, const uint8_t *cached, ccomp_test_access_t *out)
{
assert(hit_rate <= 100);
assert(hit_rate >= 0);
int misses = (100 - hit_rate) * CACHE_LINE_SIZE;
int hits = hit_rate * CACHE_LINE_SIZE;
uint8_t **accesses = calloc(TEST_SIZE, sizeof(uint8_t *));
for (int i = 0, h = 0, i_h = 1, m = -1, i_m = 0; i < TEST_SIZE; i++, h += i_h, m += i_m) {
if (i_m) {
accesses[i] = (uint8_t*) (cached + CACHE_SIZE + i);
}
else {
accesses[i] = (uint8_t*) (cached + i);
}
if (h >= hits) {
h = -1;
i_h = 0;
m = 0;
i_m = 1;
}
if (m >= misses) {
m = -1;
i_m = 0;
h = 0;
i_h = 1;
}
}
out->accesses = accesses;
out->len = TEST_SIZE;
}
/*
 * Build an access pattern for the requested hit rate over `mem`, warm the
 * cache with prepare_cache(), then time the accesses with both the wall clock
 * and the cache compensated timer.
 *
 * hit_rate - desired cache hit rate in percent (0..100)
 * mem      - base of the buffer the generated accesses point into
 *
 * Returns the wall time and the compensated time of the run.
 */
static ccomp_test_time_t perform_test_at_hit_rate(int hit_rate, const uint8_t *mem)
{
    ccomp_test_access_t access;
    prepare_access_pattern(hit_rate, mem, &access);
    prepare_cache(mem);

    int64_t start = esp_timer_get_time();
    ccomp_timer_start();
    perform_accesses(&access);

    // Read the timers in separate statements: expressions inside an initializer
    // list are indeterminately sequenced in C, so the wall clock could otherwise
    // be sampled before the compensated timer is stopped.
    const int64_t ccomp = ccomp_timer_stop();
    const int64_t wall = esp_timer_get_time() - start;

    free(access.accesses);

    ccomp_test_time_t t = {
        .ccomp = ccomp,
        .wall = wall
    };
    return t;
}
static ccomp_test_time_t ccomp_test_ref_time(void)
{
#if CONFIG_ESP32_SPIRAM_SUPPORT
uint8_t *mem = heap_caps_malloc(2 * CACHE_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_DEFAULT);
#else
uint8_t *mem = heap_caps_malloc(sizeof(flash_mem), MALLOC_CAP_INTERNAL | MALLOC_CAP_DEFAULT);
#endif
ccomp_test_time_t t = perform_test_at_hit_rate(0, mem);
free(mem);
return t;
}
/*
 * Sweeps the data cache hit rate from 0% to 100% in 5% steps and checks that
 * the compensated time at every step stays within 5% of the reference time
 * measured on internal RAM.
 */
TEST_CASE("data cache hit rate sweep", "[test_utils][ccomp_timer]")
{
    ccomp_test_time_t t_ref;
    ccomp_test_time_t t_hr;

#if CONFIG_ESP32_SPIRAM_SUPPORT
    flash_mem = heap_caps_malloc(2 * CACHE_SIZE, MALLOC_CAP_8BIT | MALLOC_CAP_SPIRAM);
    // Without this check a failed allocation would make the sweep read from NULL.
    TEST_ASSERT_NOT_NULL(flash_mem);
#endif

    // Perform accesses on RAM. The time recorded here serves as
    // reference.
    t_ref = ccomp_test_ref_time();
    ESP_LOGI(TAG, "Reference Time(us): %lld", (long long)t_ref.ccomp);

    // Measure time at particular hit rates
    for (int i = 0; i <= 100; i += 5)
    {
        t_hr = perform_test_at_hit_rate(i, flash_mem);
        // The times are 64-bit, so use llabs(); abs() would truncate to int.
        float error = (llabs(t_ref.ccomp - t_hr.ccomp) / (float)t_ref.ccomp) * 100.0f;
        ESP_LOGI(TAG, "Hit Rate(%%): %d Wall Time(us): %lld Compensated Time(us): %lld Error(%%): %f", i, (long long)t_hr.wall, (long long)t_hr.ccomp, error);

        // Check if the measured time is at least within some percent of the
        // reference.
        TEST_ASSERT(error <= 5.0f);
    }

#if CONFIG_ESP32_SPIRAM_SUPPORT
    free(flash_mem);
#endif
}

Wyświetl plik

@ -0,0 +1,229 @@
#include <stdlib.h>
#include <stdint.h>
#include "esp_timer.h"
#include "esp_log.h"
#include "esp_attr.h"
#include "ccomp_timer.h"
#include "freertos/FreeRTOS.h"
#include "freertos/portmacro.h"
#include "unity.h"
#include "sdkconfig.h"
#if CONFIG_IDF_TARGET_ESP32
#define CACHE_WAYS 2
#define CACHE_LINE_SIZE 32
#define CACHE_SIZE (1 << 15)
// Only test half due to lack of memory
#elif CONFIG_IDF_TARGET_ESP32S2BETA
// Default cache configuration - no override specified on
// test_utils config
#define CACHE_WAYS 8
#define CACHE_LINE_SIZE 32
#define CACHE_SIZE (1 << 13)
#endif
typedef void (*ccomp_test_func_t)(void);
static const char* TAG = "test_ccomp_timer";
typedef struct {
int64_t wall;
int64_t ccomp;
} ccomp_test_time_t;
typedef struct {
ccomp_test_func_t *funcs;
size_t len;
} ccomp_test_call_t;
#define FUNC() \
do \
{ \
volatile int a = 0; \
a++; \
} while (0);
__aligned(CACHE_SIZE / CACHE_WAYS) static void test_func1(void)
{
FUNC();
}
__aligned(CACHE_SIZE / CACHE_WAYS) static void test_func2(void)
{
FUNC();
}
__aligned(CACHE_SIZE / CACHE_WAYS) static void test_func3(void)
{
FUNC();
}
#if CONFIG_IDF_TARGET_ESP32S2BETA
__aligned(CACHE_SIZE / CACHE_WAYS) static void test_func4(void)
{
FUNC();
}
__aligned(CACHE_SIZE / CACHE_WAYS) static void test_func5(void)
{
FUNC();
}
__aligned(CACHE_SIZE / CACHE_WAYS) static void test_func6(void)
{
FUNC();
}
__aligned(CACHE_SIZE / CACHE_WAYS) static void test_func7(void)
{
FUNC();
}
__aligned(CACHE_SIZE / CACHE_WAYS) static void test_func8(void)
{
FUNC();
}
__aligned(CACHE_SIZE / CACHE_WAYS) static void test_func9(void)
{
FUNC();
}
#endif
static void IRAM_ATTR iram_func(void)
{
FUNC();
}
static void IRAM_ATTR perform_calls(ccomp_test_call_t *call)
{
for (int i = 0; i < call->len; i++) {
call->funcs[i]();
}
}
static void IRAM_ATTR prepare_cache(ccomp_test_call_t *call)
{
perform_calls(call);
}
static void IRAM_ATTR prepare_calls(int hit_rate, ccomp_test_func_t *alts, size_t alts_len, size_t len, ccomp_test_call_t *out)
{
assert(hit_rate <= 100);
assert(hit_rate >= 0);
int misses = (100 - hit_rate);
int hits = hit_rate;
ccomp_test_func_t *funcs = calloc(len, sizeof(ccomp_test_func_t));
for (int i = 0, h = 0, i_h = 1, m = -1, i_m = 0, l = 0; i < len; i++, h += i_h, m += i_m) {
funcs[i] = alts[l % alts_len];
if (i_m) {
l++;
}
if (h >= hits) {
h = -1;
i_h = 0;
m = 0;
i_m = 1;
}
if (m >= misses) {
m = -1;
i_m = 0;
h = 0;
i_h = 1;
}
}
out->funcs = funcs;
out->len = len;
}
/*
 * Build a 10000-entry call sequence for the requested hit rate, warm the cache
 * by calling test_func1 and test_func2, then time the call sequence with both
 * the wall clock and the cache compensated timer inside a critical section.
 *
 * hit_rate - desired instruction cache hit rate in percent (0..100)
 *
 * Returns the wall time and the compensated time of the run.
 */
static ccomp_test_time_t IRAM_ATTR perform_test_at_hit_rate(int hit_rate)
{
    static portMUX_TYPE m = portMUX_INITIALIZER_UNLOCKED;

    ccomp_test_call_t calls;
    ccomp_test_func_t alts[] = {test_func1, test_func2, test_func3,
#if CONFIG_IDF_TARGET_ESP32S2BETA
        test_func4, test_func5, test_func6, test_func7, test_func8, test_func9,
#endif
    };
    prepare_calls(hit_rate, alts, sizeof(alts)/sizeof(alts[0]), 10000, &calls);

    ccomp_test_func_t f[] = {test_func1, test_func2};
    ccomp_test_call_t cache = {
        .funcs = f,
        .len = sizeof(f) / sizeof(f[0])};

    portENTER_CRITICAL(&m);

    prepare_cache(&cache);

    int64_t start = esp_timer_get_time();
    ccomp_timer_start();
    perform_calls(&calls);

    // Read the timers in separate statements: expressions inside an initializer
    // list are indeterminately sequenced in C, so the wall clock could otherwise
    // be sampled before the compensated timer is stopped.
    const int64_t ccomp = ccomp_timer_stop();
    const int64_t wall = esp_timer_get_time() - start;

    portEXIT_CRITICAL(&m);

    free(calls.funcs);

    ccomp_test_time_t t = {
        .ccomp = ccomp,
        .wall = wall
    };
    return t;
}
static ccomp_test_time_t ccomp_test_ref_time(void)
{
ccomp_test_call_t calls;
ccomp_test_func_t alts[] = {iram_func};
prepare_calls(0, alts, 1, 10000, &calls);
int64_t start = esp_timer_get_time();
ccomp_timer_start();
perform_calls(&calls);
ccomp_test_time_t t = {
.ccomp = ccomp_timer_stop(),
.wall = esp_timer_get_time() - start
};
free(calls.funcs);
return t;
}
// Sweeps the hit rate from 0% to 100% in steps of 5 and checks that the
// cache-compensated time of each run stays within 5% (relative to the
// reference wall time) of the compensated time of the reference run.
TEST_CASE("instruction cache hit rate sweep test", "[test_utils][ccomp_timer]")
{
    ccomp_test_time_t t_ref;
    ccomp_test_time_t t_hr;

    // Perform accesses on RAM. The time recorded here serves as
    // reference.
    t_ref = ccomp_test_ref_time();
    ESP_LOGI(TAG, "Reference Time(us): %lld", (long long)t_ref.ccomp);

    // Measure time at particular hit rates
    for (int i = 0; i <= 100; i += 5) {
        t_hr = perform_test_at_hit_rate(i);

        // The times are handled as 64-bit values (see the %lld prints), so the
        // magnitude is computed at full width; abs() takes an int and would
        // truncate the difference.
        long long diff = (long long)t_ref.ccomp - (long long)t_hr.ccomp;
        if (diff < 0) {
            diff = -diff;
        }
        float error = (diff / (float)t_ref.wall) * 100.0f;

        ESP_LOGI(TAG, "Hit Rate(%%): %d Wall Time(us): %lld Compensated Time(us): %lld Error(%%): %f", i, (long long)t_hr.wall, (long long)t_hr.ccomp, error);

        // Check if the measured time is at least within some percent of the
        // reference.
        TEST_ASSERT(error <= 5.0f);
    }
}

Wyświetl plik

@ -0,0 +1,7 @@
#
# Component Makefile
#
# Sources are taken from the component directory itself.
COMPONENT_SRCDIRS += .
# Link the component's archive between --whole-archive / --no-whole-archive so
# the linker pulls in every object file of the library, not only the ones that
# resolve an undefined symbol.
# NOTE(review): presumably needed so that objects which nothing references
# directly are still linked into the test app -- confirm.
COMPONENT_ADD_LDFLAGS = -Wl,--whole-archive -l$(COMPONENT_NAME) -Wl,--no-whole-archive

Wyświetl plik

@ -1,3 +1,3 @@
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update test_utils
TEST_COMPONENTS=mbedtls
CONFIG_MBEDTLS_HARDWARE_AES=n

Wyświetl plik

@ -1,4 +1,4 @@
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update test_utils
TEST_COMPONENTS=mbedtls
CONFIG_MBEDTLS_HARDWARE_AES=n
CONFIG_IDF_TARGET="esp32s2beta"

Wyświetl plik

@ -1 +1 @@
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update freertos esp32 driver heap pthread soc spi_flash vfs
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update freertos esp32 driver heap pthread soc spi_flash vfs test_utils

Wyświetl plik

@ -1,4 +1,4 @@
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update driver esp32 spi_flash
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update driver esp32 spi_flash test_utils
CONFIG_ESP32_SPIRAM_SUPPORT=y
CONFIG_ESP_INT_WDT_TIMEOUT_MS=800
CONFIG_SPIRAM_OCCUPY_NO_HOST=y

Wyświetl plik

@ -1,3 +1,3 @@
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update freertos esp32 driver heap pthread soc spi_flash vfs
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update freertos esp32 driver heap pthread soc spi_flash vfs test_utils
CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_SILENT=y

Wyświetl plik

@ -1,4 +1,4 @@
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update freertos esp32s2beta driver heap pthread soc spi_flash vfs
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update freertos esp32s2beta driver heap pthread soc spi_flash vfs test_utils
CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_SILENT=y
CONFIG_IDF_TARGET="esp32s2beta"

Wyświetl plik

@ -1,4 +1,4 @@
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update freertos esp32 driver heap pthread soc spi_flash vfs
TEST_EXCLUDE_COMPONENTS=libsodium bt app_update freertos esp32 driver heap pthread soc spi_flash vfs test_utils
CONFIG_MEMMAP_SMP=n
CONFIG_FREERTOS_UNICORE=y
CONFIG_ESP32_RTCDATA_IN_FAST_MEM=y

Wyświetl plik

@ -1,4 +1,4 @@
TEST_COMPONENTS=freertos esp32s2beta driver heap pthread soc spi_flash vfs
TEST_COMPONENTS=freertos esp32s2beta driver heap pthread soc spi_flash vfs test_utils
CONFIG_MEMMAP_SMP=n
CONFIG_FREERTOS_UNICORE=y
CONFIG_ESP32_RTCDATA_IN_FAST_MEM=y

Wyświetl plik

@ -0,0 +1,4 @@
# The test is isolated because it requires a particular memory layout
TEST_COMPONENTS=test_utils
CONFIG_ESP_IPC_TASK_STACK_SIZE=2048

Wyświetl plik

@ -0,0 +1,4 @@
# The test is isolated because it requires a particular memory layout
TEST_COMPONENTS=test_utils
CONFIG_ESP_IPC_TASK_STACK_SIZE=2048
CONFIG_ESP32_SPIRAM_SUPPORT=y

Wyświetl plik

@ -0,0 +1,5 @@
# The test is isolated because it requires a particular memory layout
TEST_COMPONENTS=test_utils
CONFIG_ESP_IPC_TASK_STACK_SIZE=2048
CONFIG_IDF_TARGET="esp32s2beta"