From a70a4e6688c6b29797b1e314ce2841e8e4fecb7b Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 4 Jun 2021 01:17:09 +1000 Subject: [PATCH] py/emitglue: Always flush caches when assigning native ARM code. Prior to this commit, cache flushing for ARM native code was done only in the assembler code asm_thumb_end_pass()/asm_arm_end_pass(), at the last pass of the assembler. But this misses flushing the cache when loading native code from an .mpy file, i.e. in persistentcode.c. The change here makes sure the cache is always flushed/cleaned/invalidated when assigning native code on ARM architectures. This problem was found while running tests/micropython/import_mpy_native_gc.py on the mimxrt port. Signed-off-by: Damien George --- py/asmarm.c | 19 ------------------- py/asmarm.h | 4 +++- py/asmthumb.c | 15 --------------- py/asmthumb.h | 4 +++- py/emitglue.c | 25 +++++++++++++++++++++++++ 5 files changed, 31 insertions(+), 36 deletions(-) diff --git a/py/asmarm.c b/py/asmarm.c index 5662d75e19..4ba93d0806 100644 --- a/py/asmarm.c +++ b/py/asmarm.c @@ -38,25 +38,6 @@ #define SIGNED_FIT24(x) (((x) & 0xff800000) == 0) || (((x) & 0xff000000) == 0xff000000) -void asm_arm_end_pass(asm_arm_t *as) { - if (as->base.pass == MP_ASM_PASS_EMIT) { - #if (defined(__linux__) && defined(__GNUC__)) || __ARM_ARCH == 7 - char *start = mp_asm_base_get_code(&as->base); - char *end = start + mp_asm_base_get_code_size(&as->base); - __builtin___clear_cache(start, end); - #elif defined(__arm__) - // flush I- and D-cache - asm volatile ( - "0:" - "mrc p15, 0, r15, c7, c10, 3\n" // test and clean D-cache - "bne 0b\n" - "mov r0, #0\n" - "mcr p15, 0, r0, c7, c7, 0\n" // invalidate I-cache and D-cache - : : : "r0", "cc"); - #endif - } -} - // Insert word into instruction flow STATIC void emit(asm_arm_t *as, uint op) { uint8_t *c = mp_asm_base_get_cur_to_write_bytes(&as->base, 4); diff --git a/py/asmarm.h b/py/asmarm.h index 46da661faa..0e029f20e9 100644 --- a/py/asmarm.h +++ b/py/asmarm.h @@ -72,7 +72,9 
@@ typedef struct _asm_arm_t { uint stack_adjust; } asm_arm_t; -void asm_arm_end_pass(asm_arm_t *as); +static inline void asm_arm_end_pass(asm_arm_t *as) { + (void)as; +} void asm_arm_entry(asm_arm_t *as, int num_locals); void asm_arm_exit(asm_arm_t *as); diff --git a/py/asmthumb.c b/py/asmthumb.c index f7ac87fa0f..db4520ce12 100644 --- a/py/asmthumb.c +++ b/py/asmthumb.c @@ -35,7 +35,6 @@ #include "py/mpstate.h" #include "py/persistentcode.h" -#include "py/mphal.h" #include "py/asmthumb.h" #define UNSIGNED_FIT5(x) ((uint32_t)(x) < 32) @@ -62,20 +61,6 @@ static inline byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int n) { return mp_asm_base_get_cur_to_write_bytes(&as->base, n); } -void asm_thumb_end_pass(asm_thumb_t *as) { - (void)as; - // could check labels are resolved... - - #if __ICACHE_PRESENT == 1 - if (as->base.pass == MP_ASM_PASS_EMIT) { - // flush D-cache, so the code emitted is stored in memory - MP_HAL_CLEAN_DCACHE(as->base.code_base, as->base.code_size); - // invalidate I-cache - SCB_InvalidateICache(); - } - #endif -} - /* STATIC void asm_thumb_write_byte_1(asm_thumb_t *as, byte b1) { byte *c = asm_thumb_get_cur_to_write_bytes(as, 1); diff --git a/py/asmthumb.h b/py/asmthumb.h index 17a0cca98c..1a01d20c69 100644 --- a/py/asmthumb.h +++ b/py/asmthumb.h @@ -70,7 +70,9 @@ typedef struct _asm_thumb_t { uint32_t stack_adjust; } asm_thumb_t; -void asm_thumb_end_pass(asm_thumb_t *as); +static inline void asm_thumb_end_pass(asm_thumb_t *as) { + (void)as; +} void asm_thumb_entry(asm_thumb_t *as, int num_locals); void asm_thumb_exit(asm_thumb_t *as); diff --git a/py/emitglue.c b/py/emitglue.c index 0ef708a3f3..09b48682ff 100644 --- a/py/emitglue.c +++ b/py/emitglue.c @@ -108,6 +108,31 @@ void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void assert(kind == MP_CODE_NATIVE_PY || kind == MP_CODE_NATIVE_VIPER || kind == MP_CODE_NATIVE_ASM); + // Some architectures require flushing/invalidation of the I/D caches, + // so that the 
generated native code which was created in data RAM will + // be available for execution from instruction RAM. + #if MICROPY_EMIT_THUMB || MICROPY_EMIT_INLINE_THUMB + #if __ICACHE_PRESENT == 1 + // Flush D-cache, so the code emitted is stored in RAM. + MP_HAL_CLEAN_DCACHE(fun_data, fun_len); + // Invalidate I-cache, so the newly-created code is reloaded from RAM. + SCB_InvalidateICache(); + #endif + #elif MICROPY_EMIT_ARM + #if (defined(__linux__) && defined(__GNUC__)) || __ARM_ARCH == 7 + __builtin___clear_cache(fun_data, (uint8_t *)fun_data + fun_len); + #elif defined(__arm__) + // Flush I-cache and D-cache. + asm volatile ( + "0:" + "mrc p15, 0, r15, c7, c10, 3\n" // test and clean D-cache + "bne 0b\n" + "mov r0, #0\n" + "mcr p15, 0, r0, c7, c7, 0\n" // invalidate I-cache and D-cache + : : : "r0", "cc"); + #endif + #endif + rc->kind = kind; rc->scope_flags = scope_flags; rc->n_pos_args = n_pos_args;