kopia lustrzana https://github.com/micropython/micropython
py/qstr: Add support for MICROPY_QSTR_BYTES_IN_HASH=0.
This disables using qstr hashes altogether, which saves RAM and flash (two bytes per interned string on a typical build) as well as code size. On PYBV11 this is worth over 3k flash. qstr comparison will now be done just by length then data. This affects qstr_find_strn although this has a negligible performance impact as, for a given comparison, the length and first character will ~usually be different anyway. String hashing (e.g. builtin `hash()` and map.c) now need to compute the hash dynamically, and for the map case this does come at a performance cost. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>pull/12835/head
rodzic
307ecc5707
commit
7ea503929a
|
@ -295,7 +295,8 @@ def compute_hash(qstr, bytes_hash):
|
||||||
for b in qstr:
|
for b in qstr:
|
||||||
hash = (hash * 33) ^ b
|
hash = (hash * 33) ^ b
|
||||||
# Make sure that valid hash is never zero, zero means "hash not computed"
|
# Make sure that valid hash is never zero, zero means "hash not computed"
|
||||||
return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
|
# if bytes_hash is zero, assume a 16-bit mask (to match qstr.c)
|
||||||
|
return (hash & ((1 << (8 * (bytes_hash or 2))) - 1)) or 1
|
||||||
|
|
||||||
|
|
||||||
def qstr_escape(qst):
|
def qstr_escape(qst):
|
||||||
|
|
52
py/qstr.c
52
py/qstr.c
|
@ -42,7 +42,11 @@
|
||||||
// A qstr is an index into the qstr pool.
|
// A qstr is an index into the qstr pool.
|
||||||
// The data for a qstr is \0 terminated (so they can be printed using printf)
|
// The data for a qstr is \0 terminated (so they can be printed using printf)
|
||||||
|
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
#define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1)
|
#define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1)
|
||||||
|
#else
|
||||||
|
#define Q_HASH_MASK (0xffff)
|
||||||
|
#endif
|
||||||
|
|
||||||
#if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
|
#if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
|
||||||
#define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1)
|
#define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1)
|
||||||
|
@ -77,6 +81,7 @@ size_t qstr_compute_hash(const byte *data, size_t len) {
|
||||||
// future .mpy version we could re-order them and make it sorted). It also
|
// future .mpy version we could re-order them and make it sorted). It also
|
||||||
// contains additional qstrs that must have IDs <256, see operator_qstr_list
|
// contains additional qstrs that must have IDs <256, see operator_qstr_list
|
||||||
// in makeqstrdata.py.
|
// in makeqstrdata.py.
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
const qstr_hash_t mp_qstr_const_hashes_static[] = {
|
const qstr_hash_t mp_qstr_const_hashes_static[] = {
|
||||||
#ifndef NO_QSTR
|
#ifndef NO_QSTR
|
||||||
#define QDEF0(id, hash, len, str) hash,
|
#define QDEF0(id, hash, len, str) hash,
|
||||||
|
@ -86,6 +91,7 @@ const qstr_hash_t mp_qstr_const_hashes_static[] = {
|
||||||
#undef QDEF1
|
#undef QDEF1
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
const qstr_len_t mp_qstr_const_lengths_static[] = {
|
const qstr_len_t mp_qstr_const_lengths_static[] = {
|
||||||
#ifndef NO_QSTR
|
#ifndef NO_QSTR
|
||||||
|
@ -103,7 +109,9 @@ const qstr_pool_t mp_qstr_const_pool_static = {
|
||||||
false, // is_sorted
|
false, // is_sorted
|
||||||
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
|
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
|
||||||
MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
|
MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
(qstr_hash_t *)mp_qstr_const_hashes_static,
|
(qstr_hash_t *)mp_qstr_const_hashes_static,
|
||||||
|
#endif
|
||||||
(qstr_len_t *)mp_qstr_const_lengths_static,
|
(qstr_len_t *)mp_qstr_const_lengths_static,
|
||||||
{
|
{
|
||||||
#ifndef NO_QSTR
|
#ifndef NO_QSTR
|
||||||
|
@ -118,6 +126,7 @@ const qstr_pool_t mp_qstr_const_pool_static = {
|
||||||
|
|
||||||
// The next pool is the remainder of the qstrs defined in the firmware. This
|
// The next pool is the remainder of the qstrs defined in the firmware. This
|
||||||
// is sorted.
|
// is sorted.
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
const qstr_hash_t mp_qstr_const_hashes[] = {
|
const qstr_hash_t mp_qstr_const_hashes[] = {
|
||||||
#ifndef NO_QSTR
|
#ifndef NO_QSTR
|
||||||
#define QDEF0(id, hash, len, str)
|
#define QDEF0(id, hash, len, str)
|
||||||
|
@ -127,6 +136,7 @@ const qstr_hash_t mp_qstr_const_hashes[] = {
|
||||||
#undef QDEF1
|
#undef QDEF1
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
const qstr_len_t mp_qstr_const_lengths[] = {
|
const qstr_len_t mp_qstr_const_lengths[] = {
|
||||||
#ifndef NO_QSTR
|
#ifndef NO_QSTR
|
||||||
|
@ -144,7 +154,9 @@ const qstr_pool_t mp_qstr_const_pool = {
|
||||||
true, // is_sorted
|
true, // is_sorted
|
||||||
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
|
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
|
||||||
MP_QSTRnumber_of - MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
|
MP_QSTRnumber_of - MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
(qstr_hash_t *)mp_qstr_const_hashes,
|
(qstr_hash_t *)mp_qstr_const_hashes,
|
||||||
|
#endif
|
||||||
(qstr_len_t *)mp_qstr_const_lengths,
|
(qstr_len_t *)mp_qstr_const_lengths,
|
||||||
{
|
{
|
||||||
#ifndef NO_QSTR
|
#ifndef NO_QSTR
|
||||||
|
@ -188,8 +200,13 @@ STATIC const qstr_pool_t *find_qstr(qstr *q) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// qstr_mutex must be taken while in this function
|
// qstr_mutex must be taken while in this function
|
||||||
STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
|
STATIC qstr qstr_add(mp_uint_t len, const char *q_ptr) {
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
|
mp_uint_t hash = qstr_compute_hash((const byte *)q_ptr, len);
|
||||||
DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr);
|
DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr);
|
||||||
|
#else
|
||||||
|
DEBUG_printf("QSTR: add len=%d data=%.*s\n", len, len, q_ptr);
|
||||||
|
#endif
|
||||||
|
|
||||||
// make sure we have room in the pool for a new qstr
|
// make sure we have room in the pool for a new qstr
|
||||||
if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) {
|
if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) {
|
||||||
|
@ -199,7 +216,11 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
|
||||||
new_alloc = MAX(MICROPY_ALLOC_QSTR_ENTRIES_INIT, new_alloc);
|
new_alloc = MAX(MICROPY_ALLOC_QSTR_ENTRIES_INIT, new_alloc);
|
||||||
#endif
|
#endif
|
||||||
mp_uint_t pool_size = sizeof(qstr_pool_t)
|
mp_uint_t pool_size = sizeof(qstr_pool_t)
|
||||||
+ (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * new_alloc;
|
+ (sizeof(const char *)
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
|
+ sizeof(qstr_hash_t)
|
||||||
|
#endif
|
||||||
|
+ sizeof(qstr_len_t)) * new_alloc;
|
||||||
qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size);
|
qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size);
|
||||||
if (pool == NULL) {
|
if (pool == NULL) {
|
||||||
// Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned
|
// Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned
|
||||||
|
@ -211,8 +232,12 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
|
||||||
QSTR_EXIT();
|
QSTR_EXIT();
|
||||||
m_malloc_fail(new_alloc);
|
m_malloc_fail(new_alloc);
|
||||||
}
|
}
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
pool->hashes = (qstr_hash_t *)(pool->qstrs + new_alloc);
|
pool->hashes = (qstr_hash_t *)(pool->qstrs + new_alloc);
|
||||||
pool->lengths = (qstr_len_t *)(pool->hashes + new_alloc);
|
pool->lengths = (qstr_len_t *)(pool->hashes + new_alloc);
|
||||||
|
#else
|
||||||
|
pool->lengths = (qstr_len_t *)(pool->qstrs + new_alloc);
|
||||||
|
#endif
|
||||||
pool->prev = MP_STATE_VM(last_pool);
|
pool->prev = MP_STATE_VM(last_pool);
|
||||||
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
|
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
|
||||||
pool->alloc = new_alloc;
|
pool->alloc = new_alloc;
|
||||||
|
@ -223,7 +248,9 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
|
||||||
|
|
||||||
// add the new qstr
|
// add the new qstr
|
||||||
mp_uint_t at = MP_STATE_VM(last_pool)->len;
|
mp_uint_t at = MP_STATE_VM(last_pool)->len;
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
MP_STATE_VM(last_pool)->hashes[at] = hash;
|
MP_STATE_VM(last_pool)->hashes[at] = hash;
|
||||||
|
#endif
|
||||||
MP_STATE_VM(last_pool)->lengths[at] = len;
|
MP_STATE_VM(last_pool)->lengths[at] = len;
|
||||||
MP_STATE_VM(last_pool)->qstrs[at] = q_ptr;
|
MP_STATE_VM(last_pool)->qstrs[at] = q_ptr;
|
||||||
MP_STATE_VM(last_pool)->len++;
|
MP_STATE_VM(last_pool)->len++;
|
||||||
|
@ -238,8 +265,10 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
|
||||||
return MP_QSTR_;
|
return MP_QSTR_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
// work out hash of str
|
// work out hash of str
|
||||||
size_t str_hash = qstr_compute_hash((const byte *)str, str_len);
|
size_t str_hash = qstr_compute_hash((const byte *)str, str_len);
|
||||||
|
#endif
|
||||||
|
|
||||||
// search pools for the data
|
// search pools for the data
|
||||||
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
|
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
|
||||||
|
@ -261,7 +290,11 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
|
||||||
|
|
||||||
// sequential search for the remaining strings
|
// sequential search for the remaining strings
|
||||||
for (mp_uint_t at = low; at < high + 1; at++) {
|
for (mp_uint_t at = low; at < high + 1; at++) {
|
||||||
if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
|
if (
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
|
pool->hashes[at] == str_hash &&
|
||||||
|
#endif
|
||||||
|
pool->lengths[at] == str_len
|
||||||
&& memcmp(pool->qstrs[at], str, str_len) == 0) {
|
&& memcmp(pool->qstrs[at], str, str_len) == 0) {
|
||||||
return pool->total_prev_len + at;
|
return pool->total_prev_len + at;
|
||||||
}
|
}
|
||||||
|
@ -329,10 +362,9 @@ qstr qstr_from_strn(const char *str, size_t len) {
|
||||||
MP_STATE_VM(qstr_last_used) += n_bytes;
|
MP_STATE_VM(qstr_last_used) += n_bytes;
|
||||||
|
|
||||||
// store the interned strings' data
|
// store the interned strings' data
|
||||||
size_t hash = qstr_compute_hash((const byte *)str, len);
|
|
||||||
memcpy(q_ptr, str, len);
|
memcpy(q_ptr, str, len);
|
||||||
q_ptr[len] = '\0';
|
q_ptr[len] = '\0';
|
||||||
q = qstr_add(hash, len, q_ptr);
|
q = qstr_add(len, q_ptr);
|
||||||
}
|
}
|
||||||
QSTR_EXIT();
|
QSTR_EXIT();
|
||||||
return q;
|
return q;
|
||||||
|
@ -340,7 +372,11 @@ qstr qstr_from_strn(const char *str, size_t len) {
|
||||||
|
|
||||||
mp_uint_t qstr_hash(qstr q) {
|
mp_uint_t qstr_hash(qstr q) {
|
||||||
const qstr_pool_t *pool = find_qstr(&q);
|
const qstr_pool_t *pool = find_qstr(&q);
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
return pool->hashes[q];
|
return pool->hashes[q];
|
||||||
|
#else
|
||||||
|
return qstr_compute_hash((byte *)pool->qstrs[q], pool->lengths[q]);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t qstr_len(qstr q) {
|
size_t qstr_len(qstr q) {
|
||||||
|
@ -375,7 +411,11 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si
|
||||||
*n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap
|
*n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap
|
||||||
#else
|
#else
|
||||||
*n_total_bytes += sizeof(qstr_pool_t)
|
*n_total_bytes += sizeof(qstr_pool_t)
|
||||||
+ (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * pool->alloc;
|
+ (sizeof(const char *)
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
|
+ sizeof(qstr_hash_t)
|
||||||
|
#endif
|
||||||
|
+ sizeof(qstr_len_t)) * pool->alloc;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
*n_total_bytes += *n_str_data_bytes;
|
*n_total_bytes += *n_str_data_bytes;
|
||||||
|
|
|
@ -60,7 +60,9 @@ enum {
|
||||||
typedef size_t qstr;
|
typedef size_t qstr;
|
||||||
typedef uint16_t qstr_short_t;
|
typedef uint16_t qstr_short_t;
|
||||||
|
|
||||||
#if MICROPY_QSTR_BYTES_IN_HASH == 1
|
#if MICROPY_QSTR_BYTES_IN_HASH == 0
|
||||||
|
// No qstr_hash_t type needed.
|
||||||
|
#elif MICROPY_QSTR_BYTES_IN_HASH == 1
|
||||||
typedef uint8_t qstr_hash_t;
|
typedef uint8_t qstr_hash_t;
|
||||||
#elif MICROPY_QSTR_BYTES_IN_HASH == 2
|
#elif MICROPY_QSTR_BYTES_IN_HASH == 2
|
||||||
typedef uint16_t qstr_hash_t;
|
typedef uint16_t qstr_hash_t;
|
||||||
|
@ -82,7 +84,9 @@ typedef struct _qstr_pool_t {
|
||||||
size_t is_sorted : 1;
|
size_t is_sorted : 1;
|
||||||
size_t alloc;
|
size_t alloc;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
#if MICROPY_QSTR_BYTES_IN_HASH
|
||||||
qstr_hash_t *hashes;
|
qstr_hash_t *hashes;
|
||||||
|
#endif
|
||||||
qstr_len_t *lengths;
|
qstr_len_t *lengths;
|
||||||
const char *qstrs[];
|
const char *qstrs[];
|
||||||
} qstr_pool_t;
|
} qstr_pool_t;
|
||||||
|
@ -92,6 +96,7 @@ typedef struct _qstr_pool_t {
|
||||||
void qstr_init(void);
|
void qstr_init(void);
|
||||||
|
|
||||||
size_t qstr_compute_hash(const byte *data, size_t len);
|
size_t qstr_compute_hash(const byte *data, size_t len);
|
||||||
|
|
||||||
qstr qstr_find_strn(const char *str, size_t str_len); // returns MP_QSTRnull if not found
|
qstr qstr_find_strn(const char *str, size_t str_len); // returns MP_QSTRnull if not found
|
||||||
|
|
||||||
qstr qstr_from_str(const char *str);
|
qstr qstr_from_str(const char *str);
|
||||||
|
|
|
@ -1473,21 +1473,20 @@ def freeze_mpy(firmware_qstr_idents, compiled_modules):
|
||||||
raw_code_count = 0
|
raw_code_count = 0
|
||||||
raw_code_content = 0
|
raw_code_content = 0
|
||||||
|
|
||||||
|
if config.MICROPY_QSTR_BYTES_IN_HASH:
|
||||||
print()
|
print()
|
||||||
print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
|
print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
|
||||||
qstr_size = {"metadata": 0, "data": 0}
|
|
||||||
for _, _, _, qbytes in new:
|
for _, _, _, qbytes in new:
|
||||||
qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH)
|
qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH)
|
||||||
print(" %d," % qhash)
|
print(" %d," % qhash)
|
||||||
|
qstr_content += config.MICROPY_QSTR_BYTES_IN_HASH
|
||||||
print("};")
|
print("};")
|
||||||
print()
|
print()
|
||||||
print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {")
|
print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {")
|
||||||
for _, _, _, qbytes in new:
|
for _, _, _, qbytes in new:
|
||||||
print(" %d," % len(qbytes))
|
print(" %d," % len(qbytes))
|
||||||
qstr_size["metadata"] += (
|
qstr_content += config.MICROPY_QSTR_BYTES_IN_LEN
|
||||||
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH
|
qstr_content += len(qbytes) + 1 # include NUL
|
||||||
)
|
|
||||||
qstr_size["data"] += len(qbytes)
|
|
||||||
print("};")
|
print("};")
|
||||||
print()
|
print()
|
||||||
print("extern const qstr_pool_t mp_qstr_const_pool;")
|
print("extern const qstr_pool_t mp_qstr_const_pool;")
|
||||||
|
@ -1497,14 +1496,12 @@ def freeze_mpy(firmware_qstr_idents, compiled_modules):
|
||||||
print(" true, // is_sorted")
|
print(" true, // is_sorted")
|
||||||
print(" %u, // allocated entries" % qstr_pool_alloc)
|
print(" %u, // allocated entries" % qstr_pool_alloc)
|
||||||
print(" %u, // used entries" % len(new))
|
print(" %u, // used entries" % len(new))
|
||||||
|
if config.MICROPY_QSTR_BYTES_IN_HASH:
|
||||||
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
|
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
|
||||||
print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
|
print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
|
||||||
print(" {")
|
print(" {")
|
||||||
for _, _, qstr, qbytes in new:
|
for _, _, qstr, qbytes in new:
|
||||||
print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))
|
print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))
|
||||||
qstr_content += (
|
|
||||||
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1
|
|
||||||
)
|
|
||||||
print(" },")
|
print(" },")
|
||||||
print("};")
|
print("};")
|
||||||
|
|
||||||
|
|
Ładowanie…
Reference in New Issue