py/qstr: Add support for sorted qstr pools.

This provides a significant performance boost for qstr_find_strn, which is
called a lot during parsing and loading of .mpy files, as well as interning
of string objects (which happens in most string methods that return new
strings).

Also adds comments to explain the "static" qstrs.  These are part of the
.mpy ABI and avoid needing to duplicate string data for QSTRs known to
already be in the firmware.  The static pool isn't currently sorted, but in
the future we could either split the static pool into sorted regions, or
simply sort it in the next .mpy version.

Based on initial work done by @amirgon in #6896.

This work was funded through GitHub Sponsors.

Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
This commit is contained in:
Jim Mussared
2023-02-15 16:09:04 +11:00
committed by Damien George
parent e910533012
commit 64c79a5423
5 changed files with 200 additions and 54 deletions

View File

@@ -1396,15 +1396,16 @@ def disassemble_mpy(compiled_modules):
cm.disassemble()
def freeze_mpy(base_qstrs, compiled_modules):
def freeze_mpy(firmware_qstr_idents, compiled_modules):
# add to qstrs
new = {}
for q in global_qstrs.qstrs:
# don't add duplicates
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
# don't add duplicates that are already in the firmware
if q is None or q.qstr_esc in firmware_qstr_idents or q.qstr_esc in new:
continue
new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8"))
new = sorted(new.values(), key=lambda x: x[0])
# Sort by string value (because this is a sorted pool).
new = sorted(new.values(), key=lambda x: x[2])
print('#include "py/mpconfig.h"')
print('#include "py/objint.h"')
@@ -1485,6 +1486,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
print("const qstr_pool_t mp_qstr_frozen_const_pool = {")
print(" &mp_qstr_const_pool, // previous pool")
print(" MP_QSTRnumber_of, // previous pool size")
print(" true, // is_sorted")
print(" %u, // allocated entries" % qstr_pool_alloc)
print(" %u, // used entries" % len(new))
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
@@ -1779,14 +1781,16 @@ def main():
config.native_arch = MP_NATIVE_ARCH_NONE
# set config values for qstrs, and get the existing base set of qstrs
# already in the firmware
if args.qstr_header:
qcfgs, base_qstrs = qstrutil.parse_input_headers([args.qstr_header])
qcfgs, extra_qstrs = qstrutil.parse_input_headers([args.qstr_header])
firmware_qstr_idents = set(qstrutil.static_qstr_list_ident) | set(extra_qstrs.keys())
config.MICROPY_QSTR_BYTES_IN_LEN = int(qcfgs["BYTES_IN_LEN"])
config.MICROPY_QSTR_BYTES_IN_HASH = int(qcfgs["BYTES_IN_HASH"])
else:
config.MICROPY_QSTR_BYTES_IN_LEN = 1
config.MICROPY_QSTR_BYTES_IN_HASH = 1
base_qstrs = list(qstrutil.static_qstr_list)
firmware_qstr_idents = set(qstrutil.static_qstr_list)
# Create initial list of global qstrs.
global_qstrs = GlobalQStrList()
@@ -1808,7 +1812,7 @@ def main():
if args.freeze:
try:
freeze_mpy(base_qstrs, compiled_modules)
freeze_mpy(firmware_qstr_idents, compiled_modules)
except FreezeError as er:
print(er, file=sys.stderr)
sys.exit(1)