py/persistentcode: Add architecture flags compatibility checks.

This commit extends the MPY file format in a backwards-compatible way to
store an encoded form of architecture-specific flags that have been
specified in the "mpy-cross" command line, or that have been explicitly
set as part of a native emitter configuration.

The file format changes are as follows:

* The features byte, previously containing the target native
  architecture and the minor file format version, now claims bit 6 as a
  flag indicating the presence of an encoded architecture flags integer.
* If architecture flags need to be stored, they are placed right after
  the MPY file header.

This means that properly-written MPY parsers, when encountering an MPY
file containing encoded architecture flags, should raise an error, since
no architecture identifiers have been defined that make use of bits 6
and 7 in the referenced header byte.  This should give enough guarantees
of backwards compatibility when this feature is used (improper parsers
were subject to breakage anyway).

The encoded architecture flags could have been placed at the end, but:

* Having them right after the header lets the architecture
  compatibility checks occur before the whole file has been read into
  memory (which still happens on certain platforms, as the reader may be
  backed by a memory buffer), and avoids memory allocations that would
  otherwise be made for a module that ends up being rejected.
* Properly-written MPY file parsers should already have checked that the
  upper two bits of the features byte are zero, according to the format
  specification available right before this change, so no assumptions
  should have been made on the exact order of the chunks for an
  unexpected format.

The meaning of the architecture flags value is backend-specific; for the
time being, the only common characteristic is that it is stored as a
variable-length encoded unsigned integer.

The changes made to the file format effectively limit the number of
possible target architectures to 16, of which 13 are already claimed.
There aren't that many new architectures planned to be supported for the
lifetime of the current MPY file format, so this change still leaves
space for architecture updates if needed.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
This commit is contained in:
Alessandro Gatti
2025-09-23 00:50:51 +02:00
parent 7373338fa9
commit a6bc1ccbe5
6 changed files with 80 additions and 7 deletions

View File

@@ -120,6 +120,8 @@ MP_BC_FORMAT_QSTR = 1
MP_BC_FORMAT_VAR_UINT = 2
MP_BC_FORMAT_OFFSET = 3
MP_NATIVE_ARCH_FLAGS_PRESENT = 0x40
mp_unary_op_method_name = (
"__pos__",
"__neg__",
@@ -542,6 +544,7 @@ class CompiledModule:
mpy_source_file,
mpy_segments,
header,
arch_flags,
qstr_table,
obj_table,
raw_code,
@@ -554,6 +557,7 @@ class CompiledModule:
self.mpy_segments = mpy_segments
self.source_file = qstr_table[0]
self.header = header
self.arch_flags = arch_flags
self.qstr_table = qstr_table
self.obj_table = obj_table
self.raw_code = raw_code
@@ -1339,7 +1343,7 @@ def read_mpy(filename):
if header[1] != config.MPY_VERSION:
raise MPYReadError(filename, "incompatible .mpy version")
feature_byte = header[2]
mpy_native_arch = feature_byte >> 2
mpy_native_arch = (feature_byte >> 2) & 0x2F
if mpy_native_arch != MP_NATIVE_ARCH_NONE:
mpy_sub_version = feature_byte & 3
if mpy_sub_version != config.MPY_SUB_VERSION:
@@ -1350,6 +1354,11 @@ def read_mpy(filename):
raise MPYReadError(filename, "native architecture mismatch")
config.mp_small_int_bits = header[3]
arch_flags = 0
# Read the architecture-specific flag bits if present.
if (feature_byte & MP_NATIVE_ARCH_FLAGS_PRESENT) != 0:
arch_flags = reader.read_uint()
# Read number of qstrs, and number of objects.
n_qstr = reader.read_uint()
n_obj = reader.read_uint()
@@ -1378,6 +1387,7 @@ def read_mpy(filename):
filename,
segments,
header,
arch_flags,
qstr_table,
obj_table,
raw_code,
@@ -1673,25 +1683,39 @@ def merge_mpy(compiled_modules, output_file):
merged_mpy.extend(f.read())
else:
main_cm_idx = None
arch_flags = 0
for idx, cm in enumerate(compiled_modules):
feature_byte = cm.header[2]
mpy_native_arch = feature_byte >> 2
mpy_native_arch = (feature_byte >> 2) & 0x2F
if mpy_native_arch:
# Must use qstr_table and obj_table from this raw_code
if main_cm_idx is not None:
raise Exception("can't merge files when more than one contains native code")
main_cm_idx = idx
arch_flags = cm.arch_flags
if main_cm_idx is not None:
# Shift main_cm to front of list.
compiled_modules.insert(0, compiled_modules.pop(main_cm_idx))
if config.arch_flags is not None:
arch_flags = config.arch_flags
header = bytearray(4)
header[0] = ord("M")
header[1] = config.MPY_VERSION
header[2] = config.native_arch << 2 | config.MPY_SUB_VERSION if config.native_arch else 0
header[2] = (
(MP_NATIVE_ARCH_FLAGS_PRESENT if arch_flags != 0 else 0)
| config.native_arch << 2
| config.MPY_SUB_VERSION
if config.native_arch
else 0
)
header[3] = config.mp_small_int_bits
merged_mpy.extend(header)
if arch_flags != 0:
merged_mpy.extend(mp_encode_uint(arch_flags))
n_qstr = 0
n_obj = 0
for cm in compiled_modules:
@@ -1823,6 +1847,12 @@ def main(args=None):
default=16,
help="mpz digit size used by target (default 16)",
)
cmd_parser.add_argument(
"-march-flags",
metavar="F",
type=int,
help="architecture flags value to set in the output file (strips existing flags if not present)",
)
cmd_parser.add_argument("-o", "--output", default=None, help="output file")
cmd_parser.add_argument("files", nargs="+", help="input .mpy files")
args = cmd_parser.parse_args(args)
@@ -1835,6 +1865,7 @@ def main(args=None):
}[args.mlongint_impl]
config.MPZ_DIG_SIZE = args.mmpz_dig_size
config.native_arch = MP_NATIVE_ARCH_NONE
config.arch_flags = args.march_flags
# set config values for qstrs, and get the existing base set of qstrs
# already in the firmware