py/asmrv32: Do not clobber index register on viper load/store.

This commit lets the native emitter preserve the value of the index
register when performing register-indexed loads or stores for halfword
or word values on RV32.

The original code was optimised too aggressively to reduce the generated
code's size, using compressed opcodes that alias the target register to
one of the operands.  In register-indexed load/store operations, the
index register was assumed to be allocated somewhere safe to clobber,
but that was not always the case.

To solve this, now all halfword and word register-indexed operations
will use REG_TEMP2 to store the scaled index register.  The size penalty
on generated code varies across operation sizes and enabled extensions:

- byte operations stay the same size with or without Zba
- halfword operations will be 2 bytes larger without Zba, and will stay
  the same size with Zba
- word operations will be 4 bytes larger without Zba, and 2 bytes larger
  with Zba

There is also a minor firmware footprint increase to hold the extra
logic needed for writing the scaled index into a scratch register, but
it should be negligible in practice.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
This commit is contained in:
Alessandro Gatti
2026-02-15 21:04:03 +01:00
committed by Damien George
parent b46f9f503f
commit 424ae08680

View File

@@ -620,28 +620,31 @@ void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs)
asm_rv32_opcode_xor(state, rd, rd, rs);
}
// WARNING: The scaled offset will be stored in REG_TEMP2.
// Computes REG_TEMP2 = rs1 + (rs2 << operation_size) without modifying
// either rs1 or rs2, so register-indexed loads/stores do not clobber the
// index register (it may live in a register the caller still needs).
//
// operation_size selects the element size as a shift amount:
// 0 = byte, 1 = halfword, 2 = word.
static void asm_rv32_fix_up_scaled_reg_reg_reg(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) {
    assert(operation_size <= 2 && "Operation size value out of range.");
    if (operation_size > 0 && asm_rv32_allow_zba_opcodes()) {
        // With the Zba extension a single shift-and-add does the whole job:
        // sh{1,2}add REG_TEMP2, rs2, rs1
        asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0x33, 1 << operation_size, 0x10, REG_TEMP2, rs2, rs1));
    } else {
        if (operation_size > 0) {
            // Scale the index into the scratch register, then add the base.
            // A full (non-compressed) SLLI is needed here because the
            // destination differs from the source operand.
            asm_rv32_opcode_slli(state, REG_TEMP2, rs2, operation_size);
            asm_rv32_opcode_cadd(state, REG_TEMP2, rs1);
        } else {
            // Byte access: no scaling needed, a plain add suffices.
            asm_rv32_opcode_add(state, REG_TEMP2, rs1, rs2);
        }
    }
}
// Emits a register-indexed load: rd = *(rs1 + (rs2 << operation_size)).
// rs1 and rs2 are left untouched; REG_TEMP2 is clobbered with the
// computed effective address.
void asm_rv32_emit_load_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) {
    asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size);
    // Load through the scratch register so the index register survives.
    asm_rv32_emit_load_reg_reg_offset(state, rd, REG_TEMP2, 0, operation_size);
}
// Emits a register-indexed store: *(rs1 + (rs2 << operation_size)) = rd.
// rs1 and rs2 are left untouched; REG_TEMP2 is clobbered with the
// computed effective address.
void asm_rv32_emit_store_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) {
    asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size);
    // Store through the scratch register so the index register survives.
    asm_rv32_emit_store_reg_reg_offset(state, rd, REG_TEMP2, 0, operation_size);
}
void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) {