py/asmrv32: Do not clobber index register on viper load/store.

This commit lets the native emitter preserve the value of the index
register when performing register-indexed loads or stores for halfword
or word values on RV32.

The original code was optimised too aggressively to reduce the generated
code's size, using compressed opcodes that alias the target register to
one of the operands.  In register-indexed load/store operations, the
index register was assumed to be allocated somewhere safe to clobber,
but that was not always the case.

To solve this, now all halfword and word register-indexed operations
will use REG_TEMP2 to store the scaled index register.  The size penalty
on generated code varies across operation sizes and enabled extensions:

- byte operations stay the same size with or without Zba
- halfword operations will be 2 bytes larger without Zba, and will stay
  the same size with Zba
- word operations will be 4 bytes larger without Zba, and 2 bytes larger
  with Zba

There is also a minor firmware footprint increase to hold the extra
logic needed for writing the scaled index into a scratch register, but
it should be negligible in practice.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
This commit is contained in:
Alessandro Gatti
2026-02-15 21:04:03 +01:00
committed by Damien George
parent b46f9f503f
commit 424ae08680

View File

@@ -620,28 +620,31 @@ void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs)
asm_rv32_opcode_xor(state, rd, rd, rs);
}
// WARNING: The scaled offset will be stored in REG_TEMP2.
// Computes REG_TEMP2 = rs1 + (rs2 << operation_size) without modifying
// either rs1 or rs2, so register-indexed loads/stores do not clobber the
// index register (it may live in a register the caller still needs).
//
// operation_size selects the element size as a shift amount:
// 0 = byte, 1 = halfword, 2 = word.
static void asm_rv32_fix_up_scaled_reg_reg_reg(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) {
    assert(operation_size <= 2 && "Operation size value out of range.");
    if (operation_size > 0 && asm_rv32_allow_zba_opcodes()) {
        // With the Zba extension a single shift-and-add does the whole job:
        // sh{1,2}add REG_TEMP2, rs2, rs1
        asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_R(0x33, 1 << operation_size, 0x10, REG_TEMP2, rs2, rs1));
    } else {
        if (operation_size > 0) {
            // Scale the index into the scratch register, then add the base.
            // A full (non-compressed) SLLI is needed here because the
            // destination differs from the source operand.
            asm_rv32_opcode_slli(state, REG_TEMP2, rs2, operation_size);
            asm_rv32_opcode_cadd(state, REG_TEMP2, rs1);
        } else {
            // Byte access: no scaling needed, a plain add suffices.
            asm_rv32_opcode_add(state, REG_TEMP2, rs1, rs2);
        }
    }
}
// Emits a register-indexed load: rd = *(rs1 + (rs2 << operation_size)).
// rs1 and rs2 are left untouched; REG_TEMP2 is clobbered with the
// computed effective address.
void asm_rv32_emit_load_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) {
    asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size);
    // Load through the scratch register so the index register survives.
    asm_rv32_emit_load_reg_reg_offset(state, rd, REG_TEMP2, 0, operation_size);
}
// Emits a register-indexed store: *(rs1 + (rs2 << operation_size)) = rd.
// rs1 and rs2 are left untouched; REG_TEMP2 is clobbered with the
// computed effective address.
void asm_rv32_emit_store_reg_reg_reg(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t operation_size) {
    asm_rv32_fix_up_scaled_reg_reg_reg(state, rs1, rs2, operation_size);
    // Store through the scratch register so the index register survives.
    asm_rv32_emit_store_reg_reg_offset(state, rd, REG_TEMP2, 0, operation_size);
}
void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) {