From b46f9f503feae99a92f98a882666cb09b460151a Mon Sep 17 00:00:00 2001 From: Alessandro Gatti Date: Sun, 15 Feb 2026 20:55:24 +0100 Subject: [PATCH] py/asmthumb: Do not clobber index register on viper load/store. This commit lets the native emitter preserve the value of the index register when performing register-indexed loads or stores of halfword or word values on Thumb. The original code was optimised too aggressively for a register-starved architecture like Thumb, and the index value in the generated sequence was assumed to be allocated somewhere safe. This is valid on other architectures, but not on Thumb. To solve this, load operations clobber a temporary register that is safe to use, REG_TEMP2, to store the scaled register offset. REG_TEMP2's value is only used within the scope of a single ASM API instruction. Store operations unfortunately use a register that is aliased to REG_TEMP2, since they need to have three values in registers to perform the operation. This means the index register needs to be pushed to the stack before performing the scale + store operation, and then popped from the stack. That's a 4-byte penalty on each store and a minor speed hit on generated code (plus a minor footprint increase of the firmware image). 
Signed-off-by: Alessandro Gatti --- py/asmthumb.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/py/asmthumb.h b/py/asmthumb.h index 88f4e399bc..4d9002290c 100644 --- a/py/asmthumb.h +++ b/py/asmthumb.h @@ -465,24 +465,28 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel); #define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_thumb_ldrb_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)) #define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) \ do { \ - asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 1); \ - asm_thumb_ldrh_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)); \ + asm_thumb_lsl_rlo_rlo_i5((as), REG_TEMP2, (reg_index), 1); \ + asm_thumb_ldrh_rlo_rlo_rlo((as), (reg_dest), (reg_base), REG_TEMP2); \ } while (0) #define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) \ do { \ - asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 2); \ - asm_thumb_ldr_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)); \ + asm_thumb_lsl_rlo_rlo_i5((as), REG_TEMP2, (reg_index), 2); \ + asm_thumb_ldr_rlo_rlo_rlo((as), (reg_dest), (reg_base), REG_TEMP2); \ } while (0) #define ASM_STORE8_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_thumb_strb_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)) #define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) \ do { \ + asm_thumb_op16((as), 0xB400 | (1 << (reg_index))); \ asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 1); \ asm_thumb_strh_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \ + asm_thumb_op16((as), 0xBC00 | (1 << (reg_index))); \ } while (0) #define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) \ do { \ + asm_thumb_op16((as), 0xB400 | (1 << (reg_index))); \ asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 2); \ asm_thumb_str_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \ + asm_thumb_op16((as), 0xBC00 | (1 << (reg_index))); \ } while (0) #define ASM_CLR_REG(as, reg_dest) 
asm_thumb_mov_rlo_i8((as), (reg_dest), 0)