From f0895f0ea0d3daea42161bc29c3ff8abeb101e57 Mon Sep 17 00:00:00 2001
From: Alessandro Gatti
Date: Tue, 4 Nov 2025 09:46:16 +0100
Subject: [PATCH] py/emitnative: Optimise register clearing.

This commit introduces a new generic ASM API function to clear a
register (i.e. to set all of the register's bits to zero).

The native emitter used to perform a XOR operation to clear a given
register, but different platforms have more optimised ways to achieve
the same result while taking up less space, either in the generated
code or in the code generator itself.

Arm, RV32, x86, and x64 already had optimised generators that emitted
optimised code.  When built for Thumb, the code generator takes less
space emitting a constant immediate move rather than a XOR operation,
even though both operations distill down to a single narrow opcode.
On Xtensa the situation is almost the same as on Thumb, except that a
constant immediate move also takes one byte less than a XOR operation.

Signed-off-by: Alessandro Gatti
---
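Editor's note (placed below the --- marker so it stays out of the
commit message): on Thumb both "eors rX, rX" and "movs rX, #0"
assemble to a single 16-bit opcode, so the saving is in the code
generator's own footprint; on Xtensa the narrow "movi.n" is a 2-byte
opcode while "xor" is a 3-byte one, so the generated code shrinks as
well.  The standalone sketch below, with a dummy context type and a
hypothetical print-based expansion, illustrates the generic ASM API
dispatch this patch relies on: each port header defines ASM_CLR_REG,
and generic emitter code simply invokes it.

    #include <stdio.h>

    /* Hypothetical stand-in for a port's assembler context. */
    typedef struct { const char *port; } asm_dummy_t;

    /* A port-specific ASM_CLR_REG, analogous to the definitions added
     * below (e.g. Thumb's asm_thumb_mov_rlo_i8((as), (reg_dest), 0)). */
    #define ASM_CLR_REG(as, reg_dest) \
        printf("%s: emit optimised clear of r%u\n", (as)->port, (unsigned)(reg_dest))

    int main(void) {
        asm_dummy_t as = { "thumb" };
        // Generic emitter code now calls this instead of the previous
        // ASM_XOR_REG_REG(as, rd, rd) fallback.
        ASM_CLR_REG(&as, 0);
        return 0;
    }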
 py/asmarm.h     |  2 ++
 py/asmrv32.h    |  2 +-
 py/asmthumb.h   |  2 ++
 py/asmx64.h     |  2 ++
 py/asmx86.h     |  2 ++
 py/asmxtensa.h  |  2 ++
 py/emitnative.c | 22 ++++++++--------------
 py/emitndebug.c |  3 +++
 8 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/py/asmarm.h b/py/asmarm.h
index 405457d440..5ae952ee8a 100644
--- a/py/asmarm.h
+++ b/py/asmarm.h
@@ -230,6 +230,8 @@ void asm_arm_bx_reg(asm_arm_t *as, uint reg_src);
 #define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_strh_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
 #define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_str_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
 
+#define ASM_CLR_REG(as, reg_dest) asm_arm_eor_reg_reg_reg((as), (reg_dest), (reg_dest), (reg_dest))
+
 #endif // GENERIC_ASM_API
 
 #endif // MICROPY_INCLUDED_PY_ASMARM_H
diff --git a/py/asmrv32.h b/py/asmrv32.h
index 6f709daa11..1100d09801 100644
--- a/py/asmrv32.h
+++ b/py/asmrv32.h
@@ -804,7 +804,7 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
 #define ASM_STORE32_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset, 2)
 #define ASM_SUB_REG_REG(state, rd, rs) asm_rv32_opcode_sub(state, rd, rd, rs)
 #define ASM_XOR_REG_REG(state, rd, rs) asm_rv32_emit_optimised_xor(state, rd, rs)
-#define ASM_CLR_REG(state, rd)
+#define ASM_CLR_REG(state, rd) asm_rv32_emit_optimised_xor(state, rd, rd)
 #define ASM_LOAD8_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 0)
 #define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 1)
 #define ASM_LOAD32_REG_REG_REG(state, rd, rs1, rs2) asm_rv32_emit_load_reg_reg_reg(state, rd, rs1, rs2, 2)
diff --git a/py/asmthumb.h b/py/asmthumb.h
index 5edf6573e1..88f4e399bc 100644
--- a/py/asmthumb.h
+++ b/py/asmthumb.h
@@ -485,6 +485,8 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel);
         asm_thumb_str_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \
     } while (0)
 
+#define ASM_CLR_REG(as, reg_dest) asm_thumb_mov_rlo_i8((as), (reg_dest), 0)
+
 #endif // GENERIC_ASM_API
 
 #endif // MICROPY_INCLUDED_PY_ASMTHUMB_H
diff --git a/py/asmx64.h b/py/asmx64.h
index d80c5dcc13..efc3027b17 100644
--- a/py/asmx64.h
+++ b/py/asmx64.h
@@ -221,6 +221,8 @@ void asm_x64_call_ind(asm_x64_t *as, size_t fun_id, int temp_r32);
 #define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
 #define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, dword_offset) asm_x64_mov_r32_to_mem32((as), (reg_src), (reg_base), 4 * (dword_offset))
 
+#define ASM_CLR_REG(as, reg_dest) asm_x64_xor_r64_r64((as), (reg_dest), (reg_dest))
+
 #endif // GENERIC_ASM_API
 
 #endif // MICROPY_INCLUDED_PY_ASMX64_H
diff --git a/py/asmx86.h b/py/asmx86.h
index d2e078ad51..80a67794d2 100644
--- a/py/asmx86.h
+++ b/py/asmx86.h
@@ -216,6 +216,8 @@ void asm_x86_call_ind(asm_x86_t *as, size_t fun_id, mp_uint_t n_args, int temp_r
 #define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
 #define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, dword_offset) asm_x86_mov_r32_to_mem32((as), (reg_src), (reg_base), 4 * (dword_offset))
 
+#define ASM_CLR_REG(as, reg_dest) asm_x86_xor_r32_r32((as), (reg_dest), (reg_dest))
+
 #endif // GENERIC_ASM_API
 
 #endif // MICROPY_INCLUDED_PY_ASMX86_H
diff --git a/py/asmxtensa.h b/py/asmxtensa.h
index 559b3cacd5..15f8b4d92e 100644
--- a/py/asmxtensa.h
+++ b/py/asmxtensa.h
@@ -464,6 +464,8 @@ void asm_xtensa_l32r(asm_xtensa_t *as, mp_uint_t reg, mp_uint_t label);
         asm_xtensa_op_s32i_n((as), (reg_val), (reg_base), 0); \
     } while (0)
 
+#define ASM_CLR_REG(as, reg_dest) asm_xtensa_op_movi_n((as), (reg_dest), 0)
+
 #endif // GENERIC_ASM_API
 
 #endif // MICROPY_INCLUDED_PY_ASMXTENSA_H
diff --git a/py/emitnative.c b/py/emitnative.c
index a33ec01ec0..6cf01dcab1 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -282,17 +282,13 @@ struct _emit_t {
     ASM_T *as;
 };
 
-#ifndef REG_ZERO
-#define REG_ZERO REG_TEMP0
-#define ASM_CLR_REG(state, rd) ASM_XOR_REG_REG(state, rd, rd)
-#endif
-
-#if N_RV32
+#ifdef REG_ZERO
 #define ASM_MOV_LOCAL_MP_OBJ_NULL(as, local_num, reg_temp) \
     ASM_MOV_LOCAL_REG(as, local_num, REG_ZERO)
 #else
+#define REG_ZERO REG_TEMP0
 #define ASM_MOV_LOCAL_MP_OBJ_NULL(as, local_num, reg_temp) \
-    ASM_MOV_REG_IMM(as, reg_temp, (mp_uint_t)MP_OBJ_NULL); \
+    ASM_CLR_REG(as, reg_temp); \
     ASM_MOV_LOCAL_REG(as, local_num, reg_temp)
 #endif
 
@@ -1145,7 +1141,7 @@ static void emit_native_leave_exc_stack(emit_t *emit, bool start_of_handler) {
             // Optimisation: PC is already cleared by global exc handler
             return;
         }
-        ASM_XOR_REG_REG(emit->as, REG_RET, REG_RET);
+        ASM_CLR_REG(emit->as, REG_RET);
     } else {
         // Found new active handler, get its PC
         ASM_MOV_REG_PCREL(emit->as, REG_RET, e->label);
@@ -1242,8 +1238,7 @@ static void emit_native_global_exc_entry(emit_t *emit) {
            ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, start_label, true);
        } else {
            // Clear the unwind state
-           ASM_CLR_REG(emit->as, REG_ZERO);
-           ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_ZERO);
+           ASM_MOV_LOCAL_MP_OBJ_NULL(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_ZERO);
 
            // clear nlr.ret_val, because it's passed to mp_native_raise regardless
            // of whether there was an exception or not
@@ -1263,8 +1258,7 @@ static void emit_native_global_exc_entry(emit_t *emit) {
            ASM_JUMP_IF_REG_NONZERO(emit->as, REG_RET, global_except_label, true);
 
            // Clear PC of current code block, and jump there to resume execution
-           ASM_CLR_REG(emit->as, REG_ZERO);
-           ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_ZERO);
+           ASM_MOV_LOCAL_MP_OBJ_NULL(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_ZERO);
            ASM_JUMP_REG(emit->as, REG_LOCAL_1);
 
            // Global exception handler: check for valid exception handler
@@ -1945,7 +1939,7 @@ static void emit_native_delete_attr(emit_t *emit, qstr qst) {
    vtype_kind_t vtype_base;
    emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = base
    assert(vtype_base == VTYPE_PYOBJ);
-   ASM_XOR_REG_REG(emit->as, REG_ARG_3, REG_ARG_3); // arg3 = value (null for delete)
+   ASM_CLR_REG(emit->as, REG_ARG_3); // arg3 = value (null for delete)
    emit_call_with_qstr_arg(emit, MP_F_STORE_ATTR, qst, REG_ARG_2); // arg2 = attribute name
    emit_post(emit);
 }
@@ -2091,7 +2085,7 @@ static void emit_native_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t exc
        // No finally, handle the jump ourselves
        // First, restore the exception handler address for the jump
        if (e < emit->exc_stack) {
-           ASM_XOR_REG_REG(emit->as, REG_RET, REG_RET);
+           ASM_CLR_REG(emit->as, REG_RET);
        } else {
            ASM_MOV_REG_PCREL(emit->as, REG_RET, e->label);
        }
diff --git a/py/emitndebug.c b/py/emitndebug.c
index e49c5cdbff..2144d14e6b 100644
--- a/py/emitndebug.c
+++ b/py/emitndebug.c
@@ -271,6 +271,9 @@ static void asm_debug_setcc_reg_reg_reg(asm_debug_t *as, int op, int reg1, int r
 #define ASM_STORE32_REG_REG(as, reg_src, reg_base) \
     asm_debug_reg_reg(as, "store32", reg_src, reg_base)
 
+#define ASM_CLR_REG(as, reg_dest) \
+    asm_debug_reg(as, "clr", reg_dest)
+
 // Word indices of REG_LOCAL_x in nlr_buf_t
 #define NLR_BUF_IDX_LOCAL_1 (5) // rbx
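
Editor's note (appended after the patch body, not part of the commit):
the py/emitnative.c hunks above also rework how MP_OBJ_NULL is written
to a local.  Ports that provide a hardwired zero register take the
#ifdef REG_ZERO path and store that register directly (as the RV32
port does, going by the removed #if N_RV32 branch); all other ports
now clear a temporary with ASM_CLR_REG first.  That is only equivalent
to the removed ASM_MOV_REG_IMM(as, reg_temp, (mp_uint_t)MP_OBJ_NULL)
form because MP_OBJ_NULL is a zero value, which the removed line
itself implies.  A minimal standalone sketch of that dispatch, with a
hypothetical register number and print-based stand-ins for the real
emitter hooks:

    #include <stdio.h>

    /* Print-based stand-ins for the real emitter hooks. */
    #define ASM_CLR_REG(as, rd)            printf("clr r%d\n", (rd))
    #define ASM_MOV_LOCAL_REG(as, num, rd) printf("local[%d] = r%d\n", (num), (rd))

    /* Define REG_ZERO before this point to model a port with a
     * hardwired zero register; leave it undefined otherwise. */
    #ifdef REG_ZERO
    #define ASM_MOV_LOCAL_MP_OBJ_NULL(as, local_num, reg_temp) \
        ASM_MOV_LOCAL_REG(as, local_num, REG_ZERO)
    #else
    #define REG_ZERO 1 /* hypothetical temporary, mirroring REG_TEMP0 */
    #define ASM_MOV_LOCAL_MP_OBJ_NULL(as, local_num, reg_temp) \
        ASM_CLR_REG(as, reg_temp); \
        ASM_MOV_LOCAL_REG(as, local_num, reg_temp)
    #endif

    int main(void) {
        // On the fallback path this prints "clr r1" then "local[3] = r1",
        // i.e. clear a temporary, then store it into the local slot.
        ASM_MOV_LOCAL_MP_OBJ_NULL(NULL, 3, REG_ZERO);
        return 0;
    }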