From b6d269ee32026c7380fdb6ebcbc7e50e84239c12 Mon Sep 17 00:00:00 2001
From: Alessandro Gatti
Date: Wed, 7 May 2025 22:41:33 +0200
Subject: [PATCH] py/emitnative: Refactor Viper register-indexed load/stores.

This commit cleans up the Viper code generation blocks for
register-indexed load and store operations.

The common code generator block is simplified by moving
architecture-specific code into the appropriate native generation
backends whenever possible.  This should make that part of the Viper
generator clearer and easier to maintain in the long term.

To achieve this, six generic assembler meta-opcodes have been
introduced, named `ASM_{LOAD,STORE}{8,16,32}_REG_REG_REG`.  A
platform-independent implementation of each operation is provided, so
backends that cannot emit a shorter sequence for the requested
operation, or that are content with the platform-independent
implementation, can simply omit the corresponding meta-opcodes.

Signed-off-by: Alessandro Gatti
---
 py/asmarm.h     |  7 ++++
 py/asmrv32.h    | 24 +++++++++++++
 py/asmthumb.c   | 20 -----------
 py/asmthumb.h   | 28 ++++++++++++---
 py/asmxtensa.h  | 20 +++++++++++
 py/emitnative.c | 94 +++++++++++++------------------------------------
 6 files changed, 98 insertions(+), 95 deletions(-)

diff --git a/py/asmarm.h b/py/asmarm.h
index 20b4757d21..77c700ff0b 100644
--- a/py/asmarm.h
+++ b/py/asmarm.h
@@ -221,6 +221,13 @@ void asm_arm_bx_reg(asm_arm_t *as, uint reg_src);
 #define ASM_STORE16_REG_REG(as, reg_value, reg_base) asm_arm_strh_reg_reg((as), (reg_value), (reg_base))
 #define ASM_STORE32_REG_REG(as, reg_value, reg_base) asm_arm_str_reg_reg((as), (reg_value), (reg_base), 0)
 
+#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldrb_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
+#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldrh_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
+#define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldr_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))
+#define ASM_STORE8_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_strb_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
+#define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_strh_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
+#define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_arm_str_reg_reg_reg((as), (reg_val), (reg_base), (reg_index))
+
 #endif // GENERIC_ASM_API
 
 #endif // MICROPY_INCLUDED_PY_ASMARM_H
diff --git a/py/asmrv32.h b/py/asmrv32.h
index b09f48eb12..1cbdc88720 100644
--- a/py/asmrv32.h
+++ b/py/asmrv32.h
@@ -758,6 +758,30 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
 #define ASM_SUB_REG_REG(state, rd, rs) asm_rv32_opcode_sub(state, rd, rd, rs)
 #define ASM_XOR_REG_REG(state, rd, rs) asm_rv32_emit_optimised_xor(state, rd, rs)
 #define ASM_CLR_REG(state, rd)
+#define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_slli(state, rs2, rs2, 1); \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_lhu(state, rd, rs1, 0); \
+    } while (0)
+#define ASM_LOAD32_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_slli(state, rs2, rs2, 2); \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_lw(state, rd, rs1, 0); \
+    } while (0)
+#define ASM_STORE16_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_slli(state, rs2, rs2, 1); \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_sh(state, rd, rs1, 0); \
+    } while (0)
+#define ASM_STORE32_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_slli(state, rs2, rs2, 2); \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_sw(state, rd, rs1, 0); \
+    } while (0)
 
 #endif
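The four RV32 macros above share a scale-add-access shape: shift the index
left by log2 of the element size, fold it into the base register with a
compressed add, then access the result at offset 0.  Both the base and index
registers are clobbered in the process.  A minimal C model of the 16-bit load
case (model_load16 is a name invented here for illustration, not part of the
patch):

    #include <stdint.h>

    // Models slli rs2, rs2, 1 / c.add rs1, rs2 / lhu rd, 0(rs1).
    // Like the emitted sequence, it destroys both the base and the index.
    static uint32_t model_load16(uint32_t *rs1, uint32_t *rs2) {
        *rs2 <<= 1;                                // scale index to bytes
        *rs1 += *rs2;                              // base += scaled index
        return *(const uint16_t *)(uintptr_t)*rs1; // zero-extended halfword load
    }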
diff --git a/py/asmthumb.c b/py/asmthumb.c
index 73684c0f5f..420815e802 100644
--- a/py/asmthumb.c
+++ b/py/asmthumb.c
@@ -491,26 +491,6 @@ void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint r
     }
 }
 
-void asm_thumb_ldrh_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index) {
-    asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 1);
-    asm_thumb_ldrh_rlo_rlo_rlo(as, reg_dest, reg_base, reg_index);
-}
-
-void asm_thumb_ldr_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index) {
-    asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 2);
-    asm_thumb_ldr_rlo_rlo_rlo(as, reg_dest, reg_base, reg_index);
-}
-
-void asm_thumb_strh_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index) {
-    asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 1);
-    asm_thumb_strh_rlo_rlo_rlo(as, reg_val, reg_base, reg_index);
-}
-
-void asm_thumb_str_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index) {
-    asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 2);
-    asm_thumb_str_rlo_rlo_rlo(as, reg_val, reg_base, reg_index);
-}
-
 // this could be wrong, because it should have a range of +/- 16MiB...
 #define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff))
 #define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff))
diff --git a/py/asmthumb.h b/py/asmthumb.h
index f7cd52fb40..f62f7dd8d9 100644
--- a/py/asmthumb.h
+++ b/py/asmthumb.h
@@ -385,11 +385,6 @@ void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label);
 void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset); // convenience
 void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset); // convenience
 
-void asm_thumb_ldrh_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index);
-void asm_thumb_ldr_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index);
-void asm_thumb_strh_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index);
-void asm_thumb_str_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index);
-
 void asm_thumb_b_label(asm_thumb_t *as, uint label); // convenience: picks narrow or wide branch
 void asm_thumb_bcc_label(asm_thumb_t *as, int cc, uint label); // convenience: picks narrow or wide branch
 void asm_thumb_bl_ind(asm_thumb_t *as, uint fun_id, uint reg_temp); // convenience
@@ -480,6 +475,29 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel);
 #define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_thumb_strh_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
 #define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
 
+#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_thumb_ldrb_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index))
+#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
+    do { \
+        asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 1); \
+        asm_thumb_ldrh_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)); \
+    } while (0)
+#define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
+    do { \
+        asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 2); \
+        asm_thumb_ldr_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index)); \
+    } while (0)
+#define ASM_STORE8_REG_REG_REG(as, reg_val, reg_base, reg_index) asm_thumb_strb_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index))
+#define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) \
+    do { \
+        asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 1); \
+        asm_thumb_strh_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \
+    } while (0)
+#define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) \
+    do { \
+        asm_thumb_lsl_rlo_rlo_i5((as), (reg_index), (reg_index), 2); \
+        asm_thumb_str_rlo_rlo_rlo((as), (reg_val), (reg_base), (reg_index)); \
+    } while (0)
+
 #endif // GENERIC_ASM_API
 
 #endif // MICROPY_INCLUDED_PY_ASMTHUMB_H
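Thumb register-offset accesses (e.g. ldr rd, [rb, ri]) add the index register
unscaled, which is why the 16- and 32-bit macros above pre-shift the index in
place while the 8-bit accesses map to a single instruction.  A C model of
what the 32-bit load sequence computes (model_thumb_load32 is an illustrative
name; the rlo_* emitters additionally assume low registers r0-r7):

    #include <stdint.h>

    // Models lsls ri, ri, #2 / ldr rd, [rb, ri]; the index is clobbered.
    static uint32_t model_thumb_load32(uint32_t rb, uint32_t ri) {
        ri <<= 2;                                      // scale index to a byte offset
        return *(const uint32_t *)(uintptr_t)(rb + ri); // register-offset load
    }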
diff --git a/py/asmxtensa.h b/py/asmxtensa.h
index d2f37bf828..6451c75aa5 100644
--- a/py/asmxtensa.h
+++ b/py/asmxtensa.h
@@ -411,12 +411,32 @@ void asm_xtensa_call_ind_win(asm_xtensa_t *as, uint idx);
 #define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_xtensa_op_l8ui((as), (reg_dest), (reg_base), 0)
 #define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_xtensa_op_l16ui((as), (reg_dest), (reg_base), 0)
 #define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_xtensa_op_l16ui((as), (reg_dest), (reg_base), (uint16_offset))
+#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
+    do { \
+        asm_xtensa_op_addx2((as), (reg_base), (reg_index), (reg_base)); \
+        asm_xtensa_op_l16ui((as), (reg_dest), (reg_base), 0); \
+    } while (0)
 #define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_xtensa_op_l32i_n((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
+    do { \
+        asm_xtensa_op_addx4((as), (reg_base), (reg_index), (reg_base)); \
+        asm_xtensa_op_l32i_n((as), (reg_dest), (reg_base), 0); \
+    } while (0)
 #define ASM_STORE_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_xtensa_s32i_optimised((as), (reg_dest), (reg_base), (word_offset))
 #define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_xtensa_op_s8i((as), (reg_src), (reg_base), 0)
 #define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_xtensa_op_s16i((as), (reg_src), (reg_base), 0)
+#define ASM_STORE16_REG_REG_REG(as, reg_val, reg_base, reg_index) \
+    do { \
+        asm_xtensa_op_addx2((as), (reg_base), (reg_index), (reg_base)); \
+        asm_xtensa_op_s16i((as), (reg_val), (reg_base), 0); \
+    } while (0)
 #define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_xtensa_op_s32i_n((as), (reg_src), (reg_base), 0)
+#define ASM_STORE32_REG_REG_REG(as, reg_val, reg_base, reg_index) \
+    do { \
+        asm_xtensa_op_addx4((as), (reg_base), (reg_index), (reg_base)); \
+        asm_xtensa_op_s32i_n((as), (reg_val), (reg_base), 0); \
+    } while (0)
 
 #endif // GENERIC_ASM_API
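On Xtensa, addx2/addx4 compute base + (index << 1 or 2) in a single
instruction, writing the sum into reg_base, so the indexed forms cost two
instructions with no separate shift of the index.  No 8-bit macros are
defined here, presumably because a plain add followed by l8ui/s8i is exactly
what the generic fallback already emits.  A C model of the 32-bit load
(model_xtensa_load32 is an invented name, for illustration only):

    #include <stdint.h>

    // Models addx4 rb, ri, rb / l32i.n rd, rb, 0; the base is clobbered.
    static uint32_t model_xtensa_load32(uint32_t rb, uint32_t ri) {
        rb += ri << 2;                           // addx4: base + 4 * index
        return *(const uint32_t *)(uintptr_t)rb; // 32-bit load at offset 0
    }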
diff --git a/py/emitnative.c b/py/emitnative.c
index 088e1e7ae0..c2eb46657d 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -1638,59 +1638,36 @@ static void emit_native_load_subscr(emit_t *emit) {
         switch (vtype_base) {
             case VTYPE_PTR8: {
                 // pointer to 8-bit memory
-                #if N_ARM
-                asm_arm_ldrb_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
-                break;
-                #elif N_THUMB
-                asm_thumb_ldrb_rlo_rlo_rlo(emit->as, REG_RET, REG_ARG_1, reg_index);
-                break;
-                #endif
-                // TODO optimise to use thumb ldrb r1, [r2, r3]
+                #ifdef ASM_LOAD8_REG_REG_REG
+                ASM_LOAD8_REG_REG_REG(emit->as, REG_RET, REG_ARG_1, reg_index);
+                #else
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_LOAD8_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+index)
+                #endif
                 break;
             }
             case VTYPE_PTR16: {
                 // pointer to 16-bit memory
-                #if N_ARM
-                asm_arm_ldrh_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
-                break;
-                #elif N_THUMB
-                asm_thumb_ldrh_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
-                break;
-                #elif N_XTENSA || N_XTENSAWIN
-                asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
-                asm_xtensa_op_l16ui(emit->as, REG_RET, REG_ARG_1, 0);
-                break;
-                #endif
+                #ifdef ASM_LOAD16_REG_REG_REG
+                ASM_LOAD16_REG_REG_REG(emit->as, REG_RET, REG_ARG_1, reg_index);
+                #else
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_LOAD16_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+2*index)
+                #endif
                 break;
             }
             case VTYPE_PTR32: {
                 // pointer to word-size memory
-                #if N_ARM
-                asm_arm_ldr_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
-                break;
-                #elif N_THUMB
-                asm_thumb_ldr_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
-                break;
-                #elif N_RV32
-                asm_rv32_opcode_slli(emit->as, REG_TEMP2, reg_index, 2);
-                asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2);
-                asm_rv32_opcode_lw(emit->as, REG_RET, REG_ARG_1, 0);
-                break;
-                #elif N_XTENSA || N_XTENSAWIN
-                asm_xtensa_op_addx4(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
-                asm_xtensa_op_l32i_n(emit->as, REG_RET, REG_ARG_1, 0);
-                break;
-                #endif
+                #ifdef ASM_LOAD32_REG_REG_REG
+                ASM_LOAD32_REG_REG_REG(emit->as, REG_RET, REG_ARG_1, reg_index);
+                #else
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_LOAD32_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+4*index)
+                #endif
                 break;
             }
             default:
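This hunk shows the new dispatch shape: if the backend defines the
three-register meta-opcode, the emitter uses it directly; otherwise it folds
the index into the base with one ASM_ADD_REG_REG per byte of element size and
finishes with a zero-offset access.  Condensed into a standalone sketch for
the 16-bit case (VIPER_LOAD16 is a hypothetical wrapper name, not used by the
patch, and it assumes reg_base and reg_index are dead after the access):

    // Per-width dispatch between a backend meta-opcode and the portable path.
    #ifdef ASM_LOAD16_REG_REG_REG
    #define VIPER_LOAD16(emit, dst, base, idx) \
        ASM_LOAD16_REG_REG_REG((emit)->as, (dst), (base), (idx))
    #else
    #define VIPER_LOAD16(emit, dst, base, idx) \
        do { \
            ASM_ADD_REG_REG((emit)->as, (base), (idx));    /* base += index */ \
            ASM_ADD_REG_REG((emit)->as, (base), (idx));    /* base += index again */ \
            ASM_LOAD16_REG_REG((emit)->as, (dst), (base)); /* load (base+2*index) */ \
        } while (0)
    #endif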
@@ -1949,59 +1926,36 @@ static void emit_native_store_subscr(emit_t *emit) {
         switch (vtype_base) {
             case VTYPE_PTR8: {
                 // pointer to 8-bit memory
-                // TODO optimise to use thumb strb r1, [r2, r3]
-                #if N_ARM
-                asm_arm_strb_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
-                break;
-                #elif N_THUMB
-                asm_thumb_strb_rlo_rlo_rlo(emit->as, reg_value, REG_ARG_1, reg_index);
-                break;
-                #endif
+                #ifdef ASM_STORE8_REG_REG_REG
+                ASM_STORE8_REG_REG_REG(emit->as, reg_value, REG_ARG_1, reg_index);
+                #else
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_STORE8_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+index)
+                #endif
                 break;
             }
             case VTYPE_PTR16: {
                 // pointer to 16-bit memory
-                #if N_ARM
-                asm_arm_strh_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
-                break;
-                #elif N_THUMB
-                asm_thumb_strh_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
-                break;
-                #elif N_XTENSA || N_XTENSAWIN
-                asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
-                asm_xtensa_op_s16i(emit->as, reg_value, REG_ARG_1, 0);
-                break;
-                #endif
+                #ifdef ASM_STORE16_REG_REG_REG
+                ASM_STORE16_REG_REG_REG(emit->as, reg_value, REG_ARG_1, reg_index);
+                #else
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_STORE16_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+2*index)
+                #endif
                 break;
             }
             case VTYPE_PTR32: {
                 // pointer to 32-bit memory
-                #if N_ARM
-                asm_arm_str_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
-                break;
-                #elif N_THUMB
-                asm_thumb_str_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
-                break;
-                #elif N_RV32
-                asm_rv32_opcode_slli(emit->as, REG_TEMP2, reg_index, 2);
-                asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2);
-                asm_rv32_opcode_sw(emit->as, reg_value, REG_ARG_1, 0);
-                break;
-                #elif N_XTENSA || N_XTENSAWIN
-                asm_xtensa_op_addx4(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
-                asm_xtensa_op_s32i_n(emit->as, reg_value, REG_ARG_1, 0);
-                break;
-                #endif
+                #ifdef ASM_STORE32_REG_REG_REG
+                ASM_STORE32_REG_REG_REG(emit->as, reg_value, REG_ARG_1, reg_index);
+                #else
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
                 ASM_STORE32_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+4*index)
+                #endif
                 break;
             }
             default:
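With both hunks applied, emitnative.c no longer carries per-architecture #if
blocks for indexed accesses; a backend opts in simply by defining the matching
meta-opcode in its asm*.h header.  A hedged sketch of what that contract looks
like for a hypothetical future backend (asm_newisa_ldr_scaled is an invented
emitter, shown only to illustrate the shape):

    // Provide the meta-opcode only where the ISA beats the generic fallback;
    // any macro left undefined falls through to the portable add-based path.
    #define ASM_LOAD32_REG_REG_REG(as, reg_dest, reg_base, reg_index) \
        asm_newisa_ldr_scaled((as), (reg_dest), (reg_base), (reg_index), 2)
    // No ASM_LOAD8_REG_REG_REG here: for byte accesses the generic
    // add + load sequence is already minimal, so the fallback is used as-is.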