From 072ec2a363424eb3000c9dd4652b605ef939fed2 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Tue, 2 Sep 2025 11:46:30 +0200 Subject: [PATCH] enhanced RISC benchmarks --- src/BASE-RISC-V/add-TP.S | 79 ++++++++++++++++- src/BASE-RISC-V/addi-TP.S | 140 +++++++++++++++++++++++++++++ src/BASE-RISC-V/ld-TP.S | 79 +++++++++++++++-- src/BASE-RISC-V/ld-add-11-TP.S | 155 +++++++++++++++++++++++++++++++++ 4 files changed, 447 insertions(+), 6 deletions(-) create mode 100644 src/BASE-RISC-V/addi-TP.S create mode 100644 src/BASE-RISC-V/ld-add-11-TP.S diff --git a/src/BASE-RISC-V/add-TP.S b/src/BASE-RISC-V/add-TP.S index 49927b9..a58d3c8 100644 --- a/src/BASE-RISC-V/add-TP.S +++ b/src/BASE-RISC-V/add-TP.S @@ -1,5 +1,5 @@ #define INSTR add -#define NINST 6 +#define NINST 80 #define N a0 .globl ninst @@ -32,13 +32,90 @@ latency: loop: addi t0, t0, 1 # i++ + + INSTR a3, a1, a2 + INSTR a4, a1, a2 + INSTR a5, a1, a2 + INSTR a6, a1, a2 + INSTR a7, a1, a2 + INSTR t1, a1, a2 + INSTR t2, a1, a2 + INSTR t3, a1, a2 + INSTR t4, a1, a2 + INSTR t5, a1, a2 + INSTR t6, a1, a2 + INSTR s0, a1, a2 + INSTR s1, a1, a2 + INSTR s2, a1, a2 + INSTR s3, a1, a2 + INSTR s4, a1, a2 + INSTR s5, a1, a2 + INSTR s6, a1, a2 + INSTR s7, a1, a2 + INSTR s8, a1, a2 INSTR a3, a1, a2 INSTR a4, a1, a2 INSTR a5, a1, a2 INSTR a6, a1, a2 INSTR a7, a1, a2 + INSTR t1, a1, a2 INSTR t2, a1, a2 + INSTR t3, a1, a2 + INSTR t4, a1, a2 + INSTR t5, a1, a2 + INSTR t6, a1, a2 + INSTR s0, a1, a2 + INSTR s1, a1, a2 + INSTR s2, a1, a2 + INSTR s3, a1, a2 + INSTR s4, a1, a2 + INSTR s5, a1, a2 + INSTR s6, a1, a2 + INSTR s7, a1, a2 + INSTR s8, a1, a2 + + INSTR a3, a1, a2 + INSTR a4, a1, a2 + INSTR a5, a1, a2 + INSTR a6, a1, a2 + INSTR a7, a1, a2 + INSTR t1, a1, a2 + INSTR t2, a1, a2 + INSTR t3, a1, a2 + INSTR t4, a1, a2 + INSTR t5, a1, a2 + INSTR t6, a1, a2 + INSTR s0, a1, a2 + INSTR s1, a1, a2 + INSTR s2, a1, a2 + INSTR s3, a1, a2 + INSTR s4, a1, a2 + INSTR s5, a1, a2 + INSTR s6, a1, a2 + INSTR s7, a1, a2 + INSTR s8, a1, a2 + + INSTR a3, a1, 
a2 + INSTR a4, a1, a2 + INSTR a5, a1, a2 + INSTR a6, a1, a2 + INSTR a7, a1, a2 + INSTR t1, a1, a2 + INSTR t2, a1, a2 + INSTR t3, a1, a2 + INSTR t4, a1, a2 + INSTR t5, a1, a2 + INSTR t6, a1, a2 + INSTR s0, a1, a2 + INSTR s1, a1, a2 + INSTR s2, a1, a2 + INSTR s3, a1, a2 + INSTR s4, a1, a2 + INSTR s5, a1, a2 + INSTR s6, a1, a2 + INSTR s7, a1, a2 + INSTR s8, a1, a2 blt t0, N, loop diff --git a/src/BASE-RISC-V/addi-TP.S b/src/BASE-RISC-V/addi-TP.S new file mode 100644 index 0000000..ec668fd --- /dev/null +++ b/src/BASE-RISC-V/addi-TP.S @@ -0,0 +1,140 @@ +#define INSTR addi +#define NINST 80 +#define N a0 + +.globl ninst +.data +ninst: +.long NINST +.text +.globl latency +.type latency, @function +.align 4 +latency: + # Save callee-save registers (RISC-V ABI) + addi sp, sp, -12*8 # Make space for 12 registers (8 bytes each) + sd s0, 0(sp) + sd s1, 8(sp) + sd s2, 16(sp) + sd s3, 24(sp) + sd s4, 32(sp) + sd s5, 40(sp) + sd s6, 48(sp) + sd s7, 56(sp) + sd s8, 64(sp) + sd s9, 72(sp) + sd s10, 80(sp) + sd s11, 88(sp) + + # Initialize loop counter + li t0, 0 + blez N, done + +loop: + addi t0, t0, 1 # i++ + + INSTR a3, a1, 2 + INSTR a4, a1, 2 + INSTR a5, a1, 2 + INSTR a6, a1, 2 + INSTR a7, a1, 2 + INSTR t1, a1, 2 + INSTR t2, a1, 2 + INSTR t3, a1, 2 + INSTR t4, a1, 2 + INSTR t5, a1, 2 + INSTR t6, a1, 2 + INSTR s0, a1, 2 + INSTR s1, a1, 2 + INSTR s2, a1, 2 + INSTR s3, a1, 2 + INSTR s4, a1, 2 + INSTR s5, a1, 2 + INSTR s6, a1, 2 + INSTR s7, a1, 2 + INSTR s8, a1, 2 + + INSTR a3, a1, 2 + INSTR a4, a1, 2 + INSTR a5, a1, 2 + INSTR a6, a1, 2 + INSTR a7, a1, 2 + INSTR t1, a1, 2 + INSTR t2, a1, 2 + INSTR t3, a1, 2 + INSTR t4, a1, 2 + INSTR t5, a1, 2 + INSTR t6, a1, 2 + INSTR s0, a1, 2 + INSTR s1, a1, 2 + INSTR s2, a1, 2 + INSTR s3, a1, 2 + INSTR s4, a1, 2 + INSTR s5, a1, 2 + INSTR s6, a1, 2 + INSTR s7, a1, 2 + INSTR s8, a1, 2 + + INSTR a3, a1, 2 + INSTR a4, a1, 2 + INSTR a5, a1, 2 + INSTR a6, a1, 2 + INSTR a7, a1, 2 + INSTR t1, a1, 2 + INSTR t2, a1, 2 + INSTR t3, a1, 2 + INSTR t4, a1, 2 + 
INSTR t5, a1, 2 + INSTR t6, a1, 2 + INSTR s0, a1, 2 + INSTR s1, a1, 2 + INSTR s2, a1, 2 + INSTR s3, a1, 2 + INSTR s4, a1, 2 + INSTR s5, a1, 2 + INSTR s6, a1, 2 + INSTR s7, a1, 2 + INSTR s8, a1, 2 + + INSTR a3, a1, 2 + INSTR a4, a1, 2 + INSTR a5, a1, 2 + INSTR a6, a1, 2 + INSTR a7, a1, 2 + INSTR t1, a1, 2 + INSTR t2, a1, 2 + INSTR t3, a1, 2 + INSTR t4, a1, 2 + INSTR t5, a1, 2 + INSTR t6, a1, 2 + INSTR s0, a1, 2 + INSTR s1, a1, 2 + INSTR s2, a1, 2 + INSTR s3, a1, 2 + INSTR s4, a1, 2 + INSTR s5, a1, 2 + INSTR s6, a1, 2 + INSTR s7, a1, 2 + INSTR s8, a1, 2 + + blt t0, N, loop + +done: + # Restore callee-save registers from stack + ld s0, 0(sp) + ld s1, 8(sp) + ld s2, 16(sp) + ld s3, 24(sp) + ld s4, 32(sp) + ld s5, 40(sp) + ld s6, 48(sp) + ld s7, 56(sp) + ld s8, 64(sp) + ld s9, 72(sp) + ld s10, 80(sp) + ld s11, 88(sp) + addi sp, sp, 12*8 + + ret +.size latency, .-latency + diff --git a/src/BASE-RISC-V/ld-TP.S b/src/BASE-RISC-V/ld-TP.S index 48fd19e..981cef0 100644 --- a/src/BASE-RISC-V/ld-TP.S +++ b/src/BASE-RISC-V/ld-TP.S @@ -1,5 +1,5 @@ #define INSTR ld -#define NINST 22 +#define NINST 88 #define N a0 .globl ninst @@ -59,10 +59,79 @@ loop: INSTR s5, 120(t1) INSTR s6, 128(t1) INSTR s7, 136(t1) - INSTR s8, 142(t1) - INSTR s9, 150(t1) - INSTR s10, 158(t1) - INSTR s11, 164(t1) + INSTR s8, 144(t1) + INSTR s9, 152(t1) + INSTR s10, 160(t1) + INSTR s11, 168(t1) + + INSTR a3, 0(t1) + INSTR a4, 8(t1) + INSTR a5, 16(t1) + INSTR a6, 24(t1) + INSTR a7, 32(t1) + INSTR t2, 40(t1) + INSTR t3, 48(t1) + INSTR t4, 56(t1) + INSTR t5, 64(t1) + INSTR t6, 72(t1) + INSTR s0, 80(t1) + INSTR s1, 88(t1) + INSTR s2, 96(t1) + INSTR s3, 104(t1) + INSTR s4, 112(t1) + INSTR s5, 120(t1) + INSTR s6, 128(t1) + INSTR s7, 136(t1) + INSTR s8, 144(t1) + INSTR s9, 152(t1) + INSTR s10, 160(t1) + INSTR s11, 168(t1) + + INSTR a3, 0(t1) + INSTR a4, 8(t1) + INSTR a5, 16(t1) + INSTR a6, 24(t1) + INSTR a7, 32(t1) + INSTR t2, 40(t1) + INSTR t3, 48(t1) + INSTR t4, 56(t1) + INSTR t5, 64(t1) + INSTR t6, 72(t1) + INSTR 
s0, 80(t1) + INSTR s1, 88(t1) + INSTR s2, 96(t1) + INSTR s3, 104(t1) + INSTR s4, 112(t1) + INSTR s5, 120(t1) + INSTR s6, 128(t1) + INSTR s7, 136(t1) + INSTR s8, 144(t1) + INSTR s9, 152(t1) + INSTR s10, 160(t1) + INSTR s11, 168(t1) + + INSTR a3, 0(t1) + INSTR a4, 8(t1) + INSTR a5, 16(t1) + INSTR a6, 24(t1) + INSTR a7, 32(t1) + INSTR t2, 40(t1) + INSTR t3, 48(t1) + INSTR t4, 56(t1) + INSTR t5, 64(t1) + INSTR t6, 72(t1) + INSTR s0, 80(t1) + INSTR s1, 88(t1) + INSTR s2, 96(t1) + INSTR s3, 104(t1) + INSTR s4, 112(t1) + INSTR s5, 120(t1) + INSTR s6, 128(t1) + INSTR s7, 136(t1) + INSTR s8, 144(t1) + INSTR s9, 152(t1) + INSTR s10, 160(t1) + INSTR s11, 168(t1) blt t0, N, loop diff --git a/src/BASE-RISC-V/ld-add-11-TP.S b/src/BASE-RISC-V/ld-add-11-TP.S new file mode 100644 index 0000000..f23c467 --- /dev/null +++ b/src/BASE-RISC-V/ld-add-11-TP.S @@ -0,0 +1,155 @@ +#define INSTR ld +#define NINST 44 +#define N a0 + +.globl ninst +.data +ninst: +.long NINST +.align 4 +PI: +.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, \ +0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, \ +0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, \ +0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 +.text +.globl latency +.type latency, @function +.align 4 +latency: + # Save callee-save registers (RISC-V ABI) + addi sp, sp, -12*8 # Make space for 12 registers (8 bytes each) + sd s0, 0(sp) + sd s1, 8(sp) + sd s2, 16(sp) + sd s3, 24(sp) 
+ sd s4, 32(sp) + sd s5, 40(sp) + sd s6, 48(sp) + sd s7, 56(sp) + sd s8, 64(sp) + sd s9, 72(sp) + sd s10, 80(sp) + sd s11, 88(sp) + + la t1, PI # t1 = base address of PI, every load below reads at a fixed offset from it + + # Initialize loop counter + li t0, 0 + blez N, done + +loop: + addi t0, t0, 1 # i++ + + INSTR a3, 0(t1) + add a4, a1, a2 + INSTR a5, 16(t1) + add a6, a1, a2 + INSTR a7, 32(t1) + add t2, a1, a2 # t2, so the add does not overwrite a7 written by the load just before it + INSTR t3, 48(t1) + add t4, a1, a2 + INSTR t5, 64(t1) + add t6, a1, a2 + INSTR s0, 80(t1) + add s1, a1, a2 + INSTR s2, 96(t1) + add s3, a1, a2 + INSTR s4, 112(t1) + add s5, a1, a2 + INSTR s6, 128(t1) + add s7, a1, a2 + INSTR s8, 144(t1) + add s9, a1, a2 + INSTR s10, 160(t1) + add s11, a1, a2 + + INSTR a3, 0(t1) + add a4, a1, a2 + INSTR a5, 16(t1) + add a6, a1, a2 + INSTR a7, 32(t1) + add t2, a1, a2 + INSTR t3, 48(t1) + add t4, a1, a2 + INSTR t5, 64(t1) + add t6, a1, a2 + INSTR s0, 80(t1) + add s1, a1, a2 + INSTR s2, 96(t1) + add s3, a1, a2 + INSTR s4, 112(t1) + add s5, a1, a2 + INSTR s6, 128(t1) + add s7, a1, a2 + INSTR s8, 144(t1) + add s9, a1, a2 + INSTR s10, 160(t1) + add s11, a1, a2 + + INSTR a3, 0(t1) + add a4, a1, a2 + INSTR a5, 16(t1) + add a6, a1, a2 + INSTR a7, 32(t1) + add t2, a1, a2 + INSTR t3, 48(t1) + add t4, a1, a2 + INSTR t5, 64(t1) + add t6, a1, a2 + INSTR s0, 80(t1) + add s1, a1, a2 + INSTR s2, 96(t1) + add s3, a1, a2 + INSTR s4, 112(t1) + add s5, a1, a2 + INSTR s6, 128(t1) + add s7, a1, a2 + INSTR s8, 144(t1) + add s9, a1, a2 + INSTR s10, 160(t1) + add s11, a1, a2 + + INSTR a3, 0(t1) + add a4, a1, a2 + INSTR a5, 16(t1) + add a6, a1, a2 + INSTR a7, 32(t1) + add t2, a1, a2 + INSTR t3, 48(t1) + add t4, a1, a2 + INSTR t5, 64(t1) + add t6, a1, a2 + INSTR s0, 80(t1) + add s1, a1, a2 + INSTR s2, 96(t1) + add s3, a1, a2 + INSTR s4, 112(t1) + add s5, a1, a2 + INSTR s6, 128(t1) + add s7, a1, a2 + INSTR s8, 144(t1) + add s9, a1, a2 + INSTR s10, 160(t1) + add s11, a1, a2 + blt t0, N, loop # next iteration while t0 is below N + +done: + # Restore callee-save registers from stack + ld s0, 0(sp) + ld s1, 8(sp) + ld s2, 16(sp) + ld s3, 24(sp) + ld s4, 32(sp) + ld 
s5, 40(sp) + ld s6, 48(sp) + ld s7, 56(sp) + ld s8, 64(sp) + ld s9, 72(sp) + ld s10, 80(sp) + ld s11, 88(sp) + addi sp, sp, 12*8 + + ret +.size latency, .-latency +