enhanced RISC benchmarks

This commit is contained in:
JanLJL
2025-09-02 11:46:30 +02:00
parent 4c82e7b8bf
commit 072ec2a363
4 changed files with 447 additions and 6 deletions

View File

@@ -1,5 +1,5 @@
#define INSTR add
#define NINST 6
#define NINST 80
#define N a0
.globl ninst
@@ -32,13 +32,90 @@ latency:
loop:
addi t0, t0, 1 # i++
INSTR a3, a1, a2
INSTR a4, a1, a2
INSTR a5, a1, a2
INSTR a6, a1, a2
INSTR a7, a1, a2
INSTR t1, a1, a2
INSTR t2, a1, a2
INSTR t3, a1, a2
INSTR t4, a1, a2
INSTR t5, a1, a2
INSTR t6, a1, a2
INSTR s0, a1, a2
INSTR s1, a1, a2
INSTR s2, a1, a2
INSTR s3, a1, a2
INSTR s4, a1, a2
INSTR s5, a1, a2
INSTR s6, a1, a2
INSTR s7, a1, a2
INSTR s8, a1, a2
INSTR a3, a1, a2
INSTR a4, a1, a2
INSTR a5, a1, a2
INSTR a6, a1, a2
INSTR a7, a1, a2
INSTR t1, a1, a2
INSTR t2, a1, a2
INSTR t3, a1, a2
INSTR t4, a1, a2
INSTR t5, a1, a2
INSTR t6, a1, a2
INSTR s0, a1, a2
INSTR s1, a1, a2
INSTR s2, a1, a2
INSTR s3, a1, a2
INSTR s4, a1, a2
INSTR s5, a1, a2
INSTR s6, a1, a2
INSTR s7, a1, a2
INSTR s8, a1, a2
INSTR a3, a1, a2
INSTR a4, a1, a2
INSTR a5, a1, a2
INSTR a6, a1, a2
INSTR a7, a1, a2
INSTR t1, a1, a2
INSTR t2, a1, a2
INSTR t3, a1, a2
INSTR t4, a1, a2
INSTR t5, a1, a2
INSTR t6, a1, a2
INSTR s0, a1, a2
INSTR s1, a1, a2
INSTR s2, a1, a2
INSTR s3, a1, a2
INSTR s4, a1, a2
INSTR s5, a1, a2
INSTR s6, a1, a2
INSTR s7, a1, a2
INSTR s8, a1, a2
INSTR a3, a1, a2
INSTR a4, a1, a2
INSTR a5, a1, a2
INSTR a6, a1, a2
INSTR a7, a1, a2
INSTR t1, a1, a2
INSTR t2, a1, a2
INSTR t3, a1, a2
INSTR t4, a1, a2
INSTR t5, a1, a2
INSTR t6, a1, a2
INSTR s0, a1, a2
INSTR s1, a1, a2
INSTR s2, a1, a2
INSTR s3, a1, a2
INSTR s4, a1, a2
INSTR s5, a1, a2
INSTR s6, a1, a2
INSTR s7, a1, a2
INSTR s8, a1, a2
blt t0, N, loop

140
src/BASE-RISC-V/addi-TP.S Normal file
View File

@@ -0,0 +1,140 @@
# Build-time parameters of the addi throughput kernel in this file.
# INSTR selects the mnemonic emitted in each measured slot, N names the
# register that carries the iteration count, and NINST is the number of
# measured slots in one loop iteration.
#define INSTR addi
#define NINST 80
#define N a0

# Exported 32-bit word holding the slot count.
# NOTE(review): presumably read by the benchmark driver to scale its
# timings per instruction - confirm against the harness.
.data
.globl ninst
ninst:
    .long NINST
.text
.globl latency
.type latency, @function
.align 4

# Emits one memory op for each of s0-s11, every register using its own
# doubleword slot from 0(sp) up to 88(sp). Pass sd to spill, ld to reload.
.macro saved_regs memop
    \memop s0, 0(sp)
    \memop s1, 8(sp)
    \memop s2, 16(sp)
    \memop s3, 24(sp)
    \memop s4, 32(sp)
    \memop s5, 40(sp)
    \memop s6, 48(sp)
    \memop s7, 56(sp)
    \memop s8, 64(sp)
    \memop s9, 72(sp)
    \memop s10, 80(sp)
    \memop s11, 88(sp)
.endm

# latency: throughput kernel built from independent INSTR slots.
#   a0 (N)  requested number of loop iterations; the loop is skipped
#           entirely when it is zero or negative.
#   a1      source operand read by every slot; never written here.
#   t0      loop counter.
# Each iteration issues 80 slots (four copies of a 20-slot group). The
# destinations a3-a7, t1-t6 and s0-s8 are never read by another slot.
# Stack: twelve doubleword slots hold s0-s11 for the duration of the call.
latency:
    addi sp, sp, -12*8          # reserve twelve 8-byte slots
    saved_regs sd

    li t0, 0                    # i = 0
    blez N, done
loop:
    addi t0, t0, 1              # i++
    .rept 4                     # four copies of the 20-slot group
    INSTR a3, a1, 2
    INSTR a4, a1, 2
    INSTR a5, a1, 2
    INSTR a6, a1, 2
    INSTR a7, a1, 2
    INSTR t1, a1, 2
    INSTR t2, a1, 2
    INSTR t3, a1, 2
    INSTR t4, a1, 2
    INSTR t5, a1, 2
    INSTR t6, a1, 2
    INSTR s0, a1, 2
    INSTR s1, a1, 2
    INSTR s2, a1, 2
    INSTR s3, a1, 2
    INSTR s4, a1, 2
    INSTR s5, a1, 2
    INSTR s6, a1, 2
    INSTR s7, a1, 2
    INSTR s8, a1, 2
    .endr
    blt t0, N, loop
done:
    saved_regs ld               # reload s0-s11 from their slots
    addi sp, sp, 12*8           # release the frame
    ret
.size latency, .-latency

View File

@@ -1,5 +1,5 @@
#define INSTR ld
#define NINST 22
#define NINST 88
#define N a0
.globl ninst
@@ -59,10 +59,79 @@ loop:
INSTR s5, 120(t1)
INSTR s6, 128(t1)
INSTR s7, 136(t1)
INSTR s8, 142(t1)
INSTR s9, 150(t1)
INSTR s10, 158(t1)
INSTR s11, 164(t1)
INSTR s8, 144(t1)
INSTR s9, 152(t1)
INSTR s10, 160(t1)
INSTR s11, 168(t1)
INSTR a3, 0(t1)
INSTR a4, 8(t1)
INSTR a5, 16(t1)
INSTR a6, 24(t1)
INSTR a7, 32(t1)
INSTR t2, 40(t1)
INSTR t3, 48(t1)
INSTR t4, 56(t1)
INSTR t5, 64(t1)
INSTR t6, 72(t1)
INSTR s0, 80(t1)
INSTR s1, 88(t1)
INSTR s2, 96(t1)
INSTR s3, 104(t1)
INSTR s4, 112(t1)
INSTR s5, 120(t1)
INSTR s6, 128(t1)
INSTR s7, 136(t1)
INSTR s8, 144(t1)
INSTR s9, 152(t1)
INSTR s10, 160(t1)
INSTR s11, 168(t1)
INSTR a3, 0(t1)
INSTR a4, 8(t1)
INSTR a5, 16(t1)
INSTR a6, 24(t1)
INSTR a7, 32(t1)
INSTR t2, 40(t1)
INSTR t3, 48(t1)
INSTR t4, 56(t1)
INSTR t5, 64(t1)
INSTR t6, 72(t1)
INSTR s0, 80(t1)
INSTR s1, 88(t1)
INSTR s2, 96(t1)
INSTR s3, 104(t1)
INSTR s4, 112(t1)
INSTR s5, 120(t1)
INSTR s6, 128(t1)
INSTR s7, 136(t1)
INSTR s8, 144(t1)
INSTR s9, 152(t1)
INSTR s10, 160(t1)
INSTR s11, 168(t1)
INSTR a3, 0(t1)
INSTR a4, 8(t1)
INSTR a5, 16(t1)
INSTR a6, 24(t1)
INSTR a7, 32(t1)
INSTR t2, 40(t1)
INSTR t3, 48(t1)
INSTR t4, 56(t1)
INSTR t5, 64(t1)
INSTR t6, 72(t1)
INSTR s0, 80(t1)
INSTR s1, 88(t1)
INSTR s2, 96(t1)
INSTR s3, 104(t1)
INSTR s4, 112(t1)
INSTR s5, 120(t1)
INSTR s6, 128(t1)
INSTR s7, 136(t1)
INSTR s8, 144(t1)
INSTR s9, 152(t1)
INSTR s10, 160(t1)
INSTR s11, 168(t1)
blt t0, N, loop

View File

@@ -0,0 +1,155 @@
# Build-time parameters of the interleaved ld/add kernel in this file.
# INSTR selects the load mnemonic, N names the register carrying the
# iteration count, and NINST is the instruction count exported below.
# NOTE(review): one loop iteration holds 44 INSTR loads plus 44 add
# instructions, so NINST counts one kind only - confirm that this is the
# normalisation the benchmark driver expects.
#define INSTR ld
#define NINST 44
#define N a0

.data
.globl ninst
ninst:
    .long NINST

# Load source for the kernel: 256 bytes made of 32 copies of the word
# pair 0xf01b866e, 0x400921f9. The kernel reads doublewords at offsets
# 0 through 160 from the start of this table.
.align 4
PI:
    .rept 32
    .long 0xf01b866e, 0x400921f9
    .endr
.text
.globl latency
.type latency, @function
.align 4
# latency: throughput kernel interleaving INSTR loads with register adds.
#   a0 (N)  requested number of loop iterations; the loop is skipped when
#           it is zero or negative.
#   a1, a2  source operands of every add; never written here.
#   t0      loop counter.
#   t1      base address of the PI table that all loads read from.
# Every load and every add inside one group of 22 instructions writes its
# own destination register (a3-a7, t2-t6, s0-s11), so no instruction in a
# group overwrites or consumes the result of another one.
# Stack: twelve doubleword slots hold s0-s11 for the duration of the call.
latency:
    # Save callee-save registers (RISC-V ABI)
    addi sp, sp, -12*8          # Make space for 12 registers (8 bytes each)
    sd s0, 0(sp)
    sd s1, 8(sp)
    sd s2, 16(sp)
    sd s3, 24(sp)
    sd s4, 32(sp)
    sd s5, 40(sp)
    sd s6, 48(sp)
    sd s7, 56(sp)
    sd s8, 64(sp)
    sd s9, 72(sp)
    sd s10, 80(sp)
    sd s11, 88(sp)
    la t1, PI                   # t1 = load base, constant for the whole run
    # Initialize loop counter
    li t0, 0
    blez N, done
loop:
    addi t0, t0, 1              # i++
    # The group below is emitted four times (44 loads and 44 adds per
    # iteration). Keeping a single copy prevents the copies from drifting.
    .rept 4
    INSTR a3, 0(t1)
    add a4, a1, a2
    INSTR a5, 16(t1)
    add a6, a1, a2
    INSTR a7, 32(t1)
    add t2, a1, a2              # t2, not a7: a7 is the load destination just above
    INSTR t3, 48(t1)
    add t4, a1, a2
    INSTR t5, 64(t1)
    add t6, a1, a2
    INSTR s0, 80(t1)
    add s1, a1, a2
    INSTR s2, 96(t1)
    add s3, a1, a2
    INSTR s4, 112(t1)
    add s5, a1, a2
    INSTR s6, 128(t1)
    add s7, a1, a2
    INSTR s8, 144(t1)
    add s9, a1, a2
    INSTR s10, 160(t1)
    add s11, a1, a2
    .endr
    blt t0, N, loop
done:
    # Restore callee-save registers from stack
    ld s0, 0(sp)
    ld s1, 8(sp)
    ld s2, 16(sp)
    ld s3, 24(sp)
    ld s4, 32(sp)
    ld s5, 40(sp)
    ld s6, 48(sp)
    ld s7, 56(sp)
    ld s8, 64(sp)
    ld s9, 72(sp)
    ld s10, 80(sp)
    ld s11, 88(sp)
    addi sp, sp, 12*8
    ret
.size latency, .-latency