mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 09:00:05 +01:00
Add RISC-V vector add and triad benchmarks with corresponding Makefiles and assembly files
This commit is contained in:
24
examples/add/Makefile
Normal file
24
examples/add/Makefile
Normal file
@@ -0,0 +1,24 @@
|
||||
# Makefile for RISC-V add example
#
# Produces two builds of the same source: a plain -O3 build and a build with
# the vector extension enabled. Every build also writes the matching assembly
# listing (<target>.s) next to the executable.
# Recipe lines below start with a TAB character, as make requires.

CC = gcc
CFLAGS = -O3
CFLAGS_VEC = -O3 -march=rv64gcv

# Single C source shared by both builds
SRC = add_riscv.c
# Executables and the assembly listings derived from them
BINS = add_riscv add_riscv_vec
ASMS = $(BINS:=.s)

# Default target with -O3
all: $(BINS)

# Build with -O3 optimization ($@ = target name, $< = first prerequisite)
add_riscv: $(SRC)
	$(CC) $(CFLAGS) -o $@ $<
	$(CC) $(CFLAGS) -S -o $@.s $<

# Build with vector extensions
add_riscv_vec: $(SRC)
	$(CC) $(CFLAGS_VEC) -o $@ $<
	$(CC) $(CFLAGS_VEC) -S -o $@.s $<

# Clean up executables and generated assembly
clean:
	rm -f $(BINS) $(ASMS)

.PHONY: all clean
|
||||
54
examples/add/add_riscv.c
Normal file
54
examples/add/add_riscv.c
Normal file
@@ -0,0 +1,54 @@
|
||||
// Vector add benchmark for RISC-V testing
|
||||
// a[i] = b[i] + c[i]
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define DTYPE double

/*
 * Element-wise vector add: a[i] = b[i] + c[i] for every i in [0, size).
 *
 * a    - destination array, at least `size` elements
 * b, c - source arrays, at least `size` elements each
 * size - number of elements to process; nothing is written when size <= 0
 */
void kernel(DTYPE* a, DTYPE* b, DTYPE* c, const int size)
{
    // OSACA start marker will be added right before this loop
    int idx = 0;
    while (idx < size) {
        a[idx] = b[idx] + c[idx];
        ++idx;
    }
    // OSACA end marker will be added right after this loop
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int size = 1000;
|
||||
if(argc > 1) {
|
||||
size = atoi(argv[1]);
|
||||
}
|
||||
|
||||
printf("RISC-V Vector add: a[i] = b[i] + c[i], size=%d\n", size);
|
||||
|
||||
// Allocate memory
|
||||
DTYPE* a = (DTYPE*)malloc(size * sizeof(DTYPE));
|
||||
DTYPE* b = (DTYPE*)malloc(size * sizeof(DTYPE));
|
||||
DTYPE* c = (DTYPE*)malloc(size * sizeof(DTYPE));
|
||||
|
||||
// Initialize arrays
|
||||
for(int i=0; i<size; i++) {
|
||||
a[i] = 0.0;
|
||||
b[i] = i;
|
||||
c[i] = i * 2.0;
|
||||
}
|
||||
|
||||
// Run kernel
|
||||
kernel(a, b, c, size);
|
||||
|
||||
// Check result (to prevent optimization)
|
||||
DTYPE checksum = 0.0;
|
||||
for(int i=0; i<size; i++) {
|
||||
checksum += a[i];
|
||||
}
|
||||
printf("Checksum: %f\n", checksum);
|
||||
|
||||
// Cleanup
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
178
examples/riscy_asm_files/add_riscv.s
Normal file
178
examples/riscy_asm_files/add_riscv.s
Normal file
@@ -0,0 +1,178 @@
|
||||
.file "add_riscv.c"
|
||||
.option pic
|
||||
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
|
||||
.attribute unaligned_access, 0
|
||||
.attribute stack_align, 16
|
||||
.text
|
||||
.align 1
|
||||
.globl kernel
|
||||
.type kernel, @function
|
||||
# kernel: scalar loop computing a[i] = b[i] + c[i] on 64-bit doubles.
# Register roles as used by the code below: a0 = store pointer (a),
# a1 / a2 = load pointers (b, c), a3 = element count (size).
# Clobbers a0-a3, a5, fa4, fa5. Leaf function, no stack use.
kernel:
|
||||
.LFB22:
|
||||
.cfi_startproc
|
||||
ble a3,zero,.L1                 # size <= 0: nothing to do
|
||||
slli a3,a3,3                    # a3 = size * 8 (bytes, one double = 8 bytes)
|
||||
add a5,a1,a3                    # a5 = end of b, used as loop bound
|
||||
.L3:
|
||||
fld fa5,0(a1)                   # fa5 = *b
|
||||
|
||||
fld fa4,0(a2)                   # fa4 = *c
|
||||
|
||||
addi a1,a1,8                    # advance b by one double
|
||||
|
||||
addi a2,a2,8                    # advance c by one double
|
||||
|
||||
fadd.d fa5,fa5,fa4              # fa5 = b[i] + c[i]
|
||||
|
||||
addi a0,a0,8                    # advance a before the store ...
|
||||
|
||||
fsd fa5,-8(a0)                  # ... so the element just computed lives at -8(a0)
|
||||
|
||||
bne a1,a5,.L3                   # loop until b pointer reaches its end
|
||||
|
||||
.L1:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LFE22:
|
||||
.size kernel, .-kernel
|
||||
.section .rodata.str1.8,"aMS",@progbits,1
|
||||
.align 3
|
||||
.LC0:
|
||||
.string "RISC-V Vector add: a[i] = b[i] + c[i], size=%d\n"
|
||||
.align 3
|
||||
.LC1:
|
||||
.string "Checksum: %f\n"
|
||||
.section .text.startup,"ax",@progbits
|
||||
.align 1
|
||||
.globl main
|
||||
.type main, @function
|
||||
main:
|
||||
.LFB23:
|
||||
.cfi_startproc
|
||||
addi sp,sp,-48
|
||||
.cfi_def_cfa_offset 48
|
||||
sd ra,40(sp)
|
||||
sd s0,32(sp)
|
||||
sd s1,24(sp)
|
||||
sd s2,16(sp)
|
||||
sd s3,8(sp)
|
||||
sd s4,0(sp)
|
||||
.cfi_offset 1, -8
|
||||
.cfi_offset 8, -16
|
||||
.cfi_offset 9, -24
|
||||
.cfi_offset 18, -32
|
||||
.cfi_offset 19, -40
|
||||
.cfi_offset 20, -48
|
||||
li a5,1
|
||||
bgt a0,a5,.L21
|
||||
li a1,1000
|
||||
lla a0,.LC0
|
||||
call printf@plt
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
mv s1,a0
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
mv s2,a0
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
li s0,8192
|
||||
mv s3,a0
|
||||
addi s0,s0,-192
|
||||
li s4,1000
|
||||
.L13:
|
||||
slli a5,s4,32
|
||||
srli a2,a5,29
|
||||
li a1,0
|
||||
mv a0,s1
|
||||
call memset@plt
|
||||
mv a4,s2
|
||||
mv a3,s2
|
||||
li a5,0
|
||||
.L9:
|
||||
fcvt.d.w fa5,a5
|
||||
mv a1,a5
|
||||
addiw a5,a5,1
|
||||
fsd fa5,0(a3)
|
||||
addi a3,a3,8
|
||||
bne a5,s4,.L9
|
||||
mv a2,s3
|
||||
mv a3,s3
|
||||
li a5,0
|
||||
.L10:
|
||||
fcvt.d.w fa5,a5
|
||||
mv a0,a5
|
||||
addi a3,a3,8
|
||||
fadd.d fa5,fa5,fa5
|
||||
addiw a5,a5,1
|
||||
fsd fa5,-8(a3)
|
||||
bne a0,a1,.L10
|
||||
mv a5,s1
|
||||
add a1,s2,s0
|
||||
mv a3,s1
|
||||
.L11:
|
||||
fld fa5,0(a4)
|
||||
fld fa4,0(a2)
|
||||
addi a4,a4,8
|
||||
addi a2,a2,8
|
||||
fadd.d fa5,fa5,fa4
|
||||
addi a3,a3,8
|
||||
fsd fa5,-8(a3)
|
||||
bne a4,a1,.L11
|
||||
fmv.d.x fa5,zero
|
||||
add s0,s1,s0
|
||||
.L12:
|
||||
fld fa4,0(a5)
|
||||
addi a5,a5,8
|
||||
fadd.d fa5,fa5,fa4
|
||||
bne a5,s0,.L12
|
||||
.L8:
|
||||
fmv.x.d a1,fa5
|
||||
lla a0,.LC1
|
||||
call printf@plt
|
||||
mv a0,s1
|
||||
call free@plt
|
||||
mv a0,s2
|
||||
call free@plt
|
||||
mv a0,s3
|
||||
call free@plt
|
||||
ld ra,40(sp)
|
||||
.cfi_remember_state
|
||||
.cfi_restore 1
|
||||
ld s0,32(sp)
|
||||
.cfi_restore 8
|
||||
ld s1,24(sp)
|
||||
.cfi_restore 9
|
||||
ld s2,16(sp)
|
||||
.cfi_restore 18
|
||||
ld s3,8(sp)
|
||||
.cfi_restore 19
|
||||
ld s4,0(sp)
|
||||
.cfi_restore 20
|
||||
li a0,0
|
||||
addi sp,sp,48
|
||||
.cfi_def_cfa_offset 0
|
||||
jr ra
|
||||
.L21:
|
||||
.cfi_restore_state
|
||||
ld a0,8(a1)
|
||||
li a2,10
|
||||
li a1,0
|
||||
call strtol@plt
|
||||
sext.w s4,a0
|
||||
mv a1,s4
|
||||
lla a0,.LC0
|
||||
call printf@plt
|
||||
slli s0,s4,3
|
||||
mv a0,s0
|
||||
call malloc@plt
|
||||
mv s1,a0
|
||||
mv a0,s0
|
||||
call malloc@plt
|
||||
mv s2,a0
|
||||
mv a0,s0
|
||||
call malloc@plt
|
||||
mv s3,a0
|
||||
bgt s4,zero,.L13
|
||||
fmv.d.x fa5,zero
|
||||
j .L8
|
||||
.cfi_endproc
|
||||
.LFE23:
|
||||
.size main, .-main
|
||||
.ident "GCC: (GNU) 14.2.1 20250207"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
225
examples/riscy_asm_files/add_riscv_vec.s
Normal file
225
examples/riscy_asm_files/add_riscv_vec.s
Normal file
@@ -0,0 +1,225 @@
|
||||
.file "add_riscv.c"
|
||||
.option pic
|
||||
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_zifencei2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
|
||||
.attribute unaligned_access, 0
|
||||
.attribute stack_align, 16
|
||||
.text
|
||||
.align 1
|
||||
.globl kernel
|
||||
.type kernel, @function
|
||||
# kernel: a[i] = b[i] + c[i] on 64-bit doubles, with an RVV vector path and a
# scalar fallback.
# Register roles as used by the code below: a0 = store pointer (a),
# a1 / a2 = load pointers (b, c), a3 = element count (size).
# Layout:
#   entry  - size and pointer-distance checks selecting the path
#   .L5    - strip-mined vector loop (vl elements per iteration)
#   .L3/.L7 - scalar loop, taken for small sizes or when the distance check fails
# Clobbers a0-a5, fa4, fa5, v1, v2 and the vl/vtype state. Leaf function.
kernel:
|
||||
.LFB22:
|
||||
.cfi_startproc
|
||||
ble a3,zero,.L10                # size <= 0: return immediately
|
||||
addiw a5,a3,-1                  # a5 = size - 1 (32-bit)
|
||||
li a4,2
|
||||
bleu a5,a4,.L3                  # size - 1 <= 2 (size <= 3): use the scalar loop
|
||||
addi a5,a2,8                    # a5 = c + 8
|
||||
addi a4,a1,8                    # a4 = b + 8
|
||||
sub a5,a0,a5                    # a5 = a - (c + 8), compared as unsigned below
|
||||
sub a4,a0,a4                    # a4 = a - (b + 8), compared as unsigned below
|
||||
bgtu a5,a4,.L14                 # keep the smaller of the two distances in a5
|
||||
.L4:
|
||||
csrr a4,vlenb                   # a4 = vector register length in bytes
|
||||
|
||||
addi a4,a4,-16                  # a4 = vlenb - 16
|
||||
|
||||
bleu a5,a4,.L3                  # distance too small: fall back to scalar (looks like a compiler-generated overlap check between a and b/c)
|
||||
|
||||
.L5:
|
||||
vsetvli a5,a3,e64,m1,ta,ma      # a5 = vl for the a3 remaining elements (SEW=64, LMUL=1)
|
||||
|
||||
vle64.v v1,0(a1)                # v1 = next vl elements of b
|
||||
|
||||
vle64.v v2,0(a2)                # v2 = next vl elements of c
|
||||
|
||||
slli a4,a5,3                    # a4 = vl * 8 = bytes handled this iteration
|
||||
|
||||
sub a3,a3,a5                    # remaining elements -= vl
|
||||
|
||||
add a1,a1,a4                    # advance b
|
||||
|
||||
add a2,a2,a4                    # advance c
|
||||
|
||||
vfadd.vv v1,v1,v2               # v1 = b + c
|
||||
|
||||
vse64.v v1,0(a0)                # store vl results to a
|
||||
|
||||
add a0,a0,a4                    # advance a
|
||||
|
||||
bne a3,zero,.L5                 # loop while elements remain
|
||||
|
||||
ret
|
||||
.L3:
|
||||
slli a3,a3,3                    # scalar path: a3 = size * 8 bytes
|
||||
|
||||
add a3,a1,a3                    # a3 = end of b, used as loop bound
|
||||
|
||||
.L7:
|
||||
fld fa5,0(a1)                   # fa5 = *b
|
||||
|
||||
fld fa4,0(a2)                   # fa4 = *c
|
||||
|
||||
addi a1,a1,8                    # advance b
|
||||
|
||||
addi a2,a2,8                    # advance c
|
||||
|
||||
fadd.d fa5,fa5,fa4              # fa5 = b[i] + c[i]
|
||||
|
||||
addi a0,a0,8                    # advance a before the store
|
||||
|
||||
fsd fa5,-8(a0)                  # store result at the pre-increment address
|
||||
|
||||
bne a1,a3,.L7                   # loop until b pointer reaches its end
|
||||
|
||||
.L10:
|
||||
ret
|
||||
.L14:
|
||||
mv a5,a4                        # a4 was the smaller distance: use it for the check at .L4
|
||||
|
||||
j .L4
|
||||
.cfi_endproc
|
||||
.LFE22:
|
||||
.size kernel, .-kernel
|
||||
.section .rodata.str1.8,"aMS",@progbits,1
|
||||
.align 3
|
||||
.LC0:
|
||||
.string "RISC-V Vector add: a[i] = b[i] + c[i], size=%d\n"
|
||||
.align 3
|
||||
.LC2:
|
||||
.string "Checksum: %f\n"
|
||||
.section .text.startup,"ax",@progbits
|
||||
.align 1
|
||||
.globl main
|
||||
.type main, @function
|
||||
main:
|
||||
.LFB23:
|
||||
.cfi_startproc
|
||||
addi sp,sp,-48
|
||||
.cfi_def_cfa_offset 48
|
||||
sd ra,40(sp)
|
||||
sd s0,32(sp)
|
||||
sd s1,24(sp)
|
||||
sd s2,16(sp)
|
||||
sd s3,8(sp)
|
||||
sd s4,0(sp)
|
||||
.cfi_offset 1, -8
|
||||
.cfi_offset 8, -16
|
||||
.cfi_offset 9, -24
|
||||
.cfi_offset 18, -32
|
||||
.cfi_offset 19, -40
|
||||
.cfi_offset 20, -48
|
||||
li a5,1
|
||||
bgt a0,a5,.L35
|
||||
li a1,1000
|
||||
lla a0,.LC0
|
||||
call printf@plt
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
mv s0,a0
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
mv s2,a0
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
li s3,8192
|
||||
mv s1,a0
|
||||
addi s3,s3,-192
|
||||
li s4,1000
|
||||
.L22:
|
||||
slli a5,s4,32
|
||||
srli a2,a5,29
|
||||
li a1,0
|
||||
mv a0,s0
|
||||
call memset@plt
|
||||
mv a4,s2
|
||||
li a5,0
|
||||
.L18:
|
||||
fcvt.d.w fa5,a5
|
||||
addiw a5,a5,1
|
||||
addi a4,a4,8
|
||||
fsd fa5,-8(a4)
|
||||
bne a5,s4,.L18
|
||||
fld fa5,.LC1,a4
|
||||
vsetvli a3,zero,e64,m1,ta,ma
|
||||
mv a2,a5
|
||||
vfmv.v.f v4,fa5
|
||||
vsetvli zero,zero,e32,mf2,ta,ma
|
||||
vid.v v2
|
||||
mv a3,s1
|
||||
.L19:
|
||||
vsetvli a4,a2,e32,mf2,ta,ma
|
||||
vfwcvt.f.x.v v1,v2
|
||||
vsetvli a0,zero,e32,mf2,ta,ma
|
||||
vmv.v.x v3,a4
|
||||
vsetvli zero,a4,e64,m1,ta,ma
|
||||
vfmul.vv v1,v1,v4
|
||||
vsetvli a0,zero,e32,mf2,ta,ma
|
||||
vadd.vv v2,v2,v3
|
||||
vsetvli zero,a4,e64,m1,ta,ma
|
||||
slli a1,a4,3
|
||||
sub a2,a2,a4
|
||||
vse64.v v1,0(a3)
|
||||
add a3,a3,a1
|
||||
bne a2,zero,.L19
|
||||
mv a3,s0
|
||||
mv a0,s1
|
||||
mv a1,s2
|
||||
.L20:
|
||||
vsetvli a4,a5,e64,m1,ta,ma
|
||||
vle64.v v2,0(a1)
|
||||
vle64.v v1,0(a0)
|
||||
slli a2,a4,3
|
||||
sub a5,a5,a4
|
||||
add a1,a1,a2
|
||||
add a0,a0,a2
|
||||
vfadd.vv v1,v1,v2
|
||||
vse64.v v1,0(a3)
|
||||
add a3,a3,a2
|
||||
bne a5,zero,.L20
|
||||
fmv.d.x fa5,zero
|
||||
add s3,s0,s3
|
||||
mv a5,s0
|
||||
.L21:
|
||||
fld fa4,0(a5)
|
||||
addi a5,a5,8
|
||||
fadd.d fa5,fa5,fa4
|
||||
bne s3,a5,.L21
|
||||
.L17:
|
||||
fmv.x.d a1,fa5
|
||||
lla a0,.LC2
|
||||
call printf@plt
|
||||
mv a0,s0
|
||||
call free@plt
|
||||
mv a0,s2
|
||||
call free@plt
|
||||
mv a0,s1
|
||||
call free@plt
|
||||
ld ra,40(sp)
|
||||
.cfi_remember_state
|
||||
.cfi_restore 1
|
||||
ld s0,32(sp)
|
||||
.cfi_restore 8
|
||||
ld s1,24(sp)
|
||||
.cfi_restore 9
|
||||
ld s2,16(sp)
|
||||
.cfi_restore 18
|
||||
ld s3,8(sp)
|
||||
.cfi_restore 19
|
||||
ld s4,0(sp)
|
||||
.cfi_restore 20
|
||||
li a0,0
|
||||
addi sp,sp,48
|
||||
.cfi_def_cfa_offset 0
|
||||
jr ra
|
||||
.L35:
|
||||
.cfi_restore_state
|
||||
ld a0,8(a1)
|
||||
li a2,10
|
||||
li a1,0
|
||||
call strtol@plt
|
||||
sext.w s4,a0
|
||||
mv a1,s4
|
||||
lla a0,.LC0
|
||||
call printf@plt
|
||||
slli s3,s4,3
|
||||
mv a0,s3
|
||||
call malloc@plt
|
||||
mv s0,a0
|
||||
mv a0,s3
|
||||
call malloc@plt
|
||||
mv s2,a0
|
||||
mv a0,s3
|
||||
call malloc@plt
|
||||
mv s1,a0
|
||||
bgt s4,zero,.L22
|
||||
fmv.d.x fa5,zero
|
||||
j .L17
|
||||
.cfi_endproc
|
||||
.LFE23:
|
||||
.size main, .-main
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 3
|
||||
.LC1:
|
||||
.word 0
|
||||
.word 1073741824
|
||||
.ident "GCC: (GNU) 14.2.1 20250207"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
184
examples/riscy_asm_files/triad.s
Normal file
184
examples/riscy_asm_files/triad.s
Normal file
@@ -0,0 +1,184 @@
|
||||
.file "triad.c"
|
||||
.option pic
|
||||
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
|
||||
.attribute unaligned_access, 0
|
||||
.attribute stack_align, 16
|
||||
.text
|
||||
.align 1
|
||||
.globl kernel
|
||||
.type kernel, @function
|
||||
# kernel: scalar STREAM-triad loop computing a[i] = b[i] + s * c[i] on doubles.
# Register roles as used by the code below: a0 = store pointer (a),
# a1 = load pointer b, a2 = load pointer c, fa0 = scalar s, a3 = element count.
# Clobbers a0-a3, a5, fa4, fa5. Leaf function, no stack use.
kernel:
|
||||
.LFB22:
|
||||
.cfi_startproc
|
||||
ble a3,zero,.L1                 # size <= 0: nothing to do
|
||||
slli a3,a3,3                    # a3 = size * 8 (bytes)
|
||||
add a5,a1,a3                    # a5 = end of b, used as loop bound
|
||||
.L3:
|
||||
fld fa5,0(a2)                   # fa5 = *c
|
||||
|
||||
fld fa4,0(a1)                   # fa4 = *b
|
||||
|
||||
addi a1,a1,8                    # advance b by one double
|
||||
|
||||
addi a2,a2,8                    # advance c by one double
|
||||
|
||||
fmadd.d fa5,fa5,fa0,fa4         # fa5 = c[i] * s + b[i] (fused multiply-add)
|
||||
|
||||
addi a0,a0,8                    # advance a before the store ...
|
||||
|
||||
fsd fa5,-8(a0)                  # ... so the element just computed lives at -8(a0)
|
||||
|
||||
bne a1,a5,.L3                   # loop until b pointer reaches its end
|
||||
|
||||
.L1:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LFE22:
|
||||
.size kernel, .-kernel
|
||||
.section .rodata.str1.8,"aMS",@progbits,1
|
||||
.align 3
|
||||
.LC0:
|
||||
.string "RISC-V STREAM triad: a[i] = b[i] + s * c[i], size=%d\n"
|
||||
.align 3
|
||||
.LC2:
|
||||
.string "Checksum: %f\n"
|
||||
.section .text.startup,"ax",@progbits
|
||||
.align 1
|
||||
.globl main
|
||||
.type main, @function
|
||||
main:
|
||||
.LFB23:
|
||||
.cfi_startproc
|
||||
addi sp,sp,-48
|
||||
.cfi_def_cfa_offset 48
|
||||
sd ra,40(sp)
|
||||
sd s0,32(sp)
|
||||
sd s1,24(sp)
|
||||
sd s2,16(sp)
|
||||
sd s3,8(sp)
|
||||
sd s4,0(sp)
|
||||
.cfi_offset 1, -8
|
||||
.cfi_offset 8, -16
|
||||
.cfi_offset 9, -24
|
||||
.cfi_offset 18, -32
|
||||
.cfi_offset 19, -40
|
||||
.cfi_offset 20, -48
|
||||
li a5,1
|
||||
bgt a0,a5,.L21
|
||||
li a1,1000
|
||||
lla a0,.LC0
|
||||
call printf@plt
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
mv s1,a0
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
mv s2,a0
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
li s0,8192
|
||||
mv s3,a0
|
||||
addi s0,s0,-192
|
||||
li s4,1000
|
||||
.L13:
|
||||
slli a5,s4,32
|
||||
srli a2,a5,29
|
||||
li a1,0
|
||||
mv a0,s1
|
||||
call memset@plt
|
||||
mv a4,s2
|
||||
mv a3,s2
|
||||
li a5,0
|
||||
.L9:
|
||||
fcvt.d.w fa5,a5
|
||||
mv a1,a5
|
||||
addiw a5,a5,1
|
||||
fsd fa5,0(a3)
|
||||
addi a3,a3,8
|
||||
bne a5,s4,.L9
|
||||
mv a2,s3
|
||||
mv a3,s3
|
||||
li a5,0
|
||||
.L10:
|
||||
fcvt.d.w fa5,a5
|
||||
mv a0,a5
|
||||
addi a3,a3,8
|
||||
fadd.d fa5,fa5,fa5
|
||||
addiw a5,a5,1
|
||||
fsd fa5,-8(a3)
|
||||
bne a0,a1,.L10
|
||||
fld fa3,.LC1,a5
|
||||
add a1,s2,s0
|
||||
mv a5,s1
|
||||
mv a3,s1
|
||||
.L11:
|
||||
fld fa5,0(a2)
|
||||
fld fa4,0(a4)
|
||||
addi a4,a4,8
|
||||
addi a2,a2,8
|
||||
fmadd.d fa5,fa5,fa3,fa4
|
||||
addi a3,a3,8
|
||||
fsd fa5,-8(a3)
|
||||
bne a4,a1,.L11
|
||||
fmv.d.x fa5,zero
|
||||
add s0,s1,s0
|
||||
.L12:
|
||||
fld fa4,0(a5)
|
||||
addi a5,a5,8
|
||||
fadd.d fa5,fa5,fa4
|
||||
bne a5,s0,.L12
|
||||
.L8:
|
||||
fmv.x.d a1,fa5
|
||||
lla a0,.LC2
|
||||
call printf@plt
|
||||
mv a0,s1
|
||||
call free@plt
|
||||
mv a0,s2
|
||||
call free@plt
|
||||
mv a0,s3
|
||||
call free@plt
|
||||
ld ra,40(sp)
|
||||
.cfi_remember_state
|
||||
.cfi_restore 1
|
||||
ld s0,32(sp)
|
||||
.cfi_restore 8
|
||||
ld s1,24(sp)
|
||||
.cfi_restore 9
|
||||
ld s2,16(sp)
|
||||
.cfi_restore 18
|
||||
ld s3,8(sp)
|
||||
.cfi_restore 19
|
||||
ld s4,0(sp)
|
||||
.cfi_restore 20
|
||||
li a0,0
|
||||
addi sp,sp,48
|
||||
.cfi_def_cfa_offset 0
|
||||
jr ra
|
||||
.L21:
|
||||
.cfi_restore_state
|
||||
ld a0,8(a1)
|
||||
li a2,10
|
||||
li a1,0
|
||||
call strtol@plt
|
||||
sext.w s4,a0
|
||||
mv a1,s4
|
||||
lla a0,.LC0
|
||||
call printf@plt
|
||||
slli s0,s4,3
|
||||
mv a0,s0
|
||||
call malloc@plt
|
||||
mv s1,a0
|
||||
mv a0,s0
|
||||
call malloc@plt
|
||||
mv s2,a0
|
||||
mv a0,s0
|
||||
call malloc@plt
|
||||
mv s3,a0
|
||||
bgt s4,zero,.L13
|
||||
fmv.d.x fa5,zero
|
||||
j .L8
|
||||
.cfi_endproc
|
||||
.LFE23:
|
||||
.size main, .-main
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 3
|
||||
.LC1:
|
||||
.word 1374389535
|
||||
.word 1074339512
|
||||
.ident "GCC: (GNU) 14.2.1 20250207"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
234
examples/riscy_asm_files/triad_vec.s
Normal file
234
examples/riscy_asm_files/triad_vec.s
Normal file
@@ -0,0 +1,234 @@
|
||||
.file "triad.c"
|
||||
.option pic
|
||||
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_zifencei2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
|
||||
.attribute unaligned_access, 0
|
||||
.attribute stack_align, 16
|
||||
.text
|
||||
.align 1
|
||||
.globl kernel
|
||||
.type kernel, @function
|
||||
# kernel: STREAM triad a[i] = b[i] + s * c[i] on doubles, with an RVV vector
# path and a scalar fallback.
# Register roles as used by the code below: a0 = store pointer (a),
# a1 = load pointer b, a2 = load pointer c, fa0 = scalar s, a3 = element count.
# Layout:
#   entry   - size and pointer-distance checks selecting the path
#   .L5     - strip-mined vector loop (vl elements per iteration)
#   .L3/.L7 - scalar loop, taken for small sizes or when the distance check fails
# Clobbers a0-a5, fa4, fa5, v1-v3 and the vl/vtype state. Leaf function.
kernel:
|
||||
.LFB22:
|
||||
.cfi_startproc
|
||||
ble a3,zero,.L10                # size <= 0: return immediately
|
||||
addiw a5,a3,-1                  # a5 = size - 1 (32-bit)
|
||||
li a4,4
|
||||
bleu a5,a4,.L3                  # size - 1 <= 4 (size <= 5): use the scalar loop
|
||||
addi a5,a2,8                    # a5 = c + 8
|
||||
addi a4,a1,8                    # a4 = b + 8
|
||||
sub a5,a0,a5                    # a5 = a - (c + 8), compared as unsigned below
|
||||
sub a4,a0,a4                    # a4 = a - (b + 8), compared as unsigned below
|
||||
bgtu a5,a4,.L14                 # keep the smaller of the two distances in a5
|
||||
.L4:
|
||||
csrr a4,vlenb                   # a4 = vector register length in bytes
|
||||
|
||||
addi a4,a4,-16                  # a4 = vlenb - 16
|
||||
|
||||
bleu a5,a4,.L3                  # distance too small: fall back to scalar (looks like a compiler-generated overlap check between a and b/c)
|
||||
|
||||
vsetvli a5,zero,e64,m1,ta,ma    # select maximum vl for SEW=64, LMUL=1 (result in a5 is overwritten in the loop)
|
||||
|
||||
vfmv.v.f v3,fa0                 # broadcast s into every element of v3 (loop-invariant)
|
||||
|
||||
.L5:
|
||||
vsetvli a5,a3,e64,m1,ta,ma      # a5 = vl for the a3 remaining elements
|
||||
|
||||
vle64.v v2,0(a1)                # v2 = next vl elements of b
|
||||
|
||||
vle64.v v1,0(a2)                # v1 = next vl elements of c
|
||||
|
||||
slli a4,a5,3                    # a4 = vl * 8 = bytes handled this iteration
|
||||
|
||||
sub a3,a3,a5                    # remaining elements -= vl
|
||||
|
||||
add a1,a1,a4                    # advance b
|
||||
|
||||
add a2,a2,a4                    # advance c
|
||||
|
||||
vfmadd.vv v1,v3,v2              # v1 = v3 * v1 + v2 = s * c + b (destination is also a multiplicand)
|
||||
|
||||
vse64.v v1,0(a0)                # store vl results to a
|
||||
|
||||
add a0,a0,a4                    # advance a
|
||||
|
||||
bne a3,zero,.L5                 # loop while elements remain
|
||||
|
||||
ret
|
||||
.L3:
|
||||
slli a3,a3,3                    # scalar path: a3 = size * 8 bytes
|
||||
|
||||
add a3,a1,a3                    # a3 = end of b, used as loop bound
|
||||
|
||||
.L7:
|
||||
fld fa5,0(a2)                   # fa5 = *c
|
||||
|
||||
fld fa4,0(a1)                   # fa4 = *b
|
||||
|
||||
addi a1,a1,8                    # advance b
|
||||
|
||||
addi a2,a2,8                    # advance c
|
||||
|
||||
fmadd.d fa5,fa0,fa5,fa4         # fa5 = s * c[i] + b[i] (fused multiply-add)
|
||||
|
||||
addi a0,a0,8                    # advance a before the store
|
||||
|
||||
fsd fa5,-8(a0)                  # store result at the pre-increment address
|
||||
|
||||
bne a3,a1,.L7                   # loop until b pointer reaches its end
|
||||
|
||||
.L10:
|
||||
ret
|
||||
.L14:
|
||||
mv a5,a4                        # a4 was the smaller distance: use it for the check at .L4
|
||||
|
||||
j .L4
|
||||
.cfi_endproc
|
||||
.LFE22:
|
||||
.size kernel, .-kernel
|
||||
.section .rodata.str1.8,"aMS",@progbits,1
|
||||
.align 3
|
||||
.LC0:
|
||||
.string "RISC-V STREAM triad: a[i] = b[i] + s * c[i], size=%d\n"
|
||||
.align 3
|
||||
.LC3:
|
||||
.string "Checksum: %f\n"
|
||||
.section .text.startup,"ax",@progbits
|
||||
.align 1
|
||||
.globl main
|
||||
.type main, @function
|
||||
main:
|
||||
.LFB23:
|
||||
.cfi_startproc
|
||||
addi sp,sp,-48
|
||||
.cfi_def_cfa_offset 48
|
||||
sd ra,40(sp)
|
||||
sd s0,32(sp)
|
||||
sd s1,24(sp)
|
||||
sd s2,16(sp)
|
||||
sd s3,8(sp)
|
||||
sd s4,0(sp)
|
||||
.cfi_offset 1, -8
|
||||
.cfi_offset 8, -16
|
||||
.cfi_offset 9, -24
|
||||
.cfi_offset 18, -32
|
||||
.cfi_offset 19, -40
|
||||
.cfi_offset 20, -48
|
||||
li a5,1
|
||||
bgt a0,a5,.L35
|
||||
li a1,1000
|
||||
lla a0,.LC0
|
||||
call printf@plt
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
mv s0,a0
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
mv s2,a0
|
||||
li a0,8192
|
||||
addi a0,a0,-192
|
||||
call malloc@plt
|
||||
li s3,8192
|
||||
mv s1,a0
|
||||
addi s3,s3,-192
|
||||
li s4,1000
|
||||
.L22:
|
||||
slli a5,s4,32
|
||||
srli a2,a5,29
|
||||
li a1,0
|
||||
mv a0,s0
|
||||
call memset@plt
|
||||
mv a4,s2
|
||||
li a5,0
|
||||
.L18:
|
||||
fcvt.d.w fa5,a5
|
||||
addiw a5,a5,1
|
||||
addi a4,a4,8
|
||||
fsd fa5,-8(a4)
|
||||
bne a5,s4,.L18
|
||||
fld fa5,.LC1,a4
|
||||
vsetvli a3,zero,e64,m1,ta,ma
|
||||
mv a2,a5
|
||||
vfmv.v.f v4,fa5
|
||||
vsetvli zero,zero,e32,mf2,ta,ma
|
||||
vid.v v2
|
||||
mv a3,s1
|
||||
.L19:
|
||||
vsetvli a4,a2,e32,mf2,ta,ma
|
||||
vfwcvt.f.x.v v1,v2
|
||||
vsetvli a0,zero,e32,mf2,ta,ma
|
||||
vmv.v.x v3,a4
|
||||
vsetvli zero,a4,e64,m1,ta,ma
|
||||
vfmul.vv v1,v1,v4
|
||||
vsetvli a0,zero,e32,mf2,ta,ma
|
||||
vadd.vv v2,v2,v3
|
||||
vsetvli zero,a4,e64,m1,ta,ma
|
||||
slli a1,a4,3
|
||||
sub a2,a2,a4
|
||||
vse64.v v1,0(a3)
|
||||
add a3,a3,a1
|
||||
bne a2,zero,.L19
|
||||
fld fa5,.LC2,a4
|
||||
vsetvli a0,zero,e64,m1,ta,ma
|
||||
mv a3,s0
|
||||
vfmv.v.f v3,fa5
|
||||
mv a0,s1
|
||||
mv a1,s2
|
||||
.L20:
|
||||
vsetvli a4,a5,e64,m1,ta,ma
|
||||
vle64.v v2,0(a1)
|
||||
vle64.v v1,0(a0)
|
||||
slli a2,a4,3
|
||||
sub a5,a5,a4
|
||||
add a1,a1,a2
|
||||
add a0,a0,a2
|
||||
vfmadd.vv v1,v3,v2
|
||||
vse64.v v1,0(a3)
|
||||
add a3,a3,a2
|
||||
bne a5,zero,.L20
|
||||
fmv.d.x fa5,zero
|
||||
add s3,s0,s3
|
||||
mv a5,s0
|
||||
.L21:
|
||||
fld fa4,0(a5)
|
||||
addi a5,a5,8
|
||||
fadd.d fa5,fa5,fa4
|
||||
bne s3,a5,.L21
|
||||
.L17:
|
||||
fmv.x.d a1,fa5
|
||||
lla a0,.LC3
|
||||
call printf@plt
|
||||
mv a0,s0
|
||||
call free@plt
|
||||
mv a0,s2
|
||||
call free@plt
|
||||
mv a0,s1
|
||||
call free@plt
|
||||
ld ra,40(sp)
|
||||
.cfi_remember_state
|
||||
.cfi_restore 1
|
||||
ld s0,32(sp)
|
||||
.cfi_restore 8
|
||||
ld s1,24(sp)
|
||||
.cfi_restore 9
|
||||
ld s2,16(sp)
|
||||
.cfi_restore 18
|
||||
ld s3,8(sp)
|
||||
.cfi_restore 19
|
||||
ld s4,0(sp)
|
||||
.cfi_restore 20
|
||||
li a0,0
|
||||
addi sp,sp,48
|
||||
.cfi_def_cfa_offset 0
|
||||
jr ra
|
||||
.L35:
|
||||
.cfi_restore_state
|
||||
ld a0,8(a1)
|
||||
li a2,10
|
||||
li a1,0
|
||||
call strtol@plt
|
||||
sext.w s4,a0
|
||||
mv a1,s4
|
||||
lla a0,.LC0
|
||||
call printf@plt
|
||||
slli s3,s4,3
|
||||
mv a0,s3
|
||||
call malloc@plt
|
||||
mv s0,a0
|
||||
mv a0,s3
|
||||
call malloc@plt
|
||||
mv s2,a0
|
||||
mv a0,s3
|
||||
call malloc@plt
|
||||
mv s1,a0
|
||||
bgt s4,zero,.L22
|
||||
fmv.d.x fa5,zero
|
||||
j .L17
|
||||
.cfi_endproc
|
||||
.LFE23:
|
||||
.size main, .-main
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 3
|
||||
.LC1:
|
||||
.word 0
|
||||
.word 1073741824
|
||||
.align 3
|
||||
.LC2:
|
||||
.word 1374389535
|
||||
.word 1074339512
|
||||
.ident "GCC: (GNU) 14.2.1 20250207"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
24
examples/triad/Makefile
Normal file
24
examples/triad/Makefile
Normal file
@@ -0,0 +1,24 @@
|
||||
# Makefile for RISC-V triad example
#
# Produces two builds of the same source: a plain -O3 build and a build with
# the vector extension enabled. Every build also writes the matching assembly
# listing (<target>.s) next to the executable.
# Recipe lines below start with a TAB character, as make requires.

CC = gcc
CFLAGS = -O3
CFLAGS_VEC = -O3 -march=rv64gcv

# Single C source shared by both builds
SRC = triad.c
# Executables and the assembly listings derived from them
BINS = triad triad_vec
ASMS = $(BINS:=.s)

# Default target with -O3
all: $(BINS)

# Build with -O3 optimization ($@ = target name, $< = first prerequisite)
triad: $(SRC)
	$(CC) $(CFLAGS) -o $@ $<
	$(CC) $(CFLAGS) -S -o $@.s $<

# Build with vector extensions
triad_vec: $(SRC)
	$(CC) $(CFLAGS_VEC) -o $@ $<
	$(CC) $(CFLAGS_VEC) -S -o $@.s $<

# Clean up executables and generated assembly
clean:
	rm -f $(BINS) $(ASMS)

.PHONY: all clean
|
||||
55
examples/triad/triad.c
Normal file
55
examples/triad/triad.c
Normal file
@@ -0,0 +1,55 @@
|
||||
// STREAM triad benchmark for RISC-V testing
|
||||
// a[i] = b[i] + s * c[i]
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define DTYPE double

/*
 * STREAM triad: a[i] = b[i] + s * c[i] for every i in [0, size).
 *
 * a    - destination array, at least `size` elements
 * b, c - source arrays, at least `size` elements each
 * s    - scalar multiplier applied to c
 * size - number of elements to process; nothing is written when size <= 0
 */
void kernel(DTYPE* a, DTYPE* b, DTYPE* c, const DTYPE s, const int size)
{
    // OSACA start marker will be added right before this loop
    int idx = 0;
    while (idx < size) {
        a[idx] = b[idx] + s * c[idx];
        ++idx;
    }
    // OSACA end marker will be added right after this loop
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int size = 1000;
|
||||
if(argc > 1) {
|
||||
size = atoi(argv[1]);
|
||||
}
|
||||
|
||||
printf("RISC-V STREAM triad: a[i] = b[i] + s * c[i], size=%d\n", size);
|
||||
|
||||
// Allocate memory
|
||||
DTYPE* a = (DTYPE*)malloc(size * sizeof(DTYPE));
|
||||
DTYPE* b = (DTYPE*)malloc(size * sizeof(DTYPE));
|
||||
DTYPE* c = (DTYPE*)malloc(size * sizeof(DTYPE));
|
||||
|
||||
// Initialize arrays
|
||||
for(int i=0; i<size; i++) {
|
||||
a[i] = 0.0;
|
||||
b[i] = i;
|
||||
c[i] = i * 2.0;
|
||||
}
|
||||
|
||||
// Run kernel
|
||||
DTYPE scalar = 3.14;
|
||||
kernel(a, b, c, scalar, size);
|
||||
|
||||
// Check result (to prevent optimization)
|
||||
DTYPE checksum = 0.0;
|
||||
for(int i=0; i<size; i++) {
|
||||
checksum += a[i];
|
||||
}
|
||||
printf("Checksum: %f\n", checksum);
|
||||
|
||||
// Cleanup
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
osaca_version: 0.6.1
|
||||
osaca_version: 0.7.0
|
||||
isa: riscv
|
||||
# Contains all operand-irregular instruction forms OSACA supports for RISC-V.
|
||||
# Operand-regular for a RISC-V instruction form with N operands in the shape of
|
||||
@@ -8,259 +8,6 @@ isa: riscv
|
||||
# For vector instructions with suffixes (.v, .vv, .vf), the operand behavior follows
|
||||
# the base instruction pattern.
|
||||
instruction_forms:
|
||||
- name: addi
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: immediate
|
||||
imd: int
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: add
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: sub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: mul
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: div
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: and
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: or
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: xor
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: sll
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: srl
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: sra
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: false
|
||||
destination: true
|
||||
- class: flag
|
||||
name: N
|
||||
source: false
|
||||
destination: true
|
||||
- name: lw
|
||||
operands:
|
||||
- class: register
|
||||
@@ -364,11 +111,6 @@ instruction_forms:
|
||||
- class: identifier
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: true
|
||||
destination: false
|
||||
- name: bne
|
||||
operands:
|
||||
- class: register
|
||||
@@ -382,11 +124,6 @@ instruction_forms:
|
||||
- class: identifier
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: Z
|
||||
source: true
|
||||
destination: false
|
||||
- name: blt
|
||||
operands:
|
||||
- class: register
|
||||
@@ -400,11 +137,6 @@ instruction_forms:
|
||||
- class: identifier
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: N
|
||||
source: true
|
||||
destination: false
|
||||
- name: bge
|
||||
operands:
|
||||
- class: register
|
||||
@@ -418,11 +150,6 @@ instruction_forms:
|
||||
- class: identifier
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: N
|
||||
source: true
|
||||
destination: false
|
||||
- name: jal
|
||||
operands:
|
||||
- class: register
|
||||
@@ -432,67 +159,6 @@ instruction_forms:
|
||||
- class: identifier
|
||||
source: true
|
||||
destination: false
|
||||
- name: jalr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- class: immediate
|
||||
imd: int
|
||||
source: true
|
||||
destination: false
|
||||
- name: lui
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: immediate
|
||||
imd: int
|
||||
source: true
|
||||
destination: false
|
||||
- name: auipc
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: immediate
|
||||
imd: int
|
||||
source: true
|
||||
destination: false
|
||||
- name: li
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: immediate
|
||||
imd: int
|
||||
source: true
|
||||
destination: false
|
||||
- name: mv
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
- name: ret
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
name: '1' # x1 is the return address register (ra)
|
||||
source: true
|
||||
destination: false
|
||||
- name: j
|
||||
operands:
|
||||
- class: identifier
|
||||
@@ -534,82 +200,6 @@ instruction_forms:
|
||||
post_indexed: '*'
|
||||
source: false
|
||||
destination: true
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: f
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: f
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: f
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: FSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: f
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: f
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: f
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: FSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: fmul
|
||||
operands:
|
||||
- class: register
|
||||
prefix: f
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: f
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: f
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: FSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: fdiv
|
||||
operands:
|
||||
- class: register
|
||||
prefix: f
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: f
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: f
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: FSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: fmv.x.w
|
||||
operands:
|
||||
- class: register
|
||||
@@ -652,11 +242,6 @@ instruction_forms:
|
||||
- class: identifier
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: VSR
|
||||
source: false
|
||||
destination: true
|
||||
- name: vsetivli
|
||||
operands:
|
||||
- class: register
|
||||
@@ -673,11 +258,6 @@ instruction_forms:
|
||||
- class: identifier
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: VSR
|
||||
source: false
|
||||
destination: true
|
||||
- name: vle32.v
|
||||
operands:
|
||||
- class: register
|
||||
@@ -693,11 +273,6 @@ instruction_forms:
|
||||
post_indexed: '*'
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: VSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: vse32.v
|
||||
operands:
|
||||
- class: register
|
||||
@@ -713,30 +288,6 @@ instruction_forms:
|
||||
post_indexed: '*'
|
||||
source: false
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: VSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: vadd.vv
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: v
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: v
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: VSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmv.v.f
|
||||
operands:
|
||||
- class: register
|
||||
@@ -747,30 +298,6 @@ instruction_forms:
|
||||
prefix: f
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: VSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmadd.vv
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
source: false
|
||||
destination: true
|
||||
- class: register
|
||||
prefix: v
|
||||
source: true
|
||||
destination: false
|
||||
- class: register
|
||||
prefix: v
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: VSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmacc.vf
|
||||
operands:
|
||||
- class: register
|
||||
@@ -785,12 +312,7 @@ instruction_forms:
|
||||
prefix: v
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: VSR
|
||||
source: true
|
||||
destination: true
|
||||
# CSR instructions
|
||||
# CSR instructions
|
||||
- name: csrr
|
||||
operands:
|
||||
- class: register
|
||||
@@ -800,11 +322,6 @@ instruction_forms:
|
||||
- class: identifier
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: CSR
|
||||
source: true
|
||||
destination: false
|
||||
- name: csrw
|
||||
operands:
|
||||
- class: identifier
|
||||
@@ -814,11 +331,6 @@ instruction_forms:
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: CSR
|
||||
source: false
|
||||
destination: true
|
||||
- name: csrs
|
||||
operands:
|
||||
- class: identifier
|
||||
@@ -828,11 +340,6 @@ instruction_forms:
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: CSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: csrc
|
||||
operands:
|
||||
- class: identifier
|
||||
@@ -842,12 +349,7 @@ instruction_forms:
|
||||
prefix: x
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: CSR
|
||||
source: true
|
||||
destination: true
|
||||
# Atomic instructions
|
||||
# Atomic instructions
|
||||
- name: lr.w
|
||||
operands:
|
||||
- class: register
|
||||
@@ -863,11 +365,6 @@ instruction_forms:
|
||||
post_indexed: '*'
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: CSR
|
||||
source: true
|
||||
destination: true
|
||||
- name: sc.w
|
||||
operands:
|
||||
- class: register
|
||||
@@ -886,9 +383,4 @@ instruction_forms:
|
||||
pre_indexed: '*'
|
||||
post_indexed: '*'
|
||||
source: true
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: flag
|
||||
name: CSR
|
||||
source: true
|
||||
destination: true
|
||||
destination: true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
osaca_version: 0.6.1
|
||||
osaca_version: 0.7.0
|
||||
micro_architecture: rv64
|
||||
arch_code: rv64
|
||||
isa: riscv
|
||||
@@ -653,4 +653,4 @@ instruction_forms:
|
||||
operands: []
|
||||
latency: 1
|
||||
throughput: 1
|
||||
port_pressure: [[1, ["ALU"]]]
|
||||
port_pressure: [[1, ["ALU"]]]
|
||||
@@ -107,8 +107,8 @@ def create_parser(parser=None):
|
||||
"--arch",
|
||||
type=str,
|
||||
help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ICX, SPR, ZEN1, ZEN2, ZEN3, "
|
||||
"ZEN4, TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a "
|
||||
"default uarch for x86/AArch64.",
|
||||
"ZEN4, TX2, N1, A64FX, TSV110, A72, M1, V2, RV64). If no architecture is given, OSACA assumes a "
|
||||
"default uarch for the detected ISA.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fixed",
|
||||
@@ -325,15 +325,28 @@ def inspect(args, output_file=sys.stdout):
|
||||
except Exception as e:
|
||||
# probably the wrong parser based on heuristic
|
||||
if args.arch is None:
|
||||
# change ISA and try again
|
||||
arch = (
|
||||
DEFAULT_ARCHS["x86"]
|
||||
if BaseParser.detect_ISA(code) == "aarch64"
|
||||
else DEFAULT_ARCHS["aarch64"]
|
||||
)
|
||||
isa = MachineModel.get_isa_for_arch(arch)
|
||||
parser = get_asm_parser(arch)
|
||||
parsed_code = parser.parse_file(code)
|
||||
# Try all supported ISAs in order if auto-detection may have failed
|
||||
detected_isa = BaseParser.detect_ISA(code)
|
||||
fallback_isas = ["x86", "aarch64", "riscv"]
|
||||
# Remove already tried ISA from fallback options
|
||||
if detected_isa in fallback_isas:
|
||||
fallback_isas.remove(detected_isa)
|
||||
|
||||
# Try each remaining ISA until one works
|
||||
for fallback_isa in fallback_isas:
|
||||
try:
|
||||
arch = DEFAULT_ARCHS[fallback_isa]
|
||||
isa = MachineModel.get_isa_for_arch(arch)
|
||||
parser = get_asm_parser(arch)
|
||||
parsed_code = parser.parse_file(code)
|
||||
# If parsing succeeds, break out of the loop
|
||||
break
|
||||
except Exception:
|
||||
# Continue trying with next ISA
|
||||
continue
|
||||
else:
|
||||
# If none of the parsers work, raise the original exception
|
||||
raise e
|
||||
else:
|
||||
raise e
|
||||
|
||||
|
||||
@@ -34,12 +34,24 @@ class BaseParser(object):
|
||||
heuristics_x86ATT = [r"%[xyz]mm[0-9]", r"%[er][abcd]x[0-9]"]
|
||||
# 2) check for v and z vector registers and x/w general-purpose registers
|
||||
heuristics_aarch64 = [r"[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]", r"[wx][0-9]"]
|
||||
matches = {"x86": 0, "aarch64": 0}
|
||||
# 3) check for RISC-V registers (x0-x31, a0-a7, t0-t6, s0-s11) and instructions
|
||||
heuristics_riscv = [
|
||||
r"\bx[0-9]|x[1-2][0-9]|x3[0-1]\b", # x0-x31 registers
|
||||
r"\ba[0-7]\b", # a0-a7 registers
|
||||
r"\bt[0-6]\b", # t0-t6 registers
|
||||
r"\bs[0-9]|s1[0-1]\b", # s0-s11 registers
|
||||
r"\bzero\b|\bra\b|\bsp\b|\bgp\b", # zero, ra, sp, gp registers
|
||||
r"\bvsetvli\b|\bvle\b|\bvse\b", # RV Vector instructions
|
||||
r"\baddi\b|\bsd\b|\bld\b|\bjal\b" # Common RISC-V instructions
|
||||
]
|
||||
matches = {"x86": 0, "aarch64": 0, "riscv": 0}
|
||||
|
||||
for h in heuristics_x86ATT:
|
||||
matches["x86"] += len(re.findall(h, file_content))
|
||||
for h in heuristics_aarch64:
|
||||
matches["aarch64"] += len(re.findall(h, file_content))
|
||||
for h in heuristics_riscv:
|
||||
matches["riscv"] += len(re.findall(h, file_content))
|
||||
|
||||
return max(matches.items(), key=operator.itemgetter(1))[0]
|
||||
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
import re
|
||||
import os
|
||||
import logging
|
||||
from copy import deepcopy
|
||||
import pyparsing as pp
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from osaca.parser import BaseParser
|
||||
from osaca.parser.instruction_form import InstructionForm
|
||||
from osaca.parser.operand import Operand
|
||||
@@ -30,7 +27,32 @@ class ParserRISCV(BaseParser):
|
||||
|
||||
def __init__(self):
    """Set up the RISC-V parser and record its ISA name.

    The ISA name is kept in the private attribute ``_isa_str`` and exposed
    through the ``isa()`` method. It is deliberately NOT stored as
    ``self.isa``: an instance attribute with that name would shadow the
    ``isa()`` method and turn every ``parser.isa()`` call into a
    "'str' object is not callable" error.
    """
    super().__init__()
    # Private backing field read by isa().
    self._isa_str = "riscv"
|
||||
|
||||
def isa(self):
    """Report the ISA name stored on this parser instance.

    :returns: the value of ``self._isa_str``
    """
    isa_name = self._isa_str
    return isa_name
|
||||
|
||||
def start_marker(self):
    """Build the OSACA start marker for RISC-V and return it in parsed form.

    The marker is a two-line assembly snippet: ``li a1, 111`` followed by a
    ``.byte 19,0,0,0`` directive (described by the original author as a NOP).
    It is intended to mirror the RISC-V start marker in marker_utils.py.

    :returns: whatever ``self.parse_file`` produces for the marker snippet
    """
    marker_lines = [
        "li a1, 111 # OSACA START MARKER\n",
        ".byte 19,0,0,0 # OSACA START MARKER\n",
    ]
    return self.parse_file("".join(marker_lines))
|
||||
|
||||
def end_marker(self):
    """Build the OSACA end marker for RISC-V and return it in parsed form.

    The marker is a two-line assembly snippet: ``li a1, 222`` followed by a
    ``.byte 19,0,0,0`` directive (described by the original author as a NOP).
    It is intended to mirror the RISC-V end marker in marker_utils.py.

    :returns: whatever ``self.parse_file`` produces for the marker snippet
    """
    marker_lines = [
        "li a1, 222 # OSACA END MARKER\n",
        ".byte 19,0,0,0 # OSACA END MARKER\n",
    ]
    return self.parse_file("".join(marker_lines))
|
||||
|
||||
def construct_parser(self):
|
||||
"""Create parser for RISC-V ISA."""
|
||||
@@ -52,10 +74,15 @@ class ParserRISCV(BaseParser):
|
||||
vector_identifier = pp.Word(pp.alphas, pp.alphanums)
|
||||
special_identifier = pp.Word(pp.alphas + "%")
|
||||
|
||||
# First character of an identifier
|
||||
first = pp.Word(pp.alphas + "_.", exact=1)
|
||||
# Rest of the identifier
|
||||
rest = pp.Word(pp.alphanums + "_.")
|
||||
# PLT suffix (@plt) for calls to shared libraries
|
||||
plt_suffix = pp.Optional(pp.Literal("@") + pp.Word(pp.alphas))
|
||||
|
||||
identifier = pp.Group(
|
||||
pp.Combine(first + pp.Optional(rest)).setResultsName("name")
|
||||
(pp.Combine(first + pp.Optional(rest) + plt_suffix)).setResultsName("name")
|
||||
+ pp.Optional(
|
||||
pp.Suppress(pp.Literal("+"))
|
||||
+ (hex_number | decimal_number).setResultsName("offset")
|
||||
@@ -346,7 +373,6 @@ class ParserRISCV(BaseParser):
|
||||
return return_dict
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error parsing instruction: {instruction} - {str(e)}")
|
||||
# For special vector instructions or ones with % in them
|
||||
if "%" in instruction or instruction.startswith("v"):
|
||||
parts = instruction.split("#")[0].strip().split(None, 1)
|
||||
@@ -640,4 +666,53 @@ class ParserRISCV(BaseParser):
|
||||
elif name.startswith("csr"):
|
||||
return "csr" # Control and Status Register
|
||||
|
||||
return "unknown"
|
||||
return "unknown"
|
||||
|
||||
def normalize_instruction_form(self, instruction_form, isa_model, arch_model):
    """
    Normalize a RISC-V instruction form in place.

    Forms that are already normalized are left untouched. Forms without a
    mnemonic are only flagged as normalized. For all other forms every
    immediate operand is replaced by the result of ``self.normalize_imd``;
    mnemonics and register operands are currently passed through unchanged.

    :param instruction_form: instruction form to normalize (mutated in place)
    :param isa_model: ISA model; accepted for interface compatibility, not used here
    :param arch_model: architecture model; accepted for interface compatibility,
        not used here
    """
    if instruction_form.normalized:
        return

    if instruction_form.mnemonic is not None:
        for idx, operand in enumerate(instruction_form.operands):
            if isinstance(operand, ImmediateOperand):
                # NOTE(review): normalize_imd is defined outside this block;
                # confirm it returns an operand object rather than a bare
                # value, since its result replaces the operand in the list.
                instruction_form.operands[idx] = self.normalize_imd(operand)

    instruction_form.normalized = True
|
||||
|
||||
def get_regular_source_operands(self, instruction_form):
    """Return the source operands under the regular RISC-V operand layout.

    The regular layout treats the first operand as the destination and all
    following operands as sources. A form with exactly one operand is the
    exception: that single operand is reported as a source.

    :param instruction_form: object exposing an ``operands`` sequence
    :returns: new list of source operands (empty when there are no operands)
    """
    operands = instruction_form.operands
    if len(operands) == 1:
        # Lone operand: treated as a source, not a destination.
        return [operands[0]]
    # Everything after the leading (destination) operand.
    return list(operands[1:])
|
||||
|
||||
def get_regular_destination_operands(self, instruction_form):
    """Return the destination operands under the regular RISC-V operand layout.

    The regular layout treats the first operand as the destination. A form
    with exactly one operand is the exception: it has no destination.

    :param instruction_form: object exposing an ``operands`` sequence
    :returns: list holding the first operand, or an empty list when the form
        has exactly one operand or none at all
    """
    operands = instruction_form.operands
    if len(operands) != 1:
        # Leading operand only; slicing yields [] when there are no operands.
        return operands[:1]
    # Lone operand: counted as a source elsewhere, so no destination here.
    return []
|
||||
Reference in New Issue
Block a user