Add RISC-V vector add and triad benchmarks with corresponding Makefiles and assembly files

This commit is contained in:
Metehan Dundar
2025-05-08 11:57:06 +02:00
parent d782f06e84
commit aa3753d024
13 changed files with 1103 additions and 533 deletions

24
examples/add/Makefile Normal file
View File

@@ -0,0 +1,24 @@
# Makefile for the RISC-V vector add example.
#
# Builds the benchmark twice -- scalar (-O3) and with the vector extension
# (-O3 -march=rv64gcv) -- and emits the matching assembly listing for each.
# CC has to be a compiler that targets RISC-V; when cross-compiling, override
# it on the command line, e.g. `make CC=riscv64-linux-gnu-gcc`.
CC = gcc
CFLAGS = -O3
CFLAGS_VEC = -O3 -march=rv64gcv

# Remove a half-written output when its recipe fails.
.DELETE_ON_ERROR:

# Default target: both binaries (each one pulls in its listing, see below)
all: add_riscv add_riscv_vec

# Build with -O3 optimization.
# The listing is an order-only prerequisite: `make add_riscv` still produces
# add_riscv.s, and a deleted or outdated listing is regenerated, but a fresh
# listing alone does not force the binary to be relinked.
add_riscv: add_riscv.c | add_riscv.s
	$(CC) $(CFLAGS) -o $@ $<

add_riscv.s: add_riscv.c
	$(CC) $(CFLAGS) -S -o $@ $<

# Build with vector extensions (same structure as the scalar build)
add_riscv_vec: add_riscv.c | add_riscv_vec.s
	$(CC) $(CFLAGS_VEC) -o $@ $<

add_riscv_vec.s: add_riscv.c
	$(CC) $(CFLAGS_VEC) -S -o $@ $<

# Clean up everything this Makefile generates
clean:
	$(RM) add_riscv add_riscv_vec add_riscv.s add_riscv_vec.s

.PHONY: all clean

54
examples/add/add_riscv.c Normal file
View File

@@ -0,0 +1,54 @@
// Vector add benchmark for RISC-V testing
// a[i] = b[i] + c[i]
#include <stdio.h>
#include <stdlib.h>
/* Element type used by the benchmark arrays. */
#define DTYPE double

/*
 * Element-wise vector add: a[i] = b[i] + c[i] for every i in [0, size).
 * Does nothing when size is zero or negative.
 */
void kernel(DTYPE* a, DTYPE* b, DTYPE* c, const int size)
{
    int idx = 0;

    /* The OSACA start/end markers are meant to be placed around this loop. */
    while (idx < size) {
        a[idx] = b[idx] + c[idx];
        ++idx;
    }
}
/*
 * Benchmark driver for the vector add kernel.
 *
 * Usage: add_riscv [size]
 *   size  number of elements per array (default 1000), parsed with atoi().
 *
 * Allocates the three arrays, initializes them, runs the kernel once and
 * prints a checksum of the result so the compiler cannot drop the work.
 *
 * Returns 0 on success and EXIT_FAILURE when an array cannot be allocated.
 */
int main(int argc, char *argv[]) {
    int size = 1000;
    if(argc > 1) {
        size = atoi(argv[1]);
    }

    printf("RISC-V Vector add: a[i] = b[i] + c[i], size=%d\n", size);

    // Allocate memory
    DTYPE* a = (DTYPE*)malloc(size * sizeof(DTYPE));
    DTYPE* b = (DTYPE*)malloc(size * sizeof(DTYPE));
    DTYPE* c = (DTYPE*)malloc(size * sizeof(DTYPE));

    // Only a failed allocation for a positive size is an error: the loops
    // below never touch the arrays when size <= 0, and malloc(0) is allowed
    // to return NULL.
    if(size > 0 && (a == NULL || b == NULL || c == NULL)) {
        fprintf(stderr, "Failed to allocate 3 arrays of %d elements\n", size);
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    // Initialize arrays
    for(int i=0; i<size; i++) {
        a[i] = 0.0;
        b[i] = i;
        c[i] = i * 2.0;
    }

    // Run kernel
    kernel(a, b, c, size);

    // Check result (to prevent optimization)
    DTYPE checksum = 0.0;
    for(int i=0; i<size; i++) {
        checksum += a[i];
    }
    printf("Checksum: %f\n", checksum);

    // Cleanup
    free(a);
    free(b);
    free(c);
    return 0;
}

View File

@@ -0,0 +1,178 @@
.file "add_riscv.c"
.option pic
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 1
.globl kernel
.type kernel, @function
# kernel: scalar loop computing a[i] = b[i] + c[i] on doubles (rv64gc, -O3).
# Register use visible below: a0 = store pointer (a), a1 and a2 = load
# pointers (b and c), a3 = element count.
kernel:
.LFB22:
.cfi_startproc
# Nothing to do when the element count is <= 0.
ble a3,zero,.L1
# a5 = a1 + count * 8: end pointer of b, used as the loop bound.
slli a3,a3,3
add a5,a1,a3
# Loop body: one double per iteration (2 loads, 1 add, 1 store).
.L3:
fld fa5,0(a1)
fld fa4,0(a2)
addi a1,a1,8
addi a2,a2,8
fadd.d fa5,fa5,fa4
addi a0,a0,8
# a0 has already been advanced, hence the -8 offset on the store.
fsd fa5,-8(a0)
bne a1,a5,.L3
.L1:
ret
.cfi_endproc
.LFE22:
.size kernel, .-kernel
.section .rodata.str1.8,"aMS",@progbits,1
.align 3
.LC0:
.string "RISC-V Vector add: a[i] = b[i] + c[i], size=%d\n"
.align 3
.LC1:
.string "Checksum: %f\n"
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
# main: benchmark driver (rv64gc, -O3). The kernel call has been inlined
# (loop .L11). Callee-saved registers hold: s1/s2/s3 = the three malloc'ed
# arrays in allocation order, s0 = array size in bytes, s4 = element count.
main:
.LFB23:
.cfi_startproc
# Prologue: 48-byte frame, save ra and s0-s4.
addi sp,sp,-48
.cfi_def_cfa_offset 48
sd ra,40(sp)
sd s0,32(sp)
sd s1,24(sp)
sd s2,16(sp)
sd s3,8(sp)
sd s4,0(sp)
.cfi_offset 1, -8
.cfi_offset 8, -16
.cfi_offset 9, -24
.cfi_offset 18, -32
.cfi_offset 19, -40
.cfi_offset 20, -48
# a0 holds argc: with more than one argument, parse the size at .L21.
li a5,1
bgt a0,a5,.L21
# Default path: size = 1000, so every allocation is 8192 - 192 = 8000 bytes.
li a1,1000
lla a0,.LC0
call printf@plt
li a0,8192
addi a0,a0,-192
call malloc@plt
mv s1,a0
li a0,8192
addi a0,a0,-192
call malloc@plt
mv s2,a0
li a0,8192
addi a0,a0,-192
call malloc@plt
li s0,8192
mv s3,a0
addi s0,s0,-192
li s4,1000
# Common path for both entry points: initialize, compute, checksum.
.L13:
# Zero the first array with memset; a2 = zero-extended count * 8 bytes.
slli a5,s4,32
srli a2,a5,29
li a1,0
mv a0,s1
call memset@plt
mv a4,s2
mv a3,s2
li a5,0
# Fill the second array with (double)i; a1 ends up holding the last index.
.L9:
fcvt.d.w fa5,a5
mv a1,a5
addiw a5,a5,1
fsd fa5,0(a3)
addi a3,a3,8
bne a5,s4,.L9
mv a2,s3
mv a3,s3
li a5,0
# Fill the third array with 2 * i, computed as (double)i + (double)i.
.L10:
fcvt.d.w fa5,a5
mv a0,a5
addi a3,a3,8
fadd.d fa5,fa5,fa5
addiw a5,a5,1
fsd fa5,-8(a3)
bne a0,a1,.L10
mv a5,s1
add a1,s2,s0
mv a3,s1
# Inlined kernel: element-wise add of the second and third array into the
# first one.
.L11:
fld fa5,0(a4)
fld fa4,0(a2)
addi a4,a4,8
addi a2,a2,8
fadd.d fa5,fa5,fa4
addi a3,a3,8
fsd fa5,-8(a3)
bne a4,a1,.L11
# Checksum: fa5 starts at 0.0 and accumulates every element of the result.
fmv.d.x fa5,zero
add s0,s1,s0
.L12:
fld fa4,0(a5)
addi a5,a5,8
fadd.d fa5,fa5,fa4
bne a5,s0,.L12
.L8:
# The double is moved into integer register a1 for the printf call.
fmv.x.d a1,fa5
lla a0,.LC1
call printf@plt
# Release the three arrays.
mv a0,s1
call free@plt
mv a0,s2
call free@plt
mv a0,s3
call free@plt
# Epilogue: restore saved registers and return 0.
ld ra,40(sp)
.cfi_remember_state
.cfi_restore 1
ld s0,32(sp)
.cfi_restore 8
ld s1,24(sp)
.cfi_restore 9
ld s2,16(sp)
.cfi_restore 18
ld s3,8(sp)
.cfi_restore 19
ld s4,0(sp)
.cfi_restore 20
li a0,0
addi sp,sp,48
.cfi_def_cfa_offset 0
jr ra
# Size given on the command line: strtol(argv[1], NULL, 10), result
# sign-extended from 32 bits into s4.
.L21:
.cfi_restore_state
ld a0,8(a1)
li a2,10
li a1,0
call strtol@plt
sext.w s4,a0
mv a1,s4
lla a0,.LC0
call printf@plt
# s0 = count * 8 bytes, used for all three allocations.
slli s0,s4,3
mv a0,s0
call malloc@plt
mv s1,a0
mv a0,s0
call malloc@plt
mv s2,a0
mv a0,s0
call malloc@plt
mv s3,a0
# Run the loops only for a positive count; otherwise print a 0.0 checksum.
bgt s4,zero,.L13
fmv.d.x fa5,zero
j .L8
.cfi_endproc
.LFE23:
.size main, .-main
.ident "GCC: (GNU) 14.2.1 20250207"
.section .note.GNU-stack,"",@progbits

View File

@@ -0,0 +1,225 @@
.file "add_riscv.c"
.option pic
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_zifencei2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 1
.globl kernel
.type kernel, @function
# kernel: a[i] = b[i] + c[i] on doubles, built with -march=rv64gcv.
# Contains a vector loop (.L5) and a scalar fallback loop (.L7).
# Register use visible below: a0 = store pointer (a), a1 and a2 = load
# pointers (b and c), a3 = element count.
kernel:
.LFB22:
.cfi_startproc
# Nothing to do when the element count is <= 0.
ble a3,zero,.L10
# Use the scalar loop when count - 1 <= 2 (unsigned compare).
addiw a5,a3,-1
li a4,2
bleu a5,a4,.L3
# a5 = a0 - (a2 + 8), a4 = a0 - (a1 + 8); .L14 keeps the smaller of the two
# (unsigned) in a5. NOTE(review): this looks like the compiler's runtime
# overlap check between the output and the inputs -- confirm.
addi a5,a2,8
addi a4,a1,8
sub a5,a0,a5
sub a4,a0,a4
bgtu a5,a4,.L14
.L4:
# Fall back to the scalar loop when that distance is <= vlenb - 16.
csrr a4,vlenb
addi a4,a4,-16
bleu a5,a4,.L3
# Vector loop: vsetvli returns in a5 the number of 64-bit elements handled
# in this iteration; a4 = a5 * 8 is the byte step applied to all pointers.
.L5:
vsetvli a5,a3,e64,m1,ta,ma
vle64.v v1,0(a1)
vle64.v v2,0(a2)
slli a4,a5,3
sub a3,a3,a5
add a1,a1,a4
add a2,a2,a4
vfadd.vv v1,v1,v2
vse64.v v1,0(a0)
add a0,a0,a4
bne a3,zero,.L5
ret
# Scalar fallback: a3 becomes the end pointer of b, one double per iteration.
.L3:
slli a3,a3,3
add a3,a1,a3
.L7:
fld fa5,0(a1)
fld fa4,0(a2)
addi a1,a1,8
addi a2,a2,8
fadd.d fa5,fa5,fa4
addi a0,a0,8
# a0 has already been advanced, hence the -8 offset on the store.
fsd fa5,-8(a0)
bne a1,a3,.L7
.L10:
ret
# Out-of-line part of the minimum computation above: a5 = a4.
.L14:
mv a5,a4
j .L4
.cfi_endproc
.LFE22:
.size kernel, .-kernel
.section .rodata.str1.8,"aMS",@progbits,1
.align 3
.LC0:
.string "RISC-V Vector add: a[i] = b[i] + c[i], size=%d\n"
.align 3
.LC2:
.string "Checksum: %f\n"
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
# main: benchmark driver built with -march=rv64gcv. The kernel call has been
# inlined as a vector loop (.L20). Callee-saved registers hold: s0/s2/s1 =
# the three malloc'ed arrays in allocation order, s3 = array size in bytes,
# s4 = element count.
main:
.LFB23:
.cfi_startproc
# Prologue: 48-byte frame, save ra and s0-s4.
addi sp,sp,-48
.cfi_def_cfa_offset 48
sd ra,40(sp)
sd s0,32(sp)
sd s1,24(sp)
sd s2,16(sp)
sd s3,8(sp)
sd s4,0(sp)
.cfi_offset 1, -8
.cfi_offset 8, -16
.cfi_offset 9, -24
.cfi_offset 18, -32
.cfi_offset 19, -40
.cfi_offset 20, -48
# a0 holds argc: with more than one argument, parse the size at .L35.
li a5,1
bgt a0,a5,.L35
# Default path: size = 1000, so every allocation is 8192 - 192 = 8000 bytes.
li a1,1000
lla a0,.LC0
call printf@plt
li a0,8192
addi a0,a0,-192
call malloc@plt
mv s0,a0
li a0,8192
addi a0,a0,-192
call malloc@plt
mv s2,a0
li a0,8192
addi a0,a0,-192
call malloc@plt
li s3,8192
mv s1,a0
addi s3,s3,-192
li s4,1000
# Common path for both entry points: initialize, compute, checksum.
.L22:
# Zero the first array with memset; a2 = zero-extended count * 8 bytes.
slli a5,s4,32
srli a2,a5,29
li a1,0
mv a0,s0
call memset@plt
mv a4,s2
li a5,0
# Scalar loop: fill the second array with (double)i. a5 == count on exit.
.L18:
fcvt.d.w fa5,a5
addiw a5,a5,1
addi a4,a4,8
fsd fa5,-8(a4)
bne a5,s4,.L18
# Vector setup for the third array: v4 = splat of the .LC1 constant (the
# double 2.0), v2 = index vector 0,1,2,..., a2 = elements left, a3 = output.
fld fa5,.LC1,a4
vsetvli a3,zero,e64,m1,ta,ma
mv a2,a5
vfmv.v.f v4,fa5
vsetvli zero,zero,e32,mf2,ta,ma
vid.v v2
mv a3,s1
# Per iteration: widen the 32-bit indices to double, multiply by v4, store,
# then advance the indices by the number of elements just handled (a4).
.L19:
vsetvli a4,a2,e32,mf2,ta,ma
vfwcvt.f.x.v v1,v2
vsetvli a0,zero,e32,mf2,ta,ma
vmv.v.x v3,a4
vsetvli zero,a4,e64,m1,ta,ma
vfmul.vv v1,v1,v4
vsetvli a0,zero,e32,mf2,ta,ma
vadd.vv v2,v2,v3
vsetvli zero,a4,e64,m1,ta,ma
slli a1,a4,3
sub a2,a2,a4
vse64.v v1,0(a3)
add a3,a3,a1
bne a2,zero,.L19
mv a3,s0
mv a0,s1
mv a1,s2
# Inlined kernel: vector add of the second and third array into the first.
.L20:
vsetvli a4,a5,e64,m1,ta,ma
vle64.v v2,0(a1)
vle64.v v1,0(a0)
slli a2,a4,3
sub a5,a5,a4
add a1,a1,a2
add a0,a0,a2
vfadd.vv v1,v1,v2
vse64.v v1,0(a3)
add a3,a3,a2
bne a5,zero,.L20
# Checksum (scalar): fa5 starts at 0.0 and accumulates every result element.
fmv.d.x fa5,zero
add s3,s0,s3
mv a5,s0
.L21:
fld fa4,0(a5)
addi a5,a5,8
fadd.d fa5,fa5,fa4
bne s3,a5,.L21
.L17:
# The double is moved into integer register a1 for the printf call.
fmv.x.d a1,fa5
lla a0,.LC2
call printf@plt
# Release the three arrays.
mv a0,s0
call free@plt
mv a0,s2
call free@plt
mv a0,s1
call free@plt
# Epilogue: restore saved registers and return 0.
ld ra,40(sp)
.cfi_remember_state
.cfi_restore 1
ld s0,32(sp)
.cfi_restore 8
ld s1,24(sp)
.cfi_restore 9
ld s2,16(sp)
.cfi_restore 18
ld s3,8(sp)
.cfi_restore 19
ld s4,0(sp)
.cfi_restore 20
li a0,0
addi sp,sp,48
.cfi_def_cfa_offset 0
jr ra
# Size given on the command line: strtol(argv[1], NULL, 10), result
# sign-extended from 32 bits into s4.
.L35:
.cfi_restore_state
ld a0,8(a1)
li a2,10
li a1,0
call strtol@plt
sext.w s4,a0
mv a1,s4
lla a0,.LC0
call printf@plt
# s3 = count * 8 bytes, used for all three allocations.
slli s3,s4,3
mv a0,s3
call malloc@plt
mv s0,a0
mv a0,s3
call malloc@plt
mv s2,a0
mv a0,s3
call malloc@plt
mv s1,a0
# Run the loops only for a positive count; otherwise print a 0.0 checksum.
bgt s4,zero,.L22
fmv.d.x fa5,zero
j .L17
.cfi_endproc
.LFE23:
.size main, .-main
.section .rodata.cst8,"aM",@progbits,8
.align 3
.LC1:
.word 0
.word 1073741824
.ident "GCC: (GNU) 14.2.1 20250207"
.section .note.GNU-stack,"",@progbits

View File

@@ -0,0 +1,184 @@
.file "triad.c"
.option pic
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 1
.globl kernel
.type kernel, @function
# kernel: scalar STREAM triad loop, a[i] = b[i] + s * c[i] on doubles
# (rv64gc, -O3). Register use visible below: a0 = store pointer (a),
# a1 = b, a2 = c, a3 = element count, fa0 = scalar s.
kernel:
.LFB22:
.cfi_startproc
# Nothing to do when the element count is <= 0.
ble a3,zero,.L1
# a5 = a1 + count * 8: end pointer of b, used as the loop bound.
slli a3,a3,3
add a5,a1,a3
# Loop body: one double per iteration (2 loads, 1 fused multiply-add, 1 store).
.L3:
fld fa5,0(a2)
fld fa4,0(a1)
addi a1,a1,8
addi a2,a2,8
# fa5 = fa5 * fa0 + fa4, i.e. c[i] * s + b[i].
fmadd.d fa5,fa5,fa0,fa4
addi a0,a0,8
# a0 has already been advanced, hence the -8 offset on the store.
fsd fa5,-8(a0)
bne a1,a5,.L3
.L1:
ret
.cfi_endproc
.LFE22:
.size kernel, .-kernel
.section .rodata.str1.8,"aMS",@progbits,1
.align 3
.LC0:
.string "RISC-V STREAM triad: a[i] = b[i] + s * c[i], size=%d\n"
.align 3
.LC2:
.string "Checksum: %f\n"
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
# main: triad benchmark driver (rv64gc, -O3). The kernel call has been
# inlined (loop .L11). Callee-saved registers hold: s1/s2/s3 = the three
# malloc'ed arrays in allocation order, s0 = array size in bytes,
# s4 = element count.
main:
.LFB23:
.cfi_startproc
# Prologue: 48-byte frame, save ra and s0-s4.
addi sp,sp,-48
.cfi_def_cfa_offset 48
sd ra,40(sp)
sd s0,32(sp)
sd s1,24(sp)
sd s2,16(sp)
sd s3,8(sp)
sd s4,0(sp)
.cfi_offset 1, -8
.cfi_offset 8, -16
.cfi_offset 9, -24
.cfi_offset 18, -32
.cfi_offset 19, -40
.cfi_offset 20, -48
# a0 holds argc: with more than one argument, parse the size at .L21.
li a5,1
bgt a0,a5,.L21
# Default path: size = 1000, so every allocation is 8192 - 192 = 8000 bytes.
li a1,1000
lla a0,.LC0
call printf@plt
li a0,8192
addi a0,a0,-192
call malloc@plt
mv s1,a0
li a0,8192
addi a0,a0,-192
call malloc@plt
mv s2,a0
li a0,8192
addi a0,a0,-192
call malloc@plt
li s0,8192
mv s3,a0
addi s0,s0,-192
li s4,1000
# Common path for both entry points: initialize, compute, checksum.
.L13:
# Zero the first array with memset; a2 = zero-extended count * 8 bytes.
slli a5,s4,32
srli a2,a5,29
li a1,0
mv a0,s1
call memset@plt
mv a4,s2
mv a3,s2
li a5,0
# Fill the second array with (double)i; a1 ends up holding the last index.
.L9:
fcvt.d.w fa5,a5
mv a1,a5
addiw a5,a5,1
fsd fa5,0(a3)
addi a3,a3,8
bne a5,s4,.L9
mv a2,s3
mv a3,s3
li a5,0
# Fill the third array with 2 * i, computed as (double)i + (double)i.
.L10:
fcvt.d.w fa5,a5
mv a0,a5
addi a3,a3,8
fadd.d fa5,fa5,fa5
addiw a5,a5,1
fsd fa5,-8(a3)
bne a0,a1,.L10
# fa3 = the .LC1 constant (the double 3.14, the triad scalar).
fld fa3,.LC1,a5
add a1,s2,s0
mv a5,s1
mv a3,s1
# Inlined kernel: result = third[i] * fa3 + second[i], stored to the first
# array.
.L11:
fld fa5,0(a2)
fld fa4,0(a4)
addi a4,a4,8
addi a2,a2,8
fmadd.d fa5,fa5,fa3,fa4
addi a3,a3,8
fsd fa5,-8(a3)
bne a4,a1,.L11
# Checksum: fa5 starts at 0.0 and accumulates every element of the result.
fmv.d.x fa5,zero
add s0,s1,s0
.L12:
fld fa4,0(a5)
addi a5,a5,8
fadd.d fa5,fa5,fa4
bne a5,s0,.L12
.L8:
# The double is moved into integer register a1 for the printf call.
fmv.x.d a1,fa5
lla a0,.LC2
call printf@plt
# Release the three arrays.
mv a0,s1
call free@plt
mv a0,s2
call free@plt
mv a0,s3
call free@plt
# Epilogue: restore saved registers and return 0.
ld ra,40(sp)
.cfi_remember_state
.cfi_restore 1
ld s0,32(sp)
.cfi_restore 8
ld s1,24(sp)
.cfi_restore 9
ld s2,16(sp)
.cfi_restore 18
ld s3,8(sp)
.cfi_restore 19
ld s4,0(sp)
.cfi_restore 20
li a0,0
addi sp,sp,48
.cfi_def_cfa_offset 0
jr ra
# Size given on the command line: strtol(argv[1], NULL, 10), result
# sign-extended from 32 bits into s4.
.L21:
.cfi_restore_state
ld a0,8(a1)
li a2,10
li a1,0
call strtol@plt
sext.w s4,a0
mv a1,s4
lla a0,.LC0
call printf@plt
# s0 = count * 8 bytes, used for all three allocations.
slli s0,s4,3
mv a0,s0
call malloc@plt
mv s1,a0
mv a0,s0
call malloc@plt
mv s2,a0
mv a0,s0
call malloc@plt
mv s3,a0
# Run the loops only for a positive count; otherwise print a 0.0 checksum.
bgt s4,zero,.L13
fmv.d.x fa5,zero
j .L8
.cfi_endproc
.LFE23:
.size main, .-main
.section .rodata.cst8,"aM",@progbits,8
.align 3
.LC1:
.word 1374389535
.word 1074339512
.ident "GCC: (GNU) 14.2.1 20250207"
.section .note.GNU-stack,"",@progbits

View File

@@ -0,0 +1,234 @@
.file "triad.c"
.option pic
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_zifencei2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 1
.globl kernel
.type kernel, @function
# kernel: STREAM triad a[i] = b[i] + s * c[i] on doubles, built with
# -march=rv64gcv. Contains a vector loop (.L5) and a scalar fallback (.L7).
# Register use visible below: a0 = store pointer (a), a1 = b, a2 = c,
# a3 = element count, fa0 = scalar s.
kernel:
.LFB22:
.cfi_startproc
# Nothing to do when the element count is <= 0.
ble a3,zero,.L10
# Use the scalar loop when count - 1 <= 4 (unsigned compare).
addiw a5,a3,-1
li a4,4
bleu a5,a4,.L3
# a5 = a0 - (a2 + 8), a4 = a0 - (a1 + 8); .L14 keeps the smaller of the two
# (unsigned) in a5. NOTE(review): this looks like the compiler's runtime
# overlap check between the output and the inputs -- confirm.
addi a5,a2,8
addi a4,a1,8
sub a5,a0,a5
sub a4,a0,a4
bgtu a5,a4,.L14
.L4:
# Fall back to the scalar loop when that distance is <= vlenb - 16.
csrr a4,vlenb
addi a4,a4,-16
bleu a5,a4,.L3
# v3 = splat of the scalar s, set up once before the loop.
vsetvli a5,zero,e64,m1,ta,ma
vfmv.v.f v3,fa0
# Vector loop: vsetvli returns in a5 the number of 64-bit elements handled
# in this iteration; a4 = a5 * 8 is the byte step applied to all pointers.
.L5:
vsetvli a5,a3,e64,m1,ta,ma
vle64.v v2,0(a1)
vle64.v v1,0(a2)
slli a4,a5,3
sub a3,a3,a5
add a1,a1,a4
add a2,a2,a4
# v1 = v1 * v3 + v2, i.e. c * s + b.
vfmadd.vv v1,v3,v2
vse64.v v1,0(a0)
add a0,a0,a4
bne a3,zero,.L5
ret
# Scalar fallback: a3 becomes the end pointer of b, one double per iteration.
.L3:
slli a3,a3,3
add a3,a1,a3
.L7:
fld fa5,0(a2)
fld fa4,0(a1)
addi a1,a1,8
addi a2,a2,8
# fa5 = fa0 * fa5 + fa4, i.e. s * c[i] + b[i].
fmadd.d fa5,fa0,fa5,fa4
addi a0,a0,8
# a0 has already been advanced, hence the -8 offset on the store.
fsd fa5,-8(a0)
bne a3,a1,.L7
.L10:
ret
# Out-of-line part of the minimum computation above: a5 = a4.
.L14:
mv a5,a4
j .L4
.cfi_endproc
.LFE22:
.size kernel, .-kernel
.section .rodata.str1.8,"aMS",@progbits,1
.align 3
.LC0:
.string "RISC-V STREAM triad: a[i] = b[i] + s * c[i], size=%d\n"
.align 3
.LC3:
.string "Checksum: %f\n"
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
# main: triad benchmark driver built with -march=rv64gcv. The kernel call has
# been inlined as a vector loop (.L20). Callee-saved registers hold:
# s0/s2/s1 = the three malloc'ed arrays in allocation order, s3 = array size
# in bytes, s4 = element count.
main:
.LFB23:
.cfi_startproc
# Prologue: 48-byte frame, save ra and s0-s4.
addi sp,sp,-48
.cfi_def_cfa_offset 48
sd ra,40(sp)
sd s0,32(sp)
sd s1,24(sp)
sd s2,16(sp)
sd s3,8(sp)
sd s4,0(sp)
.cfi_offset 1, -8
.cfi_offset 8, -16
.cfi_offset 9, -24
.cfi_offset 18, -32
.cfi_offset 19, -40
.cfi_offset 20, -48
# a0 holds argc: with more than one argument, parse the size at .L35.
li a5,1
bgt a0,a5,.L35
# Default path: size = 1000, so every allocation is 8192 - 192 = 8000 bytes.
li a1,1000
lla a0,.LC0
call printf@plt
li a0,8192
addi a0,a0,-192
call malloc@plt
mv s0,a0
li a0,8192
addi a0,a0,-192
call malloc@plt
mv s2,a0
li a0,8192
addi a0,a0,-192
call malloc@plt
li s3,8192
mv s1,a0
addi s3,s3,-192
li s4,1000
# Common path for both entry points: initialize, compute, checksum.
.L22:
# Zero the first array with memset; a2 = zero-extended count * 8 bytes.
slli a5,s4,32
srli a2,a5,29
li a1,0
mv a0,s0
call memset@plt
mv a4,s2
li a5,0
# Scalar loop: fill the second array with (double)i. a5 == count on exit.
.L18:
fcvt.d.w fa5,a5
addiw a5,a5,1
addi a4,a4,8
fsd fa5,-8(a4)
bne a5,s4,.L18
# Vector setup for the third array: v4 = splat of the .LC1 constant (the
# double 2.0), v2 = index vector 0,1,2,..., a2 = elements left, a3 = output.
fld fa5,.LC1,a4
vsetvli a3,zero,e64,m1,ta,ma
mv a2,a5
vfmv.v.f v4,fa5
vsetvli zero,zero,e32,mf2,ta,ma
vid.v v2
mv a3,s1
# Per iteration: widen the 32-bit indices to double, multiply by v4, store,
# then advance the indices by the number of elements just handled (a4).
.L19:
vsetvli a4,a2,e32,mf2,ta,ma
vfwcvt.f.x.v v1,v2
vsetvli a0,zero,e32,mf2,ta,ma
vmv.v.x v3,a4
vsetvli zero,a4,e64,m1,ta,ma
vfmul.vv v1,v1,v4
vsetvli a0,zero,e32,mf2,ta,ma
vadd.vv v2,v2,v3
vsetvli zero,a4,e64,m1,ta,ma
slli a1,a4,3
sub a2,a2,a4
vse64.v v1,0(a3)
add a3,a3,a1
bne a2,zero,.L19
# v3 = splat of the .LC2 constant (the double 3.14, the triad scalar).
fld fa5,.LC2,a4
vsetvli a0,zero,e64,m1,ta,ma
mv a3,s0
vfmv.v.f v3,fa5
mv a0,s1
mv a1,s2
# Inlined kernel: v1 = third * v3 + second, stored to the first array.
.L20:
vsetvli a4,a5,e64,m1,ta,ma
vle64.v v2,0(a1)
vle64.v v1,0(a0)
slli a2,a4,3
sub a5,a5,a4
add a1,a1,a2
add a0,a0,a2
vfmadd.vv v1,v3,v2
vse64.v v1,0(a3)
add a3,a3,a2
bne a5,zero,.L20
# Checksum (scalar): fa5 starts at 0.0 and accumulates every result element.
fmv.d.x fa5,zero
add s3,s0,s3
mv a5,s0
.L21:
fld fa4,0(a5)
addi a5,a5,8
fadd.d fa5,fa5,fa4
bne s3,a5,.L21
.L17:
# The double is moved into integer register a1 for the printf call.
fmv.x.d a1,fa5
lla a0,.LC3
call printf@plt
# Release the three arrays.
mv a0,s0
call free@plt
mv a0,s2
call free@plt
mv a0,s1
call free@plt
# Epilogue: restore saved registers and return 0.
ld ra,40(sp)
.cfi_remember_state
.cfi_restore 1
ld s0,32(sp)
.cfi_restore 8
ld s1,24(sp)
.cfi_restore 9
ld s2,16(sp)
.cfi_restore 18
ld s3,8(sp)
.cfi_restore 19
ld s4,0(sp)
.cfi_restore 20
li a0,0
addi sp,sp,48
.cfi_def_cfa_offset 0
jr ra
# Size given on the command line: strtol(argv[1], NULL, 10), result
# sign-extended from 32 bits into s4.
.L35:
.cfi_restore_state
ld a0,8(a1)
li a2,10
li a1,0
call strtol@plt
sext.w s4,a0
mv a1,s4
lla a0,.LC0
call printf@plt
# s3 = count * 8 bytes, used for all three allocations.
slli s3,s4,3
mv a0,s3
call malloc@plt
mv s0,a0
mv a0,s3
call malloc@plt
mv s2,a0
mv a0,s3
call malloc@plt
mv s1,a0
# Run the loops only for a positive count; otherwise print a 0.0 checksum.
bgt s4,zero,.L22
fmv.d.x fa5,zero
j .L17
.cfi_endproc
.LFE23:
.size main, .-main
.section .rodata.cst8,"aM",@progbits,8
.align 3
.LC1:
.word 0
.word 1073741824
.align 3
.LC2:
.word 1374389535
.word 1074339512
.ident "GCC: (GNU) 14.2.1 20250207"
.section .note.GNU-stack,"",@progbits

24
examples/triad/Makefile Normal file
View File

@@ -0,0 +1,24 @@
# Makefile for the RISC-V STREAM triad example.
#
# Builds the benchmark twice -- scalar (-O3) and with the vector extension
# (-O3 -march=rv64gcv) -- and emits the matching assembly listing for each.
# CC has to be a compiler that targets RISC-V; when cross-compiling, override
# it on the command line, e.g. `make CC=riscv64-linux-gnu-gcc`.
CC = gcc
CFLAGS = -O3
CFLAGS_VEC = -O3 -march=rv64gcv

# Remove a half-written output when its recipe fails.
.DELETE_ON_ERROR:

# Default target: both binaries (each one pulls in its listing, see below)
all: triad triad_vec

# Build with -O3 optimization.
# The listing is an order-only prerequisite: `make triad` still produces
# triad.s, and a deleted or outdated listing is regenerated, but a fresh
# listing alone does not force the binary to be relinked.
triad: triad.c | triad.s
	$(CC) $(CFLAGS) -o $@ $<

triad.s: triad.c
	$(CC) $(CFLAGS) -S -o $@ $<

# Build with vector extensions (same structure as the scalar build)
triad_vec: triad.c | triad_vec.s
	$(CC) $(CFLAGS_VEC) -o $@ $<

triad_vec.s: triad.c
	$(CC) $(CFLAGS_VEC) -S -o $@ $<

# Clean up everything this Makefile generates
clean:
	$(RM) triad triad_vec triad.s triad_vec.s

.PHONY: all clean

55
examples/triad/triad.c Normal file
View File

@@ -0,0 +1,55 @@
// STREAM triad benchmark for RISC-V testing
// a[i] = b[i] + s * c[i]
#include <stdio.h>
#include <stdlib.h>
/* Element type used by the benchmark arrays. */
#define DTYPE double

/*
 * STREAM triad: a[i] = b[i] + s * c[i] for every i in [0, size).
 * Does nothing when size is zero or negative.
 */
void kernel(DTYPE* a, DTYPE* b, DTYPE* c, const DTYPE s, const int size)
{
    int idx = 0;

    /* The OSACA start/end markers are meant to be placed around this loop. */
    while (idx < size) {
        a[idx] = b[idx] + s * c[idx];
        ++idx;
    }
}
/*
 * Benchmark driver for the STREAM triad kernel.
 *
 * Usage: triad [size]
 *   size  number of elements per array (default 1000), parsed with atoi().
 *
 * Allocates the three arrays, initializes them, runs the kernel once with
 * s = 3.14 and prints a checksum of the result so the compiler cannot drop
 * the work.
 *
 * Returns 0 on success and EXIT_FAILURE when an array cannot be allocated.
 */
int main(int argc, char *argv[]) {
    int size = 1000;
    if(argc > 1) {
        size = atoi(argv[1]);
    }

    printf("RISC-V STREAM triad: a[i] = b[i] + s * c[i], size=%d\n", size);

    // Allocate memory
    DTYPE* a = (DTYPE*)malloc(size * sizeof(DTYPE));
    DTYPE* b = (DTYPE*)malloc(size * sizeof(DTYPE));
    DTYPE* c = (DTYPE*)malloc(size * sizeof(DTYPE));

    // Only a failed allocation for a positive size is an error: the loops
    // below never touch the arrays when size <= 0, and malloc(0) is allowed
    // to return NULL.
    if(size > 0 && (a == NULL || b == NULL || c == NULL)) {
        fprintf(stderr, "Failed to allocate 3 arrays of %d elements\n", size);
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    // Initialize arrays
    for(int i=0; i<size; i++) {
        a[i] = 0.0;
        b[i] = i;
        c[i] = i * 2.0;
    }

    // Run kernel
    DTYPE scalar = 3.14;
    kernel(a, b, c, scalar, size);

    // Check result (to prevent optimization)
    DTYPE checksum = 0.0;
    for(int i=0; i<size; i++) {
        checksum += a[i];
    }
    printf("Checksum: %f\n", checksum);

    // Cleanup
    free(a);
    free(b);
    free(c);
    return 0;
}

View File

@@ -1,5 +1,5 @@
---
osaca_version: 0.6.1
osaca_version: 0.7.0
isa: riscv
# Contains all operand-irregular instruction forms OSACA supports for RISC-V.
# Operand-regular for a RISC-V instruction form with N operands in the shape of
@@ -8,259 +8,6 @@ isa: riscv
# For vector instructions with suffixes (.v, .vv, .vf), the operand behavior follows
# the base instruction pattern.
instruction_forms:
- name: addi
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: immediate
imd: int
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: add
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: sub
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: mul
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: div
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: and
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: or
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: xor
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: sll
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: srl
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: sra
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: register
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: false
destination: true
- class: flag
name: N
source: false
destination: true
- name: lw
operands:
- class: register
@@ -364,11 +111,6 @@ instruction_forms:
- class: identifier
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: true
destination: false
- name: bne
operands:
- class: register
@@ -382,11 +124,6 @@ instruction_forms:
- class: identifier
source: true
destination: false
hidden_operands:
- class: flag
name: Z
source: true
destination: false
- name: blt
operands:
- class: register
@@ -400,11 +137,6 @@ instruction_forms:
- class: identifier
source: true
destination: false
hidden_operands:
- class: flag
name: N
source: true
destination: false
- name: bge
operands:
- class: register
@@ -418,11 +150,6 @@ instruction_forms:
- class: identifier
source: true
destination: false
hidden_operands:
- class: flag
name: N
source: true
destination: false
- name: jal
operands:
- class: register
@@ -432,67 +159,6 @@ instruction_forms:
- class: identifier
source: true
destination: false
- name: jalr
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- class: immediate
imd: int
source: true
destination: false
- name: lui
operands:
- class: register
prefix: x
source: false
destination: true
- class: immediate
imd: int
source: true
destination: false
- name: auipc
operands:
- class: register
prefix: x
source: false
destination: true
- class: immediate
imd: int
source: true
destination: false
- name: li
operands:
- class: register
prefix: x
source: false
destination: true
- class: immediate
imd: int
source: true
destination: false
- name: mv
operands:
- class: register
prefix: x
source: false
destination: true
- class: register
prefix: x
source: true
destination: false
- name: ret
operands:
- class: register
prefix: x
name: '1' # x1 is the return address register (ra)
source: true
destination: false
- name: j
operands:
- class: identifier
@@ -534,82 +200,6 @@ instruction_forms:
post_indexed: '*'
source: false
destination: true
- name: fadd
operands:
- class: register
prefix: f
source: false
destination: true
- class: register
prefix: f
source: true
destination: false
- class: register
prefix: f
source: true
destination: false
hidden_operands:
- class: flag
name: FSR
source: true
destination: true
- name: fsub
operands:
- class: register
prefix: f
source: false
destination: true
- class: register
prefix: f
source: true
destination: false
- class: register
prefix: f
source: true
destination: false
hidden_operands:
- class: flag
name: FSR
source: true
destination: true
- name: fmul
operands:
- class: register
prefix: f
source: false
destination: true
- class: register
prefix: f
source: true
destination: false
- class: register
prefix: f
source: true
destination: false
hidden_operands:
- class: flag
name: FSR
source: true
destination: true
- name: fdiv
operands:
- class: register
prefix: f
source: false
destination: true
- class: register
prefix: f
source: true
destination: false
- class: register
prefix: f
source: true
destination: false
hidden_operands:
- class: flag
name: FSR
source: true
destination: true
- name: fmv.x.w
operands:
- class: register
@@ -652,11 +242,6 @@ instruction_forms:
- class: identifier
source: true
destination: false
hidden_operands:
- class: flag
name: VSR
source: false
destination: true
- name: vsetivli
operands:
- class: register
@@ -673,11 +258,6 @@ instruction_forms:
- class: identifier
source: true
destination: false
hidden_operands:
- class: flag
name: VSR
source: false
destination: true
- name: vle32.v
operands:
- class: register
@@ -693,11 +273,6 @@ instruction_forms:
post_indexed: '*'
source: true
destination: false
hidden_operands:
- class: flag
name: VSR
source: true
destination: true
- name: vse32.v
operands:
- class: register
@@ -713,30 +288,6 @@ instruction_forms:
post_indexed: '*'
source: false
destination: true
hidden_operands:
- class: flag
name: VSR
source: true
destination: true
- name: vadd.vv
operands:
- class: register
prefix: v
source: false
destination: true
- class: register
prefix: v
source: true
destination: false
- class: register
prefix: v
source: true
destination: false
hidden_operands:
- class: flag
name: VSR
source: true
destination: true
- name: vfmv.v.f
operands:
- class: register
@@ -747,30 +298,6 @@ instruction_forms:
prefix: f
source: true
destination: false
hidden_operands:
- class: flag
name: VSR
source: true
destination: true
- name: vfmadd.vv
operands:
- class: register
prefix: v
source: false
destination: true
- class: register
prefix: v
source: true
destination: false
- class: register
prefix: v
source: true
destination: false
hidden_operands:
- class: flag
name: VSR
source: true
destination: true
- name: vfmacc.vf
operands:
- class: register
@@ -785,12 +312,7 @@ instruction_forms:
prefix: v
source: true
destination: false
hidden_operands:
- class: flag
name: VSR
source: true
destination: true
# CSR instructions
# CSR instructions
- name: csrr
operands:
- class: register
@@ -800,11 +322,6 @@ instruction_forms:
- class: identifier
source: true
destination: false
hidden_operands:
- class: flag
name: CSR
source: true
destination: false
- name: csrw
operands:
- class: identifier
@@ -814,11 +331,6 @@ instruction_forms:
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: CSR
source: false
destination: true
- name: csrs
operands:
- class: identifier
@@ -828,11 +340,6 @@ instruction_forms:
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: CSR
source: true
destination: true
- name: csrc
operands:
- class: identifier
@@ -842,12 +349,7 @@ instruction_forms:
prefix: x
source: true
destination: false
hidden_operands:
- class: flag
name: CSR
source: true
destination: true
# Atomic instructions
# Atomic instructions
- name: lr.w
operands:
- class: register
@@ -863,11 +365,6 @@ instruction_forms:
post_indexed: '*'
source: true
destination: false
hidden_operands:
- class: flag
name: CSR
source: true
destination: true
- name: sc.w
operands:
- class: register
@@ -886,9 +383,4 @@ instruction_forms:
pre_indexed: '*'
post_indexed: '*'
source: true
destination: true
hidden_operands:
- class: flag
name: CSR
source: true
destination: true
destination: true

View File

@@ -1,5 +1,5 @@
---
osaca_version: 0.6.1
osaca_version: 0.7.0
micro_architecture: rv64
arch_code: rv64
isa: riscv
@@ -653,4 +653,4 @@ instruction_forms:
operands: []
latency: 1
throughput: 1
port_pressure: [[1, ["ALU"]]]
port_pressure: [[1, ["ALU"]]]

View File

@@ -107,8 +107,8 @@ def create_parser(parser=None):
"--arch",
type=str,
help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ICX, SPR, ZEN1, ZEN2, ZEN3, "
"ZEN4, TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a "
"default uarch for x86/AArch64.",
"ZEN4, TX2, N1, A64FX, TSV110, A72, M1, V2, RV64). If no architecture is given, OSACA assumes a "
"default uarch for the detected ISA.",
)
parser.add_argument(
"--fixed",
@@ -325,15 +325,28 @@ def inspect(args, output_file=sys.stdout):
except Exception as e:
# probably the wrong parser based on heuristic
if args.arch is None:
# change ISA and try again
arch = (
DEFAULT_ARCHS["x86"]
if BaseParser.detect_ISA(code) == "aarch64"
else DEFAULT_ARCHS["aarch64"]
)
isa = MachineModel.get_isa_for_arch(arch)
parser = get_asm_parser(arch)
parsed_code = parser.parse_file(code)
# Try all supported ISAs in order if auto-detection may have failed
detected_isa = BaseParser.detect_ISA(code)
fallback_isas = ["x86", "aarch64", "riscv"]
# Remove already tried ISA from fallback options
if detected_isa in fallback_isas:
fallback_isas.remove(detected_isa)
# Try each remaining ISA until one works
for fallback_isa in fallback_isas:
try:
arch = DEFAULT_ARCHS[fallback_isa]
isa = MachineModel.get_isa_for_arch(arch)
parser = get_asm_parser(arch)
parsed_code = parser.parse_file(code)
# If parsing succeeds, break out of the loop
break
except Exception:
# Continue trying with next ISA
continue
else:
# If none of the parsers work, raise the original exception
raise e
else:
raise e

View File

@@ -34,12 +34,24 @@ class BaseParser(object):
heuristics_x86ATT = [r"%[xyz]mm[0-9]", r"%[er][abcd]x[0-9]"]
# 2) check for v and z vector registers and x/w general-purpose registers
heuristics_aarch64 = [r"[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]", r"[wx][0-9]"]
matches = {"x86": 0, "aarch64": 0}
# 3) check for RISC-V registers (x0-x31, a0-a7, t0-t6, s0-s11) and instructions
heuristics_riscv = [
r"\bx[0-9]|x[1-2][0-9]|x3[0-1]\b", # x0-x31 registers
r"\ba[0-7]\b", # a0-a7 registers
r"\bt[0-6]\b", # t0-t6 registers
r"\bs[0-9]|s1[0-1]\b", # s0-s11 registers
r"\bzero\b|\bra\b|\bsp\b|\bgp\b", # zero, ra, sp, gp registers
r"\bvsetvli\b|\bvle\b|\bvse\b", # RV Vector instructions
r"\baddi\b|\bsd\b|\bld\b|\bjal\b" # Common RISC-V instructions
]
matches = {"x86": 0, "aarch64": 0, "riscv": 0}
for h in heuristics_x86ATT:
matches["x86"] += len(re.findall(h, file_content))
for h in heuristics_aarch64:
matches["aarch64"] += len(re.findall(h, file_content))
for h in heuristics_riscv:
matches["riscv"] += len(re.findall(h, file_content))
return max(matches.items(), key=operator.itemgetter(1))[0]

View File

@@ -1,12 +1,9 @@
#!/usr/bin/env python3
import re
import os
import logging
from copy import deepcopy
import pyparsing as pp
logger = logging.getLogger(__name__)
from osaca.parser import BaseParser
from osaca.parser.instruction_form import InstructionForm
from osaca.parser.operand import Operand
@@ -30,7 +27,32 @@ class ParserRISCV(BaseParser):
def __init__(self):
super().__init__()
self.isa = "riscv"
# Initialize parser, but don't set 'isa' directly as an attribute
self._isa_str = "riscv"
def isa(self):
    """Give back the ISA identifier kept in ``self._isa_str``."""
    isa_name = self._isa_str
    return isa_name
def start_marker(self):
    """
    Build the parsed form of the OSACA start marker for RISC-V.

    The marker is two assembly lines: ``li a1, 111`` followed by the bytes
    ``19,0,0,0`` (the encoding of a RISC-V NOP). Both lines are run through
    ``self.parse_file`` and that result is returned.

    NOTE(review): the marker text is expected to match the RISC-V marker in
    marker_utils.py -- confirm when changing either side.
    """
    marker_lines = [
        "li a1, 111 # OSACA START MARKER\n",
        ".byte 19,0,0,0 # OSACA START MARKER\n",
    ]
    return self.parse_file("".join(marker_lines))
def end_marker(self):
    """
    Build the parsed form of the OSACA end marker for RISC-V.

    The marker is two assembly lines: ``li a1, 222`` followed by the bytes
    ``19,0,0,0`` (the encoding of a RISC-V NOP). Both lines are run through
    ``self.parse_file`` and that result is returned.

    NOTE(review): the marker text is expected to match the RISC-V marker in
    marker_utils.py -- confirm when changing either side.
    """
    marker_lines = [
        "li a1, 222 # OSACA END MARKER\n",
        ".byte 19,0,0,0 # OSACA END MARKER\n",
    ]
    return self.parse_file("".join(marker_lines))
def construct_parser(self):
"""Create parser for RISC-V ISA."""
@@ -52,10 +74,15 @@ class ParserRISCV(BaseParser):
vector_identifier = pp.Word(pp.alphas, pp.alphanums)
special_identifier = pp.Word(pp.alphas + "%")
# First character of an identifier
first = pp.Word(pp.alphas + "_.", exact=1)
# Rest of the identifier
rest = pp.Word(pp.alphanums + "_.")
# PLT suffix (@plt) for calls to shared libraries
plt_suffix = pp.Optional(pp.Literal("@") + pp.Word(pp.alphas))
identifier = pp.Group(
pp.Combine(first + pp.Optional(rest)).setResultsName("name")
(pp.Combine(first + pp.Optional(rest) + plt_suffix)).setResultsName("name")
+ pp.Optional(
pp.Suppress(pp.Literal("+"))
+ (hex_number | decimal_number).setResultsName("offset")
@@ -346,7 +373,6 @@ class ParserRISCV(BaseParser):
return return_dict
except Exception as e:
logger.debug(f"Error parsing instruction: {instruction} - {str(e)}")
# For special vector instructions or ones with % in them
if "%" in instruction or instruction.startswith("v"):
parts = instruction.split("#")[0].strip().split(None, 1)
@@ -640,4 +666,53 @@ class ParserRISCV(BaseParser):
elif name.startswith("csr"):
return "csr" # Control and Status Register
return "unknown"
return "unknown"
def normalize_instruction_form(self, instruction_form, isa_model, arch_model):
    """
    Normalize a RISC-V instruction form in place.

    Forms already flagged as normalized are left untouched. For a form with a
    mnemonic, every ``ImmediateOperand`` is replaced by the result of
    ``self.normalize_imd``; mnemonics and register operands are kept as they
    are. The form is flagged as normalized afterwards.

    :param instruction_form: instruction form to normalize
    :param isa_model: ISA model to use for normalization (not used here)
    :param arch_model: architecture model to use for normalization (not used here)
    """
    if instruction_form.normalized:
        return

    # Forms without a mnemonic (e.g. labels or directives) carry nothing to
    # normalize and are only flagged.
    if instruction_form.mnemonic is not None:
        for position, candidate in enumerate(instruction_form.operands):
            if isinstance(candidate, ImmediateOperand):
                instruction_form.operands[position] = self.normalize_imd(candidate)

    instruction_form.normalized = True
def get_regular_source_operands(self, instruction_form):
    """
    Return the source operands of a form with regular src/dst behavior.

    With exactly one operand, that operand is the source. In every other
    case the first operand is taken to be the destination and all remaining
    operands are returned as sources.
    """
    operands = instruction_form.operands
    if len(operands) != 1:
        return list(operands[1:])
    return [operands[0]]
def get_regular_destination_operands(self, instruction_form):
    """
    Return the destination operands of a form with regular src/dst behavior.

    A form with exactly one operand has no destination. Otherwise the first
    operand (if there is one) is the destination.
    """
    operands = instruction_form.operands
    return [] if len(operands) == 1 else operands[:1]