mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-06 19:20:07 +01:00
restructured repo and renamed files in lowercase
This commit is contained in:
41
osaca/create_testcase.py
Executable file
41
osaca/create_testcase.py
Executable file
@@ -0,0 +1,41 @@
|
||||
#!/apps/python/3.5-anaconda/bin/python
|
||||
|
||||
from param import *
|
||||
from testcase import *
|
||||
|
||||
# Catalogue of ready-made operands; pick from these in the user section below
reg8 = Register('al')
reg16 = Register('ax')
reg32 = Register('eax')
reg64 = Register('rax')
xmm = Register('xmm0')
ymm = Register('ymm0')
zmm = Register('zmm0')
mem0 = MemAddr('(%rax, %esi, 4)')
imd1 = Parameter('IMD')

# ===============================================
# USER INPUT
# ===============================================
# Mnemonic of the instruction the testcase is generated for
mnemonic = 'vxorpd'

# Operands of the instruction; set a slot to None if it is not needed
dst = xmm
op1 = xmm
op2 = xmm

# Number of instructions per loop (default: 12)
per_loop = '128'
# ===============================================

# Drop the unused (None) operand slots, announce the testcase and write it
operands = list(filter(lambda operand: operand is not None, (dst, op1, op2)))
operand_text = ', '.join(map(str, operands))
print('Create Testcase for {} {}'.format(mnemonic, operand_text), end='')
Testcase(mnemonic, operands, per_loop).write_testcase()
print(' --------> SUCCEEDED')
|
||||
53
osaca/data/ivb_data.csv
Normal file
53
osaca/data/ivb_data.csv
Normal file
@@ -0,0 +1,53 @@
|
||||
instr,TP,LT,ports
|
||||
jmp-lbl,0.0,0.0,"((5,),)"
|
||||
jo-lbl,0.0,0.0,"((5,),)"
|
||||
jno-lbl,0.0,0.0,"((5,),)"
|
||||
js-lbl,0.0,0.0,"((5,),)"
|
||||
jns-lbl,0.0,0.0,"((5,),)"
|
||||
je-lbl,0.0,0.0,"((5,),)"
|
||||
jz-lbl,0.0,0.0,"((5,),)"
|
||||
jne-lbl,0.0,0.0,"((5,),)"
|
||||
jnz-lbl,0.0,0.0,"((5,),)"
|
||||
jb-lbl,0.0,0.0,"((5,),)"
|
||||
jnae-lbl,0.0,0.0,"((5,),)"
|
||||
jc-lbl,0.0,0.0,"((5,),)"
|
||||
jnb-lbl,0.0,0.0,"((5,),)"
|
||||
jae-lbl,0.0,0.0,"((5,),)"
|
||||
jnc-lbl,0.0,0.0,"((5,),)"
|
||||
jbe-lbl,0.0,0.0,"((5,),)"
|
||||
jna-lbl,0.0,0.0,"((5,),)"
|
||||
ja-lbl,0.0,0.0,"((5,),)"
|
||||
jnbe-lbl,0.0,0.0,"((5,),)"
|
||||
jl-lbl,0.0,0.0,"((5,),)"
|
||||
jnge-lbl,0.0,0.0,"((5,),)"
|
||||
jge-lbl,0.0,0.0,"((5,),)"
|
||||
jnl-lbl,0.0,0.0,"((5,),)"
|
||||
jle-lbl,0.0,0.0,"((5,),)"
|
||||
jng-lbl,0.0,0.0,"((5,),)"
|
||||
jg-lbl,0.0,0.0,"((5,),)"
|
||||
jnle-lbl,0.0,0.0,"((5,),)"
|
||||
jp-lbl,0.0,0.0,"((5,),)"
|
||||
jpe-lbl,0.0,0.0,"((5,),)"
|
||||
jnp-lbl,0.0,0.0,"((5,),)"
|
||||
jpo-lbl,0.0,0.0,"((5,),)"
|
||||
jcxz-lbl,0.0,0.0,"((5,),)"
|
||||
jecxz-lbl,0.0,0.0,"((5,),)"
|
||||
jo-lbl,0.0,0.0,"((5,),)"
|
||||
jno-lbl,0.0,0.0,"((5,),)"
|
||||
js-lbl,0.0,0.0,"((5,),)"
|
||||
jns-lbl,0.0,0.0,"((5,),)"
|
||||
lea-r64_mem,1.0,1.0,"((2,),(3,))"
|
||||
lea-r32_mem,1.0,1.0,"((2,),(3,))"
|
||||
vcvtsi2ss-xmm_xmm_r64,1.0,3.0,"((0,1),(1,5))"
|
||||
vcvtsi2ss-xmm_xmm_r32,1.0,3.0,"((1,5),(0,1))"
|
||||
vmulss-xmm_xmm_xmm,1.0,5.0,"((0,),)"
|
||||
vaddss-xmm_xmm_mem,1.0,3.0,"((1,),)"
|
||||
vaddss-xmm_xmm_xmm,1.0,3.0,"((1,),)"
|
||||
vxorps-xmm_xmm_xmm,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
vmovss-xmm_mem,0.5,1.0,"((2,),(3,))"
|
||||
vmovss-mem_xmm,1.0,1.0,"((2,4),(3,4))"
|
||||
inc-r32,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
inc-r64,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
cmp-r64_imd,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
cmp-r32_mem,0.5,1.0,"((0,2),(0,3),(1,2),(1,3),(2,5),(3,5))"
|
||||
cmp-r32_r32,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
|
92
osaca/data/ivb_data_old.csv
Normal file
92
osaca/data/ivb_data_old.csv
Normal file
@@ -0,0 +1,92 @@
|
||||
instr,TP,LT
|
||||
jmp-lbl,0.0,-1.0
|
||||
jo-lbl,0.0,-1.0
|
||||
jno-lbl,0.0,-1.0
|
||||
js-lbl,0.0,-1.0
|
||||
jns-lbl,0.0,-1.0
|
||||
je-lbl,0.0,-1.0
|
||||
jz-lbl,0.0,-1.0
|
||||
jne-lbl,0.0,-1.0
|
||||
jnz-lbl,0.0,-1.0
|
||||
jb-lbl,0.0,-1.0
|
||||
jnae-lbl,0.0,-1.0
|
||||
jc-lbl,0.0,-1.0
|
||||
jnb-lbl,0.0,-1.0
|
||||
jae-lbl,0.0,-1.0
|
||||
jnc-lbl,0.0,-1.0
|
||||
jbe-lbl,0.0,-1.0
|
||||
jna-lbl,0.0,-1.0
|
||||
ja-lbl,0.0,-1.0
|
||||
jnbe-lbl,0.0,-1.0
|
||||
jl-lbl,0.0,-1.0
|
||||
jnge-lbl,0.0,-1.0
|
||||
jge-lbl,0.0,-1.0
|
||||
jnl-lbl,0.0,-1.0
|
||||
jle-lbl,0.0,-1.0
|
||||
jng-lbl,0.0,-1.0
|
||||
jg-lbl,0.0,-1.0
|
||||
jnle-lbl,0.0,-1.0
|
||||
jp-lbl,0.0,-1.0
|
||||
jpe-lbl,0.0,-1.0
|
||||
jnp-lbl,0.0,-1.0
|
||||
jpo-lbl,0.0,-1.0
|
||||
jcxz-lbl,0.0,-1.0
|
||||
jecxz-lbl,0.0,-1.0
|
||||
jo-lbl,0.0,-1.0
|
||||
jno-lbl,0.0,-1.0
|
||||
js-lbl,0.0,-1.0
|
||||
jns-lbl,0.0,-1.0
|
||||
vmulss-xmm_xmm_xmm,1.0,-1.0
|
||||
vaddss-xmm_xmm_xmm,1.0,-1.0
|
||||
vxorps-xmm_xmm_xmm,0.25,-1.0
|
||||
inc-r64,0.3333333333333333,-1.0
|
||||
xor-r32_r32,0.3333333333333333,-1.0
|
||||
vcvtsi2ss-xmm_xmm_r32,1.0,-1.0
|
||||
vaddss-xmm_xmm_mem,1.0,-1.0
|
||||
vmovupd-load-avx,1.0,-1.0
|
||||
lea-r32_mem,1.0,-1.0
|
||||
vmovss-xmm_mem,0.5,-1.0
|
||||
vmovss-mem_xmm,1.0,-1.0
|
||||
vmovupd-store-avx,2.0,-1.0
|
||||
lea-r64_mem,1.0,-1.0
|
||||
movslq-r64_mem,0.5,-1.0
|
||||
mov-r64_mem,0.5,-1.0
|
||||
vaddpd-ymm_ymm_ymm,1.0,-1.0
|
||||
cmp-r32_r32,0.3333333333333333,-1.0
|
||||
vmovsd-xmm_xmm_xmm,1.0,-1.0
|
||||
vmulsd-xmm_xmm_mem,1.0,-1.0
|
||||
vmovsd-mem_xmm,1.0,-1.0
|
||||
vmovhpd-xmm_xmm_mem,1.0,-1.0
|
||||
vsubpd-ymm_ymm_ymm,1.0,-1.0
|
||||
vmovq-xmm_r64,1.0,-1.0
|
||||
vunpckhpd-xmm_xmm_xmm,1.0,-1.0
|
||||
vmulpd-ymm_ymm_mem,1.0,-1.0
|
||||
mov-mem_r64,1.0,-1.0
|
||||
movzbl-r32_r8,0.29600000000000004,-1.0
|
||||
vmulsd-xmm_xmm_xmm,1.0,-1.0
|
||||
vaddsd-xmm_xmm_mem,1.0,-1.0
|
||||
vmovq-r64_xmm,1.0,-1.0
|
||||
vmulpd-ymm_ymm_ymm,1.0,-1.0
|
||||
mov-r32_mem,0.5,-1.0
|
||||
cmp-r32_mem,0.5,-1.0
|
||||
vaddpd-xmm_xmm_xmm,1.0,-1.0
|
||||
mov-mem_r32,1.0,-1.0
|
||||
vmovsd-xmm_mem,0.5,-1.0
|
||||
vsubsd-xmm_xmm_xmm,1.0,-1.0
|
||||
vmovaps-xmm_xmm,0.845,-1.0
|
||||
vaddsd-xmm_xmm_xmm,1.0,-1.0
|
||||
add-r32_mem,0.5,-1.0
|
||||
vmovupd-xmm_mem,0.5,-1.0
|
||||
test-r32_r32,0.3333333333333333,-1.0
|
||||
add-r64_r64,0.3333333333333333,-1.0
|
||||
dec-r32,0.3333333333333333,-1.0
|
||||
movslq-r64_r32,0.3333333333333333,-1.0
|
||||
vxorpd-ymm_ymm_ymm,0.25,-1.0
|
||||
sub-r32_r32,0.3333333333333333,-1.0
|
||||
inc-r32,0.3333333333333333,-1.0
|
||||
neg-r32,0.3333333333333333,-1.0
|
||||
cmp-r64_imd,0.3333333333333333,-1.0
|
||||
vxorpd-xmm_xmm_xmm,0.25,-1.0
|
||||
vmovapd-ymm_ymm,0.856,-1.0
|
||||
vmovapd-xmm_xmm,0.855,-1.0
|
||||
mov-r32_r32,0.3333333333333333,-1.0
|
||||
|
107
osaca/data/res_ivb.dat
Normal file
107
osaca/data/res_ivb.dat
Normal file
@@ -0,0 +1,107 @@
|
||||
Using frequency 2.20GHz.
|
||||
vmovsd-xmm_mem: 0.503 (clock cycles) [DEBUG - result: 3.141590]
|
||||
lea-r64_mem-TP: 1.015 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovupd-load-avx-TP: 1.004 (clock cycles) [DEBUG - result: 3.141590]
|
||||
movslq-r64_mem-TP: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
lea-r32_mem-TP: 1.015 (clock cycles) [DEBUG - result: 1.000000]
|
||||
cmp-r32_mem: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
sub-r32_r32: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
test-r32_r32-TP: 0.345 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vaddss-xmm_xmm_xmm-TP: 1.015 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vsubsd-xmm_xmm_xmm: 3.005 (clock cycles) [DEBUG - result: -1.000000]
|
||||
vunpckhpd-xmm_xmm_xmm: 1.017 (clock cycles) [DEBUG - result: 1.000000]
|
||||
movzbl-r32_r8: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vaddss-xmm_xmm_mem: 3.005 (clock cycles) [DEBUG - result: 2.000002]
|
||||
dec-r32: 1.003 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vxorpd-ymm_ymm_ymm: 0.517 (clock cycles) [DEBUG - result: inf]
|
||||
vaddpd-xmm_xmm_xmm: 3.005 (clock cycles) [DEBUG - result: inf]
|
||||
cmp-r64_imd-TP: 0.341 (clock cycles) [DEBUG - result: 1.000000]
|
||||
cmp-r64_imd: 0.341 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vaddsd-xmm_xmm_xmm: 3.004 (clock cycles) [DEBUG - result: inf]
|
||||
vmovapd-ymm_ymm-TP: 0.864 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovaps-xmm_xmm: 0.681 (clock cycles) [DEBUG - result: 2.000000]
|
||||
vmovq-xmm_r64: 1.017 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vxorpd-xmm_xmm_xmm: 0.517 (clock cycles) [DEBUG - result: inf]
|
||||
vmovq-r64_xmm: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vcvtsi2ss-xmm_xmm_r32-TP: 1.033 (clock cycles) [DEBUG - result: 1.000000]
|
||||
inc-r64: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovsd-mem_xmm: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vaddpd-ymm_ymm_ymm-TP: 1.014 (clock cycles) [DEBUG - result: 1.000000]
|
||||
add-r32_mem: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmulsd-xmm_xmm_mem: 5.007 (clock cycles) [DEBUG - result: inf]
|
||||
lea-r64_mem: 1.015 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vcvtsi2ss-xmm_xmm_r32: 3.005 (clock cycles) [DEBUG - result: 2.000000]
|
||||
movslq-r64_mem: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
lea-r32_mem: 1.015 (clock cycles) [DEBUG - result: 1.000000]
|
||||
cmp-r32_r32-TP: 0.345 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vxorpd-xmm_xmm_xmm-TP: 0.261 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovsd-xmm_xmm_xmm-TP: 1.018 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovapd-ymm_ymm: 0.681 (clock cycles) [DEBUG - result: 2.000000]
|
||||
vaddss-xmm_xmm_xmm: 3.005 (clock cycles) [DEBUG - result: 2.000000]
|
||||
vmulsd-xmm_xmm_mem-TP: 1.017 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovsd-mem_xmm-TP: 1.003 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-r32_mem: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmulss-xmm_xmm_xmm: 5.012 (clock cycles) [DEBUG - result: 2.000000]
|
||||
vmovhpd-xmm_xmm_mem-TP: 1.017 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vsubpd-ymm_ymm_ymm-TP: 1.014 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovss-xmm_mem-TP: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovq-xmm_r64-TP: 1.017 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vunpckhpd-xmm_xmm_xmm-TP: 1.017 (clock cycles) [DEBUG - result: 1.000000]
|
||||
add-r64_r64-TP: 0.345 (clock cycles) [DEBUG - result: 1.000000]
|
||||
inc-r32: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-r64_mem: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmulpd-ymm_ymm_mem-TP: 1.016 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-mem_r64-TP: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovupd-xmm_mem: 0.503 (clock cycles) [DEBUG - result: 3.141590]
|
||||
movzbl-r32_r8-TP: 0.286 (clock cycles) [DEBUG - result: 1.000000]
|
||||
dec-r32-TP: 0.345 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-r32_r32-TP: 0.287 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmulpd-ymm_ymm_mem: 5.007 (clock cycles) [DEBUG - result: inf]
|
||||
vaddpd-ymm_ymm_ymm: 3.005 (clock cycles) [DEBUG - result: inf]
|
||||
movslq-r64_r32-TP: 0.345 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vxorpd-ymm_ymm_ymm-TP: 0.258 (clock cycles) [DEBUG - result: 1.000000]
|
||||
cmp-r32_r32: 0.344 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmulsd-xmm_xmm_xmm-TP: 1.016 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-r32_r32: 0.668 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vxorps-xmm_xmm_xmm-TP: 0.258 (clock cycles) [DEBUG - result: 1.000000]
|
||||
neg-r32: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vaddsd-xmm_xmm_mem-TP: 1.016 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovq-r64_xmm-TP: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmulpd-ymm_ymm_ymm-TP: 1.016 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovss-mem_xmm-TP: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-r32_mem-TP: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmulpd-ymm_ymm_ymm: 5.007 (clock cycles) [DEBUG - result: inf]
|
||||
test-r32_r32: 0.346 (clock cycles) [DEBUG - result: 1.000000]
|
||||
xor-r32_r32-TP: 0.345 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovupd-store-avx-TP: 2.005 (clock cycles) [DEBUG - result: 0.000000]
|
||||
cmp-r32_mem-TP: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-r64_mem-TP: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovapd-xmm_xmm: 0.681 (clock cycles) [DEBUG - result: 2.000000]
|
||||
vaddpd-xmm_xmm_xmm-TP: 1.014 (clock cycles) [DEBUG - result: 1.000000]
|
||||
sub-r32_r32-TP: 0.345 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovss-xmm_mem: 0.516 (clock cycles) [DEBUG - result: 0.000000]
|
||||
add-r64_r64: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmulsd-xmm_xmm_xmm: 5.007 (clock cycles) [DEBUG - result: inf]
|
||||
vmulss-xmm_xmm_xmm-TP: 1.016 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-mem_r32-TP: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-mem_r64: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovsd-xmm_mem-TP: 0.507 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vaddss-xmm_xmm_mem-TP: 1.017 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vsubsd-xmm_xmm_xmm-TP: 1.014 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovaps-xmm_xmm-TP: 0.860 (clock cycles) [DEBUG - result: 1.000000]
|
||||
movslq-r64_r32: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovss-mem_xmm: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
inc-r32-TP: 0.344 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovapd-xmm_xmm-TP: 0.856 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vaddsd-xmm_xmm_xmm-TP: 1.014 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovhpd-xmm_xmm_mem: 1.017 (clock cycles) [DEBUG - result: 2.000000]
|
||||
vxorps-xmm_xmm_xmm: 0.517 (clock cycles) [DEBUG - result: inf]
|
||||
vmovsd-xmm_xmm_xmm: 1.017 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vaddsd-xmm_xmm_mem: 3.005 (clock cycles) [DEBUG - result: 201061760.000000]
|
||||
add-r32_mem-TP: 0.501 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vmovupd-xmm_mem-TP: 0.509 (clock cycles) [DEBUG - result: 1.000000]
|
||||
mov-mem_r32: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
inc-r64-TP: 0.355 (clock cycles) [DEBUG - result: 1.000000]
|
||||
neg-r32-TP: 0.344 (clock cycles) [DEBUG - result: 1.000000]
|
||||
vsubpd-ymm_ymm_ymm: 3.004 (clock cycles) [DEBUG - result: -1.000000]
|
||||
xor-r32_r32: 1.002 (clock cycles) [DEBUG - result: 1.000000]
|
||||
4
osaca/data/res_test.dat
Normal file
4
osaca/data/res_test.dat
Normal file
@@ -0,0 +1,4 @@
|
||||
Using frequency 2.20GHz.
|
||||
lea-r64_mem-TP: 1.003 (clock cycles) [DEBUG - result: 3.141590]
|
||||
jan-xmm_xmm-TP: 0.995 (clock cycles) [DEBUG - result: 3.141590]
|
||||
jan-xmm_xmm: 2.037 (clock cycles) [DEBUG - result: 3.141590]
|
||||
53
osaca/data/skl_data.csv
Normal file
53
osaca/data/skl_data.csv
Normal file
@@ -0,0 +1,53 @@
|
||||
instr,TP,LT,ports
|
||||
jmp-lbl,0.0,0.0,"((5,),)"
|
||||
jo-lbl,0.0,0.0,"((5,),)"
|
||||
jno-lbl,0.0,0.0,"((5,),)"
|
||||
js-lbl,0.0,0.0,"((5,),)"
|
||||
jns-lbl,0.0,0.0,"((5,),)"
|
||||
je-lbl,0.0,0.0,"((5,),)"
|
||||
jz-lbl,0.0,0.0,"((5,),)"
|
||||
jne-lbl,0.0,0.0,"((5,),)"
|
||||
jnz-lbl,0.0,0.0,"((5,),)"
|
||||
jb-lbl,0.0,0.0,"((5,),)"
|
||||
jnae-lbl,0.0,0.0,"((5,),)"
|
||||
jc-lbl,0.0,0.0,"((5,),)"
|
||||
jnb-lbl,0.0,0.0,"((5,),)"
|
||||
jae-lbl,0.0,0.0,"((5,),)"
|
||||
jnc-lbl,0.0,0.0,"((5,),)"
|
||||
jbe-lbl,0.0,0.0,"((5,),)"
|
||||
jna-lbl,0.0,0.0,"((5,),)"
|
||||
ja-lbl,0.0,0.0,"((5,),)"
|
||||
jnbe-lbl,0.0,0.0,"((5,),)"
|
||||
jl-lbl,0.0,0.0,"((5,),)"
|
||||
jnge-lbl,0.0,0.0,"((5,),)"
|
||||
jge-lbl,0.0,0.0,"((5,),)"
|
||||
jnl-lbl,0.0,0.0,"((5,),)"
|
||||
jle-lbl,0.0,0.0,"((5,),)"
|
||||
jng-lbl,0.0,0.0,"((5,),)"
|
||||
jg-lbl,0.0,0.0,"((5,),)"
|
||||
jnle-lbl,0.0,0.0,"((5,),)"
|
||||
jp-lbl,0.0,0.0,"((5,),)"
|
||||
jpe-lbl,0.0,0.0,"((5,),)"
|
||||
jnp-lbl,0.0,0.0,"((5,),)"
|
||||
jpo-lbl,0.0,0.0,"((5,),)"
|
||||
jcxz-lbl,0.0,0.0,"((5,),)"
|
||||
jecxz-lbl,0.0,0.0,"((5,),)"
|
||||
jo-lbl,0.0,0.0,"((5,),)"
|
||||
jno-lbl,0.0,0.0,"((5,),)"
|
||||
js-lbl,0.0,0.0,"((5,),)"
|
||||
jns-lbl,0.0,0.0,"((5,),)"
|
||||
lea-r64_mem,1.0,1.0,"((2,),(3,))"
|
||||
lea-r32_mem,1.0,1.0,"((2,),(3,))"
|
||||
vcvtsi2ss-xmm_xmm_r64,1.0,3.0,"((0,1),(1,5))"
|
||||
vcvtsi2ss-xmm_xmm_r32,1.0,3.0,"((-1,),)"
|
||||
vmulss-xmm_xmm_xmm,1.0,5.0,"((0,),)"
|
||||
vaddss-xmm_xmm_mem,1.0,3.0,"((1,),)"
|
||||
vaddss-xmm_xmm_xmm,1.0,3.0,"((1,),)"
|
||||
vxorps-xmm_xmm_xmm,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
vmovss-xmm_mem,0.5,1.0,"((2,),(3,))"
|
||||
vmovss-mem_xmm,1.0,1.0,"((2,4),(3,4))"
|
||||
inc-r32,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
inc-r64,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
cmp-r64_imd,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
cmp-r32_mem,0.5,1.0,"((0,),(1,),(5,))"
|
||||
cmp-r32_r32,0.3333333333333333,1.0,"((0,),(1,),(5,))"
|
||||
|
92
osaca/data/tmp_ivb_throughput.csv
Normal file
92
osaca/data/tmp_ivb_throughput.csv
Normal file
@@ -0,0 +1,92 @@
|
||||
instr,clock_cycles
|
||||
jmp-lbl-TP,0.0
|
||||
jo-lbl-TP,0.0
|
||||
jno-lbl-TP,0.0
|
||||
js-lbl-TP,0.0
|
||||
jns-lbl-TP,0.0
|
||||
je-lbl-TP,0.0
|
||||
jz-lbl-TP,0.0
|
||||
jne-lbl-TP,0.0
|
||||
jnz-lbl-TP,0.0
|
||||
jb-lbl-TP,0.0
|
||||
jnae-lbl-TP,0.0
|
||||
jc-lbl-TP,0.0
|
||||
jnb-lbl-TP,0.0
|
||||
jae-lbl-TP,0.0
|
||||
jnc-lbl-TP,0.0
|
||||
jbe-lbl-TP,0.0
|
||||
jna-lbl-TP,0.0
|
||||
ja-lbl-TP,0.0
|
||||
jnbe-lbl-TP,0.0
|
||||
jl-lbl-TP,0.0
|
||||
jnge-lbl-TP,0.0
|
||||
jge-lbl-TP,0.0
|
||||
jnl-lbl-TP,0.0
|
||||
jle-lbl-TP,0.0
|
||||
jng-lbl-TP,0.0
|
||||
jg-lbl-TP,0.0
|
||||
jnle-lbl-TP,0.0
|
||||
jp-lbl-TP,0.0
|
||||
jpe-lbl-TP,0.0
|
||||
jnp-lbl-TP,0.0
|
||||
jpo-lbl-TP,0.0
|
||||
jcxz-lbl-TP,0.0
|
||||
jecxz-lbl-TP,0.0
|
||||
jo-lbl-TP,0.0
|
||||
jno-lbl-TP,0.0
|
||||
js-lbl-TP,0.0
|
||||
jns-lbl-TP,0.0
|
||||
vmulss-xmm_xmm_xmm-TP,1.0
|
||||
vaddss-xmm_xmm_xmm-TP,1.0
|
||||
vxorps-xmm_xmm_xmm-TP,0.25
|
||||
inc-r64-TP,0.3333333333333333
|
||||
xor-r32_r32-TP,0.3333333333333333
|
||||
vcvtsi2ss-xmm_xmm_r32-TP,1.0
|
||||
vaddss-xmm_xmm_mem-TP,1.0
|
||||
vmovupd-load-avx-TP,1.0
|
||||
lea-r32_mem-TP,1.0
|
||||
vmovss-xmm_mem-TP,0.5
|
||||
vmovss-mem_xmm-TP,1.0
|
||||
vmovupd-store-avx-TP,2.0
|
||||
lea-r64_mem-TP,1.0
|
||||
movslq-r64_mem-TP,0.5
|
||||
mov-r64_mem-TP,0.5
|
||||
vaddpd-ymm_ymm_ymm-TP,1.0
|
||||
cmp-r32_r32-TP,0.3333333333333333
|
||||
vmovsd-xmm_xmm_xmm-TP,1.0
|
||||
vmulsd-xmm_xmm_mem-TP,1.0
|
||||
vmovsd-mem_xmm-TP,1.0
|
||||
vmovhpd-xmm_xmm_mem-TP,1.0
|
||||
vsubpd-ymm_ymm_ymm-TP,1.0
|
||||
vmovq-xmm_r64-TP,1.0
|
||||
vunpckhpd-xmm_xmm_xmm-TP,1.0
|
||||
vmulpd-ymm_ymm_mem-TP,1.0
|
||||
mov-mem_r64-TP,1.0
|
||||
movzbl-r32_r8-TP,0.29600000000000004
|
||||
vmulsd-xmm_xmm_xmm-TP,1.0
|
||||
vaddsd-xmm_xmm_mem-TP,1.0
|
||||
vmovq-r64_xmm-TP,1.0
|
||||
vmulpd-ymm_ymm_ymm-TP,1.0
|
||||
mov-r32_mem-TP,0.5
|
||||
cmp-r32_mem-TP,0.5
|
||||
vaddpd-xmm_xmm_xmm-TP,1.0
|
||||
mov-mem_r32-TP,1.0
|
||||
vmovsd-xmm_mem-TP,0.5
|
||||
vsubsd-xmm_xmm_xmm-TP,1.0
|
||||
vmovaps-xmm_xmm-TP,0.845
|
||||
vaddsd-xmm_xmm_xmm-TP,1.0
|
||||
add-r32_mem-TP,0.5
|
||||
vmovupd-xmm_mem-TP,0.5
|
||||
test-r32_r32-TP,0.3333333333333333
|
||||
add-r64_r64-TP,0.3333333333333333
|
||||
dec-r32-TP,0.3333333333333333
|
||||
movslq-r64_r32-TP,0.3333333333333333
|
||||
vxorpd-ymm_ymm_ymm-TP,0.25
|
||||
sub-r32_r32-TP,0.3333333333333333
|
||||
inc-r32-TP,0.3333333333333333
|
||||
neg-r32-TP,0.3333333333333333
|
||||
cmp-r64_imd-TP,0.3333333333333333
|
||||
vxorpd-xmm_xmm_xmm-TP,0.25
|
||||
vmovapd-ymm_ymm-TP,0.856
|
||||
vmovapd-xmm_xmm-TP,0.855
|
||||
mov-r32_r32-TP,0.3333333333333333
|
||||
|
331
osaca/eu_sched.py
Executable file
331
osaca/eu_sched.py
Executable file
@@ -0,0 +1,331 @@
|
||||
#!/apps/python/3.5-anaconda/bin/python
|
||||
|
||||
import sys
|
||||
import os
|
||||
import math
|
||||
import ast
|
||||
from param import *
|
||||
from operator import add
|
||||
import pandas as pd
|
||||
|
||||
class Scheduler(object):
    '''
    Distributes a list of instruction forms over the execution ports of a
    CPU micro-architecture, using the throughput, latency and port data
    stored in ``data/<arch>_data.csv`` next to this module.
    '''

    # Number of execution ports for every supported micro-architecture
    arch_dict = {'SNB': 6, 'IVB': 6, 'HSW': 8, 'BDW': 8, 'SKL': 8}
    ports = None        # type: int
    # Each item is [instr, operand(s) ..., instr form as text]
    instrList = None    # list<list<str,Param[,Param][,Param],str>>
    df = None           # pandas DataFrame read from the architecture CSV

    def __init__(self, arch, instructionList):
        '''
        Parameters
        ----------
        arch : str
            Architecture abbreviation (case-insensitive), a key of arch_dict
        instructionList : [[str, Parameter, ..., str], ...]
            Instruction forms to schedule

        Exits the interpreter with status 1 if the architecture is unknown.
        '''
        arch = arch.upper()
        try:
            self.ports = self.arch_dict[arch]
        except KeyError:
            print('Architecture not supported for EU scheduling.')
            sys.exit(1)
        self.instrList = instructionList
        # Locate the data directory relative to this file instead of cutting
        # a fixed number of characters off the module path.
        dataDir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
        self.df = pd.read_csv(os.path.join(dataDir, arch.lower()+'_data.csv'),
                              quotechar='"', converters={'ports': ast.literal_eval})

    @staticmethod
    def _is_placeholder(tup):
        '''
        Tell whether a port occupation from the CSV carries no usable data.

        The marker for "no port information" is a single occupation whose
        first port is -1. A CSV cell written as "((-1,))" is evaluated by
        ast.literal_eval to the flat tuple (-1,), so that shape is accepted
        as well. Empty occupations are treated as "no data", too.
        '''
        if len(tup) == 0:
            return True
        if len(tup) != 1:
            return False
        first = tup[0]
        if isinstance(first, tuple):
            return len(first) == 0 or first[0] == -1
        return first == -1

    def _find_entry(self, instrForm, firstColumn):
        '''
        Look up an instruction form in the database.

        Parameters
        ----------
        instrForm : [str, Parameter, ..., Parameter, str]
            Instruction Form data structure
        firstColumn : str
            First column of the returned slice (the slice ends at 'ports')

        Returns
        -------
        (DataFrame, tuple) or None
            The matching rows and the port occupations of the first match,
            or None if there is no match or no port data for it
        '''
        searchString = instrForm[0]+'-'+self.get_operand_suffix(instrForm)
        entry = self.df.loc[self.df.instr == searchString, firstColumn:'ports']
        if entry.empty:
            return None
        tup = entry.ports.values[0]
        if self._is_placeholder(tup):
            return None
        return entry, tup

    def schedule(self):
        '''
        Schedules Instruction Form list and calculates port bindings.

        Returns
        -------
        (str, [int, ...])
            A tuple containing the graphic output of the schedule as string and
            the port bindings as list of ints.
        '''
        sched = self.get_head()
        # One row of port pressures per instruction form
        occ_ports = [[0]*self.ports for _ in self.instrList]
        port_bndgs = [0]*self.ports
        for i, instrForm in enumerate(self.instrList):
            found = self._find_entry(instrForm, 'TP')
            if found is None:
                # Instruction form not in CSV (or without port data):
                # emit an empty line marked with '*'
                sched += self.get_line(occ_ports[i], '* '+instrForm[-1])
                continue
            entry, tup = found
            # Get the occurrence of each port from the occupation list
            portOccurances = self.get_port_occurances(tup)
            # Get 'occurrence groups'
            occuranceGroups = self.get_occurance_groups(portOccurances)
            # Calculate port dependent throughput: the CSV throughput scaled
            # by the size of the smallest group, then split within each group
            TPGes = entry.TP.values[0]*len(occuranceGroups[0])
            for occGroup in occuranceGroups:
                for port in occGroup:
                    occ_ports[i][port] = TPGes/len(occGroup)
            # Write schedule line
            sched += self.get_line(occ_ports[i], instrForm[-1])
            # Add throughput to total port binding
            port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
        return (sched, port_bndgs)

    def schedule_FCFS(self):
        '''
        Schedules Instruction Form list for a single run with latencies.

        Returns
        -------
        (str, int)
            A tuple containing the graphic output as string and the total throughput time as int.
        '''
        sched = self.get_head()
        total = 0
        # Remaining busy cycles of every port
        occ_ports = [0]*self.ports
        for instrForm in self.instrList:
            found = self._find_entry(instrForm, 'LT')
            if found is None:
                # Instruction form not in CSV (or without port data)
                sched += self.get_line([0]*self.ports, '* '+instrForm[-1])
                continue
            entry, tup = found
            latency = entry.LT.values[0]
            placed = False
            while not placed:
                for portOcc in tup:
                    # Take the first port occupation whose ports are all idle
                    if self.test_ports_FCFS(occ_ports, portOcc):
                        placed = True
                        good = [latency if (j in portOcc) else 0 for j in range(self.ports)]
                        sched += self.get_line(good, instrForm[-1])
                        # Add new occupation
                        occ_ports = [occ_ports[j]+good[j] for j in range(self.ports)]
                        break
                # Step: one cycle passes on every busy port, whether or not
                # the instruction could be placed in this round
                occ_ports = [j-1 if (j > 0) else 0 for j in occ_ports]
                if latency != 0:
                    total += 1
        # Cycles still needed to drain the busiest port
        total += max(occ_ports)
        return (sched, total)

    def get_occurance_groups(self, portOccurances):
        '''
        Groups ports by the number of their occurrence and sorts the
        groups by cardinality.

        Ports that never occur are ignored. Counts do not have to be
        contiguous, and it is fine if every port occurs at least once.

        Parameters
        ----------
        portOccurances : [int, ...]
            List with the length of ports containing the number of occurrences
            of each port

        Returns
        -------
        [[int, ...], ...]
            List of lists with all occurrence groups sorted by cardinality
            (smallest group first); groups of equal size keep ascending
            occurrence order. Empty if no port occurs at all.
        '''
        byCount = {}
        for port, occurs in enumerate(portOccurances):
            if occurs > 0:
                byCount.setdefault(occurs, []).append(port)
        groups = [byCount[count] for count in sorted(byCount)]
        # Sort groups by cardinality (stable, so count order breaks ties)
        groups.sort(key=len)
        return groups

    def get_port_occurances(self, tups):
        '''
        Returns the number of each port occurrence for the possible port
        occupations.

        Parameters
        ----------
        tups : ((int, ...), ...)
            Tuple of tuples of possible port occupations

        Returns
        -------
        [int, ...]
            List in the length of the number of ports for the current architecture,
            containing the amount of occurrences for each port
        '''
        ports = [0]*self.ports
        for tup in tups:
            for elem in tup:
                ports[elem] += 1
        return ports

    def test_ports_FCFS(self, occ_ports, needed_ports):
        '''
        Test if current configuration of ports is possible and returns boolean.

        Parameters
        ----------
        occ_ports : [int]
            List to inspect for current port occupation
        needed_ports : (int)
            Tuple with needed port(s) for particular instruction form

        Returns
        -------
        bool
            True if needed ports can get scheduled on current port occupation
            False if not
        '''
        return all(occ_ports[port] == 0 for port in needed_ports)

    def get_report_info(self):
        '''
        Creates Report information including all needed annotations.

        Returns
        -------
        str
            String containing the report information
        '''
        analysis = 'Throughput Analysis Report\n'+('-'*26)+'\n'
        annotations = '* - No information for this instruction in database\n'+'\n'
        return analysis+annotations

    def get_head(self):
        '''
        Creates right heading for CPU architecture.

        Returns
        -------
        str
            String containing the header
        '''
        horizLine = '-'*7*self.ports+'-\n'
        # Centre the 24 character caption over the horizontal line
        padding = (len(horizLine)-24)/2
        portAnno = ' '*math.floor(padding)+'Ports Pressure in cycles'+' '*math.ceil(padding)+'\n'
        # NOTE(review): the rule above reserves 7 characters per port while
        # this cell literal is narrower - confirm the intended padding.
        portLine = ''.join('| {} '.format(i) for i in range(self.ports))+'|\n'
        return portAnno+portLine+horizLine

    def get_line(self, occ_ports, instrName):
        '''
        Create line with port occupation for output.

        Parameters
        ----------
        occ_ports : (int, ...)
            Pressure per port; a value of 0 is printed as a blank cell
        instrName : str
            Name of instruction form for output

        Returns
        -------
        str
            String for output containing port scheduling for instrName
        '''
        cells = []
        for pressure in occ_ports:
            # NOTE(review): the blank cell is narrower than a '%.2f' cell -
            # confirm the intended padding of the literal.
            cycles = ' ' if (pressure == 0) else '%.2f' % float(pressure)
            cells.append('| '+cycles+' ')
        return ''.join(cells)+'| '+instrName+'\n'

    def get_port_binding(self, port_bndg):
        '''
        Creates port binding out of scheduling result.

        Parameters
        ----------
        port_bndg : [int, ...]
            Integer list containing port bindings

        Returns
        -------
        str
            String containing the port binding graphical output
        '''
        header = 'Port Binding in Cycles Per Iteration:\n'
        # NOTE(review): the rule reserves 10 + 6 characters per port; the
        # cell literals below do not obviously match - confirm the padding.
        horizLine = '-'*10+'-'*6*self.ports+'\n'
        portLine = '| Port |'+''.join(' {} |'.format(i) for i in range(self.ports))+'\n'
        cycLine = '| Cycles |'+''.join(' {} |'.format(round(cycles, 2)) for cycles in port_bndg)+'\n'
        return header+horizLine+portLine+horizLine+cycLine+horizLine

    def get_operand_suffix(self, instrForm):
        '''
        Creates operand suffix out of list of Parameters.

        Parameters
        ----------
        instrForm : [str, Parameter, ..., Parameter, str]
            Instruction Form data structure

        Returns
        -------
        str
            Operand suffix for searching in database
        '''
        opExt = []
        # Skip the mnemonic (first item) and the textual form (last item)
        for operand in instrForm[1:-1]:
            if isinstance(operand, Register) and operand.reg_type == 'GPR':
                # General purpose registers are keyed by their size, e.g. r64
                optmp = 'r'+str(operand.size)
            elif isinstance(operand, MemAddr):
                optmp = 'mem'
            else:
                optmp = str(operand).lower()
            opExt.append(optmp)
        return '_'.join(opExt)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Demo kernel: every entry is [mnemonic, operand(s) ..., textual form]
    kernel = [
        ['lea', Register('RAX'), MemAddr('%edx,(%rax,%rax,1)'),
         'lea 0x1(%rax,%rax,1),%edx'],
        ['vcvtsi2ss', Register('XMM0'), Register('XMM0'), Register('RAX'),
         'vcvtsi2ss %edx,%xmm2,%xmm2'],
        ['vmulss', Register('XMM0'), Register('XMM0'), Register('XMM0'),
         'vmulss %xmm2,%xmm0, %xmm3'],
        ['lea', Register('RAX'), MemAddr('%edx,(%rax,%rax,1)'),
         'lea 0x2(%rax,%rax,1),%ecx'],
        ['vaddss', Register('XMM0'), Register('XMM0'), Register('XMM0'),
         'vaddss %xmm3,%xmm1,%xmm4'],
        ['vxorps', Register('XMM0'), Register('XMM0'), Register('XMM0'),
         'vxorps %xmm1, %xmm1,%xmm1'],
        ['vcvtsi2ss', Register('XMM0'), Register('XMM0'), Register('RAX'),
         'vcvtsi2ss %ecx,%xmm1, %xmm1'],
        ['vmulss', Register('XMM0'), Register('XMM0'), Register('XMM0'),
         'vmulss %xmm1,%xmm0,%xmm5'],
        ['vmovss', MemAddr('%edx,(%rax,%rax,1)'), Register('XMM0'),
         'vmovss %xmm4,0x4(%rsp,%rax,8)'],
        ['vaddss', Register('XMM0'), Register('XMM0'), Register('XMM0'),
         'vaddss %xmm5,%xmm4,%xmm1'],
        ['vmovss', MemAddr('%edx,(%rax,%rax,1)'), Register('XMM0'),
         'vmovss %xmm1,0x8(%rsp,%rax,8)'],
        ['inc', Register('RAX'),
         'inc %rax'],
        ['cmp', Register('RAX'), Parameter('IMD'),
         'cmp $0x1f3,%rax'],
        ['jb', Parameter('LBL'),
         'jb 400bc2 <main+0x62>'],
    ]

    # Schedule the kernel on Ivy Bridge and print binding, report and table
    scheduler = Scheduler('ivb', kernel)
    schedule_text, port_binding = scheduler.schedule()
    print(scheduler.get_port_binding(port_binding))
    print(scheduler.get_report_info(), end='')
    print(schedule_text)
    # The busiest port determines the throughput of the whole block
    block_throughput = round(max(port_binding), 2)
    print('Block Throughput: {}'.format(block_throughput))
|
||||
247
osaca/get_instr.py
Executable file
247
osaca/get_instr.py
Executable file
@@ -0,0 +1,247 @@
|
||||
#!/apps/python/3.5-anaconda/bin/python
|
||||
import sys
|
||||
import re
|
||||
from testcase import *
|
||||
from param import *
|
||||
|
||||
# Module-wide parser state shared by the functions below.

# Comment that marks the beginning of the code section to analyse
marker = r'//STARTLOOP'
# Pattern searched for to recognise assembly lines: whitespace, hex digits, ':'
asm_line = re.compile(r'\s[0-9a-f]+[:]')
# Number of indentation characters in front of the marker (set by check_line)
numSeps = 0
# Countdown set to 2 at the marker and decremented by check_line; lines are
# only analysed while it is > 0
sem = 0
db = {}
sorted_db = []
# Number of the line currently processed by extract_instr (starts at 1)
lncnt = 1
# NOTE(review): check_instr declares `global cnt`, but the initialisation
# below is commented out - confirm whether it is still needed.
#cnt=0
# Name of the file handed to extract_instr
fname = ""
# Indentation character of the high level code, chosen by set_counter_char
cntChar = ''
# True until the first marker line has been seen by check_line
first = True
|
||||
|
||||
def extract_instr(asmFile):
    '''
    Read a ``.log`` file line by line and hand every line to check_line.

    The module globals ``fname`` (file name) and ``lncnt`` (1-based number of
    the line currently processed) are updated while reading.

    Parameters
    ----------
    asmFile : str
        Path of the file to analyse; must end in ".log"

    Exits the interpreter with status 1 if the file name does not end in
    ".log" or if the file cannot be opened.
    '''
    global lncnt
    global fname
    fname = asmFile
    # Check if parameter is in the correct file format
    if not asmFile.endswith(".log"):
        print("Invalid argument")
        sys.exit(1)
    # Open file; stop here on failure instead of continuing without a handle
    try:
        f = open(asmFile, "r")
    except IOError:
        print("IOError: File not found")
        sys.exit(1)
    # Analyse code line by line and check the instructions; the context
    # manager closes the file even if a line fails to parse
    with f:
        lncnt = 1
        for line in f:
            check_line(line)
            lncnt += 1
|
||||
|
||||
|
||||
def check_line(line):
    """
    Inspect one line of the log and process it if it is inside the marked
    snippet.

    A marker line records the amount of indentation in front of the marker
    and arms the ``sem`` countdown. While ``sem`` is positive, assembly lines
    without a '//' are handed to check_instr(); any other line that is
    indented no deeper than the marker decrements ``sem``.

    Parameters
    ----------
    line : str
        Line of the log file
    """
    global numSeps
    global sem
    global first

    if marker in line:
        # Find out once whether the high-level code is indented with
        # whitespaces or tabs, then count the indentation of the marker.
        if first:
            set_counter_char(line)
            first = False
        before_marker = re.split(marker, line)[0]
        numSeps = before_marker.count(cntChar)
        sem = 2
        return

    if sem <= 0:
        # Outside of the marked code snippet
        return

    if re.search(asm_line, line):
        # Assembly line; lines containing a comment are ignored
        if r'//' not in line:
            check_instr(re.split(r'\t', line)[-1])
        return

    leading = re.split(r'\S', line)[0]
    if leading.count(cntChar) <= numSeps:
        # Not in the loop anymore - or yet - so the semaphore is decremented
        sem = sem - 1
|
||||
|
||||
# Check whether the separator is a tabulator or a whitespace
|
||||
def set_counter_char(line):
    """
    Store the indentation character of the marker line in ``cntChar``.

    Parameters
    ----------
    line : str
        Line with the start marker in it

    Raises
    ------
    NotImplementedError
        If the text in front of the marker contains neither spaces nor
        tabs, or a mix of both.
    """
    global cntChar
    before_marker = re.split(marker, line)[0]
    has_spaces = " " in before_marker
    has_tabs = "\t" in before_marker
    if has_spaces and not has_tabs:
        cntChar = ' '
    elif has_tabs and not has_spaces:
        cntChar = '\t'
    else:
        raise NotImplementedError("Indentation of code is only supported for whitespaces and tabs.")
|
||||
|
||||
|
||||
def check_instr(instr):
    """
    Classify the operands of one instruction, count its instruction form in
    ``db`` and write a testcase the first time a form is seen.

    Parameters
    ----------
    instr : str
        Instruction as string (mnemonic followed by AT&T style operands).
        Blank strings and lines that consist only of a raw byte are ignored.
    """
    global db
    # Check for strange clang padding bytes
    while instr.startswith("data32"):
        instr = instr[7:]
    # Separate mnemonic and operands; a blank line carries no instruction
    tokens = instr.split()
    if not tokens:
        return
    mnemonic = tokens[0]
    params = "".join(tokens[1:])
    # Check if line is not only a byte
    empty_byte = re.compile(r'[0-9a-f]{2}')
    if re.match(empty_byte, mnemonic) and len(mnemonic) == 2:
        return
    # Check if there's one or more operand and store all in a list
    param_list = flatten(separate_params(params))
    opList = list(param_list)
    # Check operands and separate them by IMMEDIATE (IMD), REGISTER (REG),
    # MEMORY (MEM) or LABEL (LBL)
    for i, op in enumerate(param_list):
        if len(op) <= 0:
            op = Parameter("NONE")
        elif op[0] == '$':
            op = Parameter("IMD")
        elif op[0] == '%' and '(' not in op:
            # Register, optionally followed by an opmask in curly braces
            j = len(op)
            opmask = False
            if '{' in op:
                j = op.index('{')
                opmask = True
            op = Register(op[1:j], opmask)
        elif '<' in op:
            op = Parameter("LBL")
        else:
            op = MemAddr(op)
        param_list[i] = str(op)
        opList[i] = op
    # Join mnemonic and operand(s) to an instruction form
    tabs = "\t" if len(mnemonic) > 7 else "\t\t"
    instr_form = mnemonic+tabs+(" ".join(param_list))
    # Check in database for instruction form and increment the counter
    if instr_form in db:
        db[instr_form] = db[instr_form]+1
        return
    db[instr_form] = 1
    # First appearance of this instruction form: create a testcase for it.
    # NOTE(review): the indentation of the original was lost; placing the
    # testcase creation in the first-appearance branch follows the original
    # comments - confirm against the upstream file.
    # Only create benchmark if no label (LBL) is part of the operands
    if any(str(par) == 'LBL' or str(par) == '' for par in opList):
        return
    # Create testcase with reversed param list, due to the fact its intel syntax!
    tc = Testcase(mnemonic, list(reversed(opList)), '64')
    tc.write_testcase()
|
||||
|
||||
def separate_params(params):
    """
    Delete a trailing comment, separate the operands and return them as a
    nested list.

    Operands are split at commas that are not enclosed in parentheses, so
    the commas inside a memory reference such as ``0x4(%rsp,%rax,8)`` never
    split an operand, regardless of where the memory operand is positioned.

    Parameters
    ----------
    params : str
        Operand string of an instruction without whitespaces

    Returns
    -------
    [[...[str]]]
        Nested list of strings of the form ``[first, [second, [third]]]``.
        The number of nest levels depends on the number of operands.
    """
    depth = 0
    for pos, char in enumerate(params):
        if char == '(':
            depth += 1
        elif char == ')':
            # Tolerate an unbalanced closing parenthesis
            depth = max(depth-1, 0)
        elif char == ',' and depth == 0:
            # Top-level comma: split here and handle the rest recursively
            return [params[:pos], separate_params(params[pos+1:])]
    # Last operand: cut off a trailing comment
    if '#' in params:
        return [params[:params.index('#')]]
    return [params]
|
||||
|
||||
|
||||
def sort_db():
    """
    Store the entries of ``db`` in ``sorted_db``, ordered by descending
    number of occurrences.
    """
    global sorted_db
    entries = list(db.items())
    entries.sort(key=lambda entry: entry[1], reverse=True)
    sorted_db = entries
|
||||
|
||||
|
||||
def print_sorted_db():
    """
    Print all counted instruction forms, most frequent first, followed by
    the cumulated number of instructions.
    """
    sort_db()
    total = 0
    print("Number of\tmnemonic")
    print("calls\n")
    for instr_form, calls in sorted_db:
        print(str(calls)+"\t\t"+instr_form)
        total += calls
    print("\nCumulated number of instructions: "+str(total))
|
||||
|
||||
|
||||
def save_db():
    """
    Write ``db`` to the file '.cnt_asm_ops.db' in the current directory.

    Every line holds an instruction form, a tabulator and the number of
    occurrences. An existing file is overwritten.
    """
    global db
    # The context manager closes the file even if a write fails
    with open(".cnt_asm_ops.db", "w") as db_file:
        for instr_form, calls in db.items():
            db_file.write(instr_form+"\t"+str(calls)+"\n")
|
||||
|
||||
|
||||
def load_db():
    """
    Load the counters stored in '.cnt_asm_ops.db' (current directory) into
    ``db``.

    Entries already present in ``db`` are overwritten by the stored value.
    If the file does not exist, a message is printed and ``db`` is left
    untouched. Blank lines are skipped.
    """
    global db
    try:
        db_file = open(".cnt_asm_ops.db", "r")
    except FileNotFoundError:
        print("no database found in current directory")
        return
    with db_file:
        for line in db_file:
            if not line.strip():
                continue
            # Strip only the line break, so the last line is parsed correctly
            # even if the file does not end with a newline
            fields = line.rstrip('\n').split('\t')
            mnemonic = fields[0]
            # Join mnemonic and operand(s) to an instruction form. Short
            # mnemonics are followed by two tabulators, which yields one
            # additional (empty) field in front of the operands.
            if len(mnemonic) > 7:
                tabs = "\t"
                params = fields[1]
                numCalls = fields[2]
            else:
                tabs = "\t\t"
                params = fields[2]
                numCalls = fields[3]
            instr_form = mnemonic+tabs+params
            db[instr_form] = int(numCalls)
|
||||
|
||||
|
||||
def flatten(l):
    """
    Flatten an arbitrarily nested list.

    Works iteratively, so the length of the list is not limited by the
    recursion limit of the interpreter.

    Parameters
    ----------
    l : [[...[str]]]
        Nested list

    Returns
    -------
    list
        New list with all non-list elements in their original order
    """
    flat = []
    # Stack of iterators, one per nesting level currently being walked
    pending = [iter(l)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # Descend into the nested list, continue with the rest later
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # Current level is exhausted
            pending.pop()
    return flat
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Every command line argument is treated as a log file to analyse.
    # NOTE(review): the indentation of the original was lost; printing the
    # result only when at least one file was given is an assumption - confirm.
    log_files = sys.argv[1:]
    if log_files:
        for log_file in log_files:
            extract_instr(log_file)
        print_sorted_db()
|
||||
826
osaca/osaca.py
Executable file
826
osaca/osaca.py
Executable file
@@ -0,0 +1,826 @@
|
||||
#!/apps/python/3.5-anaconda/bin/python
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import subprocess
|
||||
import os
|
||||
import re
|
||||
from param import *
|
||||
from eu_sched import *
|
||||
from testcase import *
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Osaca(object):
    """
    Analyses a marked code snippet of a binary or assembly file and predicts
    its throughput for a given microarchitecture.
    """
    arch = None
    filepath = None
    srcCode = None
    df = None
    instrForms = None
    # Variables for checking lines
    numSeps = 0
    indentChar = ''
    sem = 0
    marker = r'//STARTLOOP'

    # Variables for creating output
    longestInstr = 30
    # Constants
    ASM_LINE = re.compile(r'\s[0-9a-f]+[:]')
    # Matches every variation of the IACA start marker
    IACA_SM = re.compile(r'\s*movl[ \t]+\$111[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ \t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))')
    # Matches every variation of the IACA end marker
    IACA_EM = re.compile(r'\s*movl[ \t]+\$222[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ \t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))')

    def __init__(self, _arch, _filepath):
        """
        Parameters
        ----------
        _arch : str
            Microarchitecture code (see check_arch for the supported values)
        _filepath : str
            Path to the file to analyse
        """
        self.arch = _arch
        self.filepath = _filepath
        self.instrForms = []

    ##-------------------main functions depending on arguments----------------------
    def include_ibench(self):
        """
        Reads ibench output and includes it in the architecture specific csv
        file.
        """
        # Check args and exit program if something's wrong
        if not self.check_arch():
            print('Invalid microarchitecture.')
            sys.exit()
        if not self.check_file():
            print('Invalid file path or file format.')
            sys.exit()
        # Check for database for the chosen architecture
        self.df = self.read_csv()
        # Create sequence of numbers and their reciprocals to validate the measurements
        cycList, reciList = self.create_sequences()
        print('Everything seems fine! Let\'s start!')
        newData = []
        addedValues = 0
        for line in self.srcCode:
            # Skip the frequency header as well as empty and blank lines
            if 'Using frequency' in line or not line.strip():
                continue
            clmn = 'LT'
            instr = line.split()[0][:-1]
            if 'TP' in line:
                # We found a command with a throughput value. Get instruction and the number of
                # clock cycles and remove the '-TP' suffix.
                clmn = 'TP'
                instr = instr[:-3]
            # Otherwise it is a latency value. Nothing to do.
            clkC = line.split()[1]
            clkC_tmp = clkC
            clkC = self.validate_val(clkC, instr, clmn == 'TP', cycList, reciList)
            # validate_val returns its input unchanged if the validation failed;
            # in that case it printed a warning which is followed by a blank line below
            txtOutput = (clkC_tmp == clkC)
            val = -2
            new = False
            try:
                entry = self.df.loc[lambda df: df.instr == instr, clmn]
                val = entry.values[0]
            except IndexError:
                # Instruction not in database yet --> add it
                new = True
                # First check if LT or TP value has already been added before
                for i, item in enumerate(newData):
                    if instr in item:
                        if clmn == 'TP':
                            newData[i][1] = clkC
                        elif clmn == 'LT':
                            newData[i][2] = clkC
                        new = False
                        break
                if new and clmn == 'TP':
                    newData.append([instr, clkC, '-1', ((-1,),)])
                elif new and clmn == 'LT':
                    newData.append([instr, '-1', clkC, ((-1,),)])
                new = True
                addedValues += 1
            # If val is -1 (= not filled with a valid value) add it immediately
            if val == -1:
                self.df.at[entry.index[0], clmn] = clkC
                addedValues += 1
                continue
            if not new and abs((val/np.float64(clkC))-1) > 0.05:
                print('Different measurement for {} ({}): {}(old) vs. {}(new)\nPlease check for correctness (no changes were made).'.format(instr, clmn, val, clkC))
                txtOutput = True
            if txtOutput:
                print()
                txtOutput = False
        # Now merge the DataFrames and write new csv file
        if newData:
            self.df = pd.concat([self.df, pd.DataFrame(newData, columns=['instr','TP','LT','ports'])], ignore_index=True)
        csv = self.df.to_csv(index=False)
        self.write_csv(csv)
        print('ibench output {} successfully in database included.'.format(self.filepath.split('/')[-1]))
        print('{} values were added.'.format(addedValues))

    def inspect_binary(self):
        """
        Main function of OSACA. Inspect binary file and create analysis.
        """
        # Check args and exit program if something's wrong
        if not self.check_arch():
            print('Invalid microarchitecture.')
            sys.exit()
        if not self.check_elffile():
            print('Invalid file path or file format.')
            sys.exit()
        # Finally check for database for the chosen architecture and keep it,
        # create_TP_list() looks up its values in self.df
        self.df = self.read_csv()

        print('Everything seems fine! Let\'s start checking!')
        # NOTE(review): firstAppearance is only True for the very first line of
        # the objdump output, not for the first marker line - confirm intent.
        for i, line in enumerate(self.srcCode):
            self.check_line(line, i == 0)
        output = self.create_output()
        print(output)

    def inspect_with_iaca(self):
        """
        Main function of OSACA with IACA markers instead of OSACA marker.
        Inspect binary file and create analysis.
        """
        # Check args and exit program if something's wrong
        if not self.check_arch():
            print('Invalid microarchitecture.')
            sys.exit()
        # Check if input file is a binary or assembly file
        try:
            binaryFile = True
            if not self.check_elffile():
                print('Invalid file path or file format.')
                sys.exit()
        except (TypeError, IndexError):
            # check_elffile() could not read an objdump header, so the input is
            # treated as an assembly file
            binaryFile = False
            if not self.check_file(True):
                print('Invalid file path or file format.')
                sys.exit()
        # Finally check for database for the chosen architecture and keep it,
        # create_TP_list() looks up its values in self.df
        self.df = self.read_csv()

        print('Everything seems fine! Let\'s start checking!')
        if binaryFile:
            self.iaca_bin()
        else:
            self.iaca_asm()
        output = self.create_output()
        print(output)

    ##------------------------------------------------------------------------------

    def check_arch(self):
        """
        Check if the architecture is valid.

        Returns
        -------
        bool
            True    if arch is supported
            False   if arch is not supported

        """
        archList = ['SNB', 'IVB', 'HSW', 'BDW', 'SKL']
        return self.arch in archList

    def check_elffile(self):
        """
        Check if the given filepath exists, if the format is the needed elf64
        and store file data in attribute srcCode.

        Returns
        -------
        bool
            True    if file is expected elf64 file
            False   if file does not exist or is not an elf64 file

        Raises
        ------
        IndexError
            If the objdump output has less than two lines.
            inspect_with_iaca() relies on this to detect assembly input.
        """
        if os.path.isfile(self.filepath):
            self.store_srcCode_elf()
            if 'file format elf64' in self.srcCode[1]:
                return True
        return False

    def check_file(self, iacaFlag=False):
        """
        Check if the given filepath exists and store file data in attribute
        srcCode.

        Parameters
        ----------
        iacaFlag : bool
            store file data as a string in attribute srcCode if True,
            store it as a list of strings (lines) if False (default False)

        Returns
        -------
        bool
            True    if file exists
            False   if file does not exist

        """
        if os.path.isfile(self.filepath):
            self.store_srcCode(iacaFlag)
            return True
        return False

    def store_srcCode_elf(self):
        """
        Load binary file compiled with '-g' in class attribute srcCode and
        separate by line.
        """
        objdump = subprocess.run(['objdump', '--source', self.filepath], stdout=subprocess.PIPE)
        self.srcCode = objdump.stdout.decode('utf-8').split('\n')

    def store_srcCode(self, iacaFlag=False):
        """
        Load arbitrary file in class attribute srcCode.

        If the file cannot be opened, a message is printed and srcCode holds
        empty content.

        Parameters
        ----------
        iacaFlag : bool
            store file data as a string in attribute srcCode if True,
            store it as a list of strings (lines) if False (default False)
        """
        try:
            with open(self.filepath, 'r') as f:
                content = f.read()
        except IOError:
            print('IOError: file \'{}\' not found'.format(self.filepath))
            content = ''
        self.srcCode = content if iacaFlag else content.split('\n')

    def _csv_path(self):
        """
        Return the path of the architecture dependent CSV file, which resides
        in the data directory next to this module.
        """
        currDir = os.path.dirname(os.path.realpath(__file__))
        return os.path.join(currDir, 'data', self.arch.lower()+'_data.csv')

    def read_csv(self):
        """
        Reads architecture dependent CSV from data directory.

        Returns
        -------
        DataFrame
            CSV as DataFrame object
        """
        return pd.read_csv(self._csv_path())

    def write_csv(self, csv):
        """
        Writes architecture dependent CSV into data directory.

        The file read by read_csv() is overwritten. If it cannot be written,
        a message is printed.

        Parameters
        ----------
        csv : str
            CSV data as string
        """
        path = self._csv_path()
        try:
            with open(path, 'w') as f:
                f.write(csv)
        except IOError:
            print('IOError: could not write file \'{}\''.format(path))

    def create_sequences(self, end=101):
        """
        Creates list of integers from 1 to end-1 and list of their reciprocals.

        Parameters
        ----------
        end : int
            Exclusive end value for list of integers (default 101)

        Returns
        -------
        [int]
            cycList of integers
        [float]
            reciList of floats
        """
        cycList = list(range(1, end))
        reciList = [1/i for i in cycList]
        return cycList, reciList

    def validate_val(self, clkC, instr, isTP, cycList, reciList):
        """
        Validate given clock cycle clkC and return rounded value in case of
        success.

        A succeeded validation means the clock cycle clkC is only 5% higher or
        lower than an integer value from cycList or - if clkC is a throughput
        value - 5% higher or lower than a reciprocal from the reciList.

        Parameters
        ----------
        clkC : float or str
            Clock cycle to validate (anything float() accepts)
        instr : str
            Instruction for warning output
        isTP : bool
            True if a throughput value is to check, False for a latency value
        cycList : [int]
            Cycle list for validating
        reciList : [float]
            Reciprocal cycle list for validating

        Returns
        -------
        int, float or type of clkC
            Matching value of cycList or reciList in case of success,
            otherwise the unchanged clkC parameter
        """
        clmn = 'TP' if isTP else 'LT'
        measured = float(clkC)
        for i, cycles in enumerate(cycList):
            if cycles*1.05 > measured and cycles*0.95 < measured:
                # Value is probably correct, so round it to the estimated value
                return cycles
            # Check reciprocal only if it is a throughput value
            elif isTP and reciList[i]*1.05 > measured and reciList[i]*0.95 < measured:
                # Value is probably correct, so round it to the estimated value
                return reciList[i]
        # No value close to an integer or its reciprocal found, we assume the
        # measurement is incorrect
        print('Your measurement for {} ({}) is probably wrong. Please inspect your benchmark!'.format(instr, clmn))
        print('The program will continue with the given value')
        return clkC

    def check_line(self, line, firstAppearance=False):
        """
        Inspect line of source code and process it if inside the marked snippet.

        Parameter
        ---------
        line : str
            Line of source code
        firstAppearance : bool
            Necessary for setting indenting character (default False)
        """
        # Check if marker is in line
        if self.marker in line:
            # First, check if high level code in indented with whitespaces or tabs
            if firstAppearance:
                self.indentChar = self.get_indent_chars(line)
            # Now count the number of whitespaces
            self.numSeps = (re.split(self.marker, line)[0]).count(self.indentChar)
            self.sem = 2
        elif self.sem > 0:
            # We're in the marked code snippet
            # Check if the line is ASM code and - if not - check if we're still in the loop
            match = re.search(self.ASM_LINE, line)
            if match:
                # Further analysis of instructions
                # Check if there are comments in line
                if r'//' in line:
                    return
                self.check_instr(''.join(re.split(r'\t', line)[-1:]))
            elif (re.split(r'\S', line)[0]).count(self.indentChar) <= self.numSeps:
                # Not in the loop anymore - or yet. We decrement the semaphore
                self.sem = self.sem-1

    def get_indent_chars(self, line):
        """
        Check if indentation characters are either tabulators or whitespaces

        Parameters
        ----------
        line : str
            Line with start marker in it

        Returns
        -------
        str
            Indentation character as string

        Raises
        ------
        NotImplementedError
            If the text in front of the marker contains neither spaces nor
            tabs, or a mix of both.
        """
        beforeMarker = re.split(self.marker, line)[0]
        numSpaces = beforeMarker.count(' ')
        numTabs = beforeMarker.count('\t')
        if numSpaces != 0 and numTabs == 0:
            return ' '
        elif numSpaces == 0 and numTabs != 0:
            return '\t'
        else:
            raise NotImplementedError('Indentation of code is only supported for whitespaces and tabs.')

    def iaca_bin(self):
        """
        Extract instruction forms out of binary file using IACA markers.
        """
        self.marker = r'fs addr32 nop'
        for line in self.srcCode:
            # Check if marker is in line
            if self.marker in line:
                self.sem += 1
            elif self.sem == 1:
                # We're in the marked code snippet
                # Check if the line is ASM code
                match = re.search(self.ASM_LINE, line)
                if match:
                    # Further analysis of instructions
                    # Check if there are comments in line
                    if r'//' in line:
                        continue
                    # Do the same instruction check as for the OSACA marker line check
                    self.check_instr(''.join(re.split(r'\t', line)[-1:]))
            elif self.sem == 2:
                # Not in the loop anymore. Due to the fact it's the IACA marker we can stop here
                # After removing the last line which belongs to the IACA marker
                del self.instrForms[-1:]
                return

    def iaca_asm(self):
        """
        Extract instruction forms out of assembly file using IACA markers.

        Raises
        ------
        IndexError
            If the start or the end marker is not found in srcCode.
        """
        # Extract the code snippet surround by the IACA markers
        code = self.srcCode
        # Search for the start marker
        match = re.match(self.IACA_SM, code)
        while not match:
            code = code.split('\n', 1)[1]
            match = re.match(self.IACA_SM, code)
        # Search for the end marker; everything up to the last byte (144) of
        # the start marker and the rest of that line is dropped first
        code = (code.split('144', 1)[1]).split('\n', 1)[1]
        res = ''
        match = re.match(self.IACA_EM, code)
        while not match:
            res += code.split('\n', 1)[0]+'\n'
            code = code.split('\n', 1)[1]
            match = re.match(self.IACA_EM, code)
        # Split the result by line go on like with OSACA markers
        res = res.split('\n')
        for line in res:
            line = line.split('#')[0]
            line = line.lstrip()
            if len(line) == 0 or '//' in line or line.startswith('..'):
                continue
            self.check_instr(line)

    def check_instr(self, instr):
        """
        Inspect instruction for its parameters and add it to the instruction forms
        pool instrForm.

        Blank strings and lines that consist only of a raw byte are ignored.

        Parameters
        ----------
        instr : str
            Instruction as string
        """
        # Check for strange clang padding bytes
        while instr.startswith('data32'):
            instr = instr[7:]
        # Separate mnemonic and operands; a blank line carries no instruction
        tokens = instr.split()
        if not tokens:
            return
        mnemonic = tokens[0]
        params = ''.join(tokens[1:])
        # Check if line is not only a byte
        empty_byte = re.compile(r'[0-9a-f]{2}')
        if re.match(empty_byte, mnemonic) and len(mnemonic) == 2:
            return
        # Check if there's one or more operands and store all in a list
        param_list = self.flatten(self.separate_params(params))
        param_list_types = list(param_list)
        # Check operands and separate them by IMMEDIATE (IMD), REGISTER (REG),
        # MEMORY (MEM) or LABEL(LBL)
        for i, op in enumerate(param_list):
            if len(op) <= 0:
                op = Parameter('NONE')
            elif op[0] == '$':
                op = Parameter('IMD')
            elif op[0] == '%' and '(' not in op:
                # Register, optionally followed by an opmask in curly braces
                j = len(op)
                opmask = False
                if '{' in op:
                    j = op.index('{')
                    opmask = True
                op = Register(op[1:j], opmask)
            elif '<' in op or op.startswith('.'):
                op = Parameter('LBL')
            else:
                op = MemAddr(op)
            param_list[i] = str(op)
            param_list_types[i] = op
        # Add to list
        if len(instr) > self.longestInstr:
            self.longestInstr = len(instr)
        instrForm = [mnemonic]+list(reversed(param_list_types))+[instr]
        self.instrForms.append(instrForm)
        # If flag is set, create testcase for instruction form
        # Do this in reversed param list order, due to the fact it's intel syntax
        # Only create benchmark if no label (LBL) is part of the operands
        if 'LBL' in param_list or '' in param_list:
            return
        tc = Testcase(mnemonic, list(reversed(param_list_types)), '64')
        # Only write a testcase if it not already exists
        writeTP, writeLT = tc._Testcase__is_in_dir()
        tc.write_testcase(not writeTP, not writeLT)

    def separate_params(self, params):
        """
        Delete comments, separates parameters and return them as a list.

        Parameters are split at commas that are not enclosed in parentheses,
        so the commas inside a memory reference never split a parameter,
        regardless of where the memory operand is positioned.

        Parameters
        ----------
        params : str
            Splitted line after mnemonic

        Returns
        -------
        [[...[str]]]
            Nested list of strings. The number of nest levels depend on the
            number of parametes given.
        """
        depth = 0
        for pos, char in enumerate(params):
            if char == '(':
                depth += 1
            elif char == ')':
                # Tolerate an unbalanced closing parenthesis
                depth = max(depth-1, 0)
            elif char == ',' and depth == 0:
                # Top-level comma: split here and handle the rest recursively
                return [params[:pos], self.separate_params(params[pos+1:])]
        # Last parameter: cut off a trailing comment
        if '#' in params:
            return [params[:params.index('#')]]
        return [params]

    def flatten(self, l):
        """
        Flatten a nested list of strings.

        Parameters
        ----------
        l : [[...[str]]]
            Nested list of strings

        Returns
        -------
        [str]
            List of strings
        """
        flat = []
        # Stack of iterators, one per nesting level currently being walked
        pending = [iter(l)]
        while pending:
            for item in pending[-1]:
                if isinstance(item, list):
                    # Descend into the nested list, continue with the rest later
                    pending.append(iter(item))
                    break
                flat.append(item)
            else:
                # Current level is exhausted
                pending.pop()
        return flat

    def create_output(self, tp_list=False, pr_sched=True):
        """
        Creates output of analysed file including a time stamp.

        Parameters
        ----------
        tp_list : bool
            Boolean for indicating the need for the throughput list as output
            (default False)
        pr_sched : bool
            Boolean for indicating the need for predicting a scheduling
            (default True)

        Returns
        -------
        str
            OSACA output
        """
        # Check the output alignment depending on the longest instruction
        if self.longestInstr > 70:
            self.longestInstr = 70
        horizLine = self.create_horiz_sep()
        # Write general information about the benchmark
        output = ('--'+horizLine+'\n'
                  '| Analyzing of file:\t'+os.path.abspath(self.filepath)+'\n'
                  '| Architecture:\t\t'+self.arch+'\n'
                  '| Timestamp:\t\t'+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'\n')

        if tp_list:
            output += self.create_TP_list(horizLine)
        if pr_sched:
            output += '\n\n'
            sched = Scheduler(self.arch, self.instrForms)
            schedOutput, portBinding = sched.schedule()
            binding = sched.get_port_binding(portBinding)
            output += sched.get_report_info()+'\n'+binding+'\n\n'+schedOutput
            blockTP = round(max(portBinding), 2)
            output += 'Total number of estimated throughput: '+str(blockTP)
        return output

    def create_horiz_sep(self):
        """
        Calculate and return horizontal separator line.

        Returns
        -------
        str
            Horizontal separator line
        """
        return '-'*(self.longestInstr+8)

    def create_TP_list(self, horizLine):
        """
        Create list of instruction forms with the proper throughput value.

        Parameter
        ---------
        horizLine : str
            Calculated horizontal line for nice alignement

        Returns
        -------
        str
            Throughput list output for printing
        """
        import warnings
        # The instruction forms are used as regular expressions below and may
        # contain parentheses; silence the resulting pandas warning
        warnings.filterwarnings("ignore", 'This pattern has match groups')
        warning = False
        ws = ' '*(len(horizLine)-23)

        output = ('\n| INSTRUCTION'+ws+'CLOCK CYCLES\n'
                  '| '+horizLine+'\n|\n')
        # Check for the throughput data in CSV
        for elem in self.instrForms:
            # elem is [mnemonic, operand, ..., instruction string]
            opExt = []
            for i in range(1, len(elem)-1):
                if isinstance(elem[i], Register) and elem[i].reg_type == 'GPR':
                    optmp = 'r'+str(elem[i].size)
                elif isinstance(elem[i], MemAddr):
                    optmp = 'mem'
                else:
                    optmp = str(elem[i]).lower()
                opExt.append(optmp)
            operands = '_'.join(opExt)
            # Now look up the value in the dataframe
            # Check if there is a stored throughput value in database
            series = self.df['instr'].str.contains(elem[0]+'-'+operands)
            if True in series.values:
                # It's a match!
                notFound = False
                try:
                    tp = self.df[self.df.instr == elem[0]+'-'+operands].TP.values[0]
                except IndexError:
                    # Something went wrong
                    print('Error while fetching data from database')
                    continue
            # Did not found the exact instruction form.
            # Try to find the instruction form for register operands only
            else:
                opExtRegs = []
                for operand in opExt:
                    try:
                        Register(operand)
                        opExtRegs.append(True)
                    except KeyError:
                        opExtRegs.append(False)
                if True not in opExtRegs:
                    # No register in whole instruction form. How can I find out what regsize we need?
                    print('Feature not included yet: ', end='')
                    print(elem[0]+' for '+operands)
                    tp = 0
                    notFound = True
                    warning = True

                    numWhitespaces = self.longestInstr-len(elem[-1])
                    ws = ' '*numWhitespaces+'| '
                    n_f = ' '*(5-len(str(tp)))+'*'
                    data = '| '+elem[-1]+ws+str(tp)+n_f+'\n'
                    output += data
                    continue
                if opExtRegs[0] == False:
                    # Instruction stores result in memory. Check for storing in register instead.
                    if len(opExt) > 1:
                        if opExtRegs[1] == True:
                            opExt[0] = opExt[1]
                        elif len(opExt) > 2:
                            if opExtRegs[2] == True:
                                opExt[0] = opExt[2]
                if len(opExtRegs) == 2 and opExtRegs[1] == False:
                    # Instruction loads value from memory and has only two operands. Check for
                    # loading from register instead
                    if opExtRegs[0] == True:
                        opExt[1] = opExt[0]
                if len(opExtRegs) == 3 and opExtRegs[2] == False:
                    # Instruction loads value from memory and has three operands. Check for loading
                    # from register instead
                    opExt[2] = opExt[0]
                operands = '_'.join(opExt)
                # Check for register equivalent instruction
                series = self.df['instr'].str.contains(elem[0]+'-'+operands)
                if True in series.values:
                    # It's a match!
                    notFound = False
                    try:
                        tp = self.df[self.df.instr == elem[0]+'-'+operands].TP.values[0]
                    except IndexError:
                        # Something went wrong
                        print('Error while fetching data from database')
                        continue
                # Did not found the register instruction form. Set warning and go on with
                # throughput 0
                else:
                    tp = 0
                    notFound = True
                    warning = True
            # Check the alignement again
            numWhitespaces = self.longestInstr-len(elem[-1])
            ws = ' '*numWhitespaces+'| '
            n_f = ''
            if notFound:
                n_f = ' '*(5-len(str(tp)))+'*'
            data = '| '+elem[-1]+ws+'{:3.2f}'.format(tp)+n_f+'\n'
            output += data
        # Finally end the list of throughput values
        output += '| '+horizLine+'\n'
        if warning:
            output += ('\n\n* There was no throughput value found '
                       'for the specific instruction form.'
                       '\n  Please create a testcase via the create_testcase-method '
                       'or add a value manually.')
        return output
|
||||
|
||||
|
||||
##------------------------------------------------------------------------------
|
||||
##------------Main method--------------
|
||||
def main():
    """
    Parse the command line and run the requested OSACA action.

    Exactly one action is executed: including ibench output in the database
    (``-i``), inspecting the file using IACA markers (``--iaca``), inserting
    IACA markers into the file via kerncraft (``-m``) or, by default,
    inspecting the given file.

    Raises
    ------
    ValueError
        If no architecture was given although the selected action needs one
        (every action except marker insertion).
    """
    # Parse args
    parser = argparse.ArgumentParser(
        description='Analyzes a marked innermost loop snippet for a given architecture type '
                    'and prints out the estimated average throughput')
    parser.add_argument('-V', '--version', action='version', version='%(prog)s 0.1')
    parser.add_argument('--arch', dest='arch', type=str,
                        help='define architecture (SNB, IVB, HSW, BDW, SKL)')
    parser.add_argument('filepath', type=str, help='path to object (Binary, ASM, CSV)')
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument('-i', '--include-ibench', dest='incl', action='store_true',
                       help='includes the given values in form of the output of ibench in the '
                            'database')
    group.add_argument('--iaca', dest='iaca', action='store_true',
                       help='search for IACA markers instead the OSACA marker')
    group.add_argument('-m', '--insert-marker', dest='insert_marker', action='store_true',
                       help='try to find blocks probably corresponding to loops in assembly and '
                            'insert IACA marker')

    inp = parser.parse_args()
    # 'store_true' flags default to False and are never None, so the flag has
    # to be tested for truthiness. Only marker insertion works without an
    # architecture; every other action needs the Osaca object created below.
    if inp.arch is None and not inp.insert_marker:
        raise ValueError('Please specify an architecture')
    filepath = inp.filepath

    # Create Osaca object
    osaca = None
    if inp.arch is not None:
        osaca = Osaca(inp.arch.upper(), filepath)

    if inp.incl:
        osaca.include_ibench()
    elif inp.iaca:
        osaca.inspect_with_iaca()
    elif inp.insert_marker:
        # kerncraft is an optional dependency that is only needed here
        try:
            from kerncraft import iaca
        except ImportError:
            print('ImportError: Module kerncraft not installed. Use '
                  '\'pip install --user kerncraft\' for installation.\n'
                  'For more information see https://github.com/RRZE-HPC/kerncraft')
            # Signal the failure to the calling shell
            sys.exit(1)
        iaca.iaca_instrumentation(input_file=filepath, output_file=filepath,
                                  block_selection='manual', pointer_increment=1)
    else:
        osaca.inspect_binary()


##------------Main method--------------
if __name__ == '__main__':
    main()
|
||||
109
osaca/param.py
Executable file
109
osaca/param.py
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/apps/python/3.5-anaconda/bin/python
|
||||
class Parameter(object):
    """Generic instruction operand that is described only by its type."""

    # Operand types accepted by the constructor
    type_list = ["REG", "MEM", "IMD", "LBL", "NONE"]

    def __init__(self, ptype, name="NONE"):
        """
        Store the upper-cased operand type.

        ``name`` is accepted but not used by this class.

        Raises
        ------
        NameError
            If the upper-cased ``ptype`` is not listed in ``type_list``.
        """
        normalized = ptype.upper()
        self.ptype = normalized
        if normalized not in self.type_list:
            raise NameError("Type not supported: "+ptype)

    def __str__(self):
        '''returns string representation'''
        # The "NONE" type is rendered as an empty string
        return "" if self.ptype == "NONE" else self.ptype
|
||||
|
||||
class MemAddr(Parameter):
    """
    Memory operand given as ``[sreg:][offset][(base[, index[, scale]])]``.

    The constructor only records which components are present (as booleans);
    the component values themselves are not stored.
    """

    segment_regs = ["CS", "DS", "SS", "ES", "FS", "GS"]
    scales = [1, 2, 4, 8]

    def __init__(self, name):
        """
        Analyse the textual memory address ``name``.

        Raises
        ------
        NameError
            If the segment register is unknown, the scale is not one of
            ``scales`` (or not a number), or more than two commas appear
            inside the parentheses.
        """
        # Initialise the base class so that ``ptype`` exists on memory
        # operands as well.
        super().__init__("MEM")
        self.sreg = False
        self.offset = False
        self.base = False
        self.index = False
        self.scale = False
        if ':' in name:
            # The first character is skipped -- presumably the '%' register
            # prefix; TODO confirm against the callers.
            if name[1:name.index(':')].upper() not in self.segment_regs:
                raise NameError("Type not supported: "+name)
            self.sreg = True
            self.offset = True
        # Anything in front of (or instead of) the parentheses is an offset
        if '(' not in name or name.index('(') != 0:
            self.offset = True
        if '(' in name:
            # Assumes the closing parenthesis is the last character of name
            self.parentheses = name[name.index('(')+1:-1]
            self.commacnt = self.parentheses.count(',')
            if self.commacnt == 0:
                self.base = True
            elif self.commacnt == 1:
                self.base = True
                self.index = True
            elif self.commacnt == 2:
                # Parse the complete text after the last comma. Looking only
                # at the last character would accept e.g. '18' as scale 8 and
                # leak a ValueError for non-numeric text.
                scale_text = self.parentheses.rsplit(',', 1)[1].strip()
                try:
                    scale = int(scale_text)
                except ValueError:
                    raise NameError("Type not supported: "+name) from None
                if scale not in self.scales:
                    raise NameError("Type not supported: "+name)
                self.base = True
                self.index = True
                # A scale of 1 is treated like an address without scale
                self.scale = scale != 1
            else:
                raise NameError("Type not supported: "+name)

    def __str__(self):
        '''returns string representation'''
        mem_format = "MEM("
        if self.sreg:
            mem_format += "sreg:"
        if self.offset:
            mem_format += "offset"
        # NOTE(review): an address with base and index but without scale adds
        # no parenthesised part at all; kept as is because callers may match
        # on the exact string.
        if self.base and not self.index:
            mem_format += "(base)"
        elif self.base and self.index and self.scale:
            mem_format += "(base, index, scale)"
        mem_format += ")"
        return mem_format
|
||||
|
||||
|
||||
|
||||
class Register(Parameter):
    """
    Register operand with its size in bits and its register class.

    ``sizes`` maps an upper-case register name to ``(size, reg_type)``.
    """

    sizes = {
        # General Purpose Registers
        "AH": (8, "GPR"), "AL": (8, "GPR"), "BH": (8, "GPR"), "BL": (8, "GPR"),
        "CH": (8, "GPR"), "CL": (8, "GPR"), "DH": (8, "GPR"), "DL": (8, "GPR"),
        "BPL": (8, "GPR"), "SIL": (8, "GPR"), "DIL": (8, "GPR"), "SPL": (8, "GPR"),
        "R8L": (8, "GPR"), "R9L": (8, "GPR"), "R10L": (8, "GPR"), "R11L": (8, "GPR"),
        "R12L": (8, "GPR"), "R13L": (8, "GPR"), "R14L": (8, "GPR"), "R15L": (8, "GPR"),
        "R8B": (8, "GPR"), "R9B": (8, "GPR"), "R10B": (8, "GPR"), "R11B": (8, "GPR"),
        "R12B": (8, "GPR"), "R13B": (8, "GPR"), "R14B": (8, "GPR"), "R15B": (8, "GPR"),
        # "BX" was misspelled as "BC", which made Register('bx') fail
        "AX": (16, "GPR"), "BX": (16, "GPR"), "CX": (16, "GPR"), "DX": (16, "GPR"),
        "BP": (16, "GPR"), "SI": (16, "GPR"), "DI": (16, "GPR"), "SP": (16, "GPR"),
        "R8W": (16, "GPR"), "R9W": (16, "GPR"), "R10W": (16, "GPR"), "R11W": (16, "GPR"),
        "R12W": (16, "GPR"), "R13W": (16, "GPR"), "R14W": (16, "GPR"), "R15W": (16, "GPR"),
        "EAX": (32, "GPR"), "EBX": (32, "GPR"), "ECX": (32, "GPR"), "EDX": (32, "GPR"),
        "EBP": (32, "GPR"), "ESI": (32, "GPR"), "EDI": (32, "GPR"), "ESP": (32, "GPR"),
        "R8D": (32, "GPR"), "R9D": (32, "GPR"), "R10D": (32, "GPR"), "R11D": (32, "GPR"),
        "R12D": (32, "GPR"), "R13D": (32, "GPR"), "R14D": (32, "GPR"), "R15D": (32, "GPR"),
        "RAX": (64, "GPR"), "RBX": (64, "GPR"), "RCX": (64, "GPR"), "RDX": (64, "GPR"),
        "RBP": (64, "GPR"), "RSI": (64, "GPR"), "RDI": (64, "GPR"), "RSP": (64, "GPR"),
        "R8": (64, "GPR"), "R9": (64, "GPR"), "R10": (64, "GPR"), "R11": (64, "GPR"),
        "R12": (64, "GPR"), "R13": (64, "GPR"), "R14": (64, "GPR"), "R15": (64, "GPR"),
        "CS": (16, "GPR"), "DS": (16, "GPR"), "SS": (16, "GPR"),
        "ES": (16, "GPR"), "FS": (16, "GPR"), "GS": (16, "GPR"),
        "EFLAGS": (32, "GPR"), "RFLAGS": (64, "GPR"), "EIP": (32, "GPR"), "RIP": (64, "GPR"),
        # FPU Registers
        "ST0": (80, "FPU"), "ST1": (80, "FPU"), "ST2": (80, "FPU"), "ST3": (80, "FPU"),
        "ST4": (80, "FPU"), "ST5": (80, "FPU"), "ST6": (80, "FPU"), "ST7": (80, "FPU"),
        # MMX Registers
        "MM0": (64, "MMX"), "MM1": (64, "MMX"), "MM2": (64, "MMX"), "MM3": (64, "MMX"),
        "MM4": (64, "MMX"), "MM5": (64, "MMX"), "MM6": (64, "MMX"), "MM7": (64, "MMX"),
        # XMM Registers
        "XMM0": (128, "XMM"), "XMM1": (128, "XMM"), "XMM2": (128, "XMM"), "XMM3": (128, "XMM"),
        "XMM4": (128, "XMM"), "XMM5": (128, "XMM"), "XMM6": (128, "XMM"), "XMM7": (128, "XMM"),
        "XMM8": (128, "XMM"), "XMM9": (128, "XMM"), "XMM10": (128, "XMM"), "XMM11": (128, "XMM"),
        "XMM12": (128, "XMM"), "XMM13": (128, "XMM"), "XMM14": (128, "XMM"), "XMM15": (128, "XMM"),
        "XMM16": (128, "XMM"), "XMM17": (128, "XMM"), "XMM18": (128, "XMM"), "XMM19": (128, "XMM"),
        "XMM20": (128, "XMM"), "XMM21": (128, "XMM"), "XMM22": (128, "XMM"), "XMM23": (128, "XMM"),
        "XMM24": (128, "XMM"), "XMM25": (128, "XMM"), "XMM26": (128, "XMM"), "XMM27": (128, "XMM"),
        "XMM28": (128, "XMM"), "XMM29": (128, "XMM"), "XMM30": (128, "XMM"), "XMM31": (128, "XMM"),
        # YMM Registers
        "YMM0": (256, "YMM"), "YMM1": (256, "YMM"), "YMM2": (256, "YMM"), "YMM3": (256, "YMM"),
        "YMM4": (256, "YMM"), "YMM5": (256, "YMM"), "YMM6": (256, "YMM"), "YMM7": (256, "YMM"),
        "YMM8": (256, "YMM"), "YMM9": (256, "YMM"), "YMM10": (256, "YMM"), "YMM11": (256, "YMM"),
        "YMM12": (256, "YMM"), "YMM13": (256, "YMM"), "YMM14": (256, "YMM"), "YMM15": (256, "YMM"),
        "YMM16": (256, "YMM"), "YMM17": (256, "YMM"), "YMM18": (256, "YMM"), "YMM19": (256, "YMM"),
        "YMM20": (256, "YMM"), "YMM21": (256, "YMM"), "YMM22": (256, "YMM"), "YMM23": (256, "YMM"),
        "YMM24": (256, "YMM"), "YMM25": (256, "YMM"), "YMM26": (256, "YMM"), "YMM27": (256, "YMM"),
        "YMM28": (256, "YMM"), "YMM29": (256, "YMM"), "YMM30": (256, "YMM"), "YMM31": (256, "YMM"),
        # ZMM Registers
        "ZMM0": (512, "ZMM"), "ZMM1": (512, "ZMM"), "ZMM2": (512, "ZMM"), "ZMM3": (512, "ZMM"),
        "ZMM4": (512, "ZMM"), "ZMM5": (512, "ZMM"), "ZMM6": (512, "ZMM"), "ZMM7": (512, "ZMM"),
        "ZMM8": (512, "ZMM"), "ZMM9": (512, "ZMM"), "ZMM10": (512, "ZMM"), "ZMM11": (512, "ZMM"),
        "ZMM12": (512, "ZMM"), "ZMM13": (512, "ZMM"), "ZMM14": (512, "ZMM"), "ZMM15": (512, "ZMM"),
        "ZMM16": (512, "ZMM"), "ZMM17": (512, "ZMM"), "ZMM18": (512, "ZMM"), "ZMM19": (512, "ZMM"),
        "ZMM20": (512, "ZMM"), "ZMM21": (512, "ZMM"), "ZMM22": (512, "ZMM"), "ZMM23": (512, "ZMM"),
        "ZMM24": (512, "ZMM"), "ZMM25": (512, "ZMM"), "ZMM26": (512, "ZMM"), "ZMM27": (512, "ZMM"),
        "ZMM28": (512, "ZMM"), "ZMM29": (512, "ZMM"), "ZMM30": (512, "ZMM"), "ZMM31": (512, "ZMM"),
        # Opmask Register
        "K0": (64, "K"), "K1": (64, "K"), "K2": (64, "K"), "K3": (64, "K"),
        "K4": (64, "K"), "K5": (64, "K"), "K6": (64, "K"), "K7": (64, "K"),
        # Bounds Registers
        "BND0": (128, "BND"), "BND1": (128, "BND"), "BND2": (128, "BND"), "BND3": (128, "BND"),
        # Registers in general
        # NOTE(review): "R8" is also the name of a 64 bit register above. This
        # later entry wins, so Register('r8') reports 8 bits. Left unchanged
        # because callers may rely on either meaning -- confirm and resolve.
        "R8": (8, "GPR"), "R16": (16, "GPR"), "R32": (32, "GPR"), "R64": (64, "GPR"),
        "FPU": (80, "FPU"), "MMX": (64, "MMX"), "XMM": (128, "XMM"), "YMM": (256, "YMM"),
        "ZMM": (512, "ZMM"), "K": (64, "K"), "BND": (128, "BND")
    }

    def __init__(self, name, mask=False):
        """
        Look up size and register class of the register called ``name``.

        Parameters
        ----------
        name : str
            Register name; the lookup is case-insensitive.
        mask : bool
            Whether ``{opmask}`` is appended in the string representation
            (default False)

        Raises
        ------
        NameError
            If the upper-cased name is not a key of ``sizes``.
        """
        # Initialise the base class so that ``ptype`` exists on registers too
        super().__init__("REG", name)
        self.name = name.upper()
        self.mask = mask
        # The former check ``if[name in self.sizes]`` tested a non-empty list
        # and therefore always passed; unknown names ended in a KeyError.
        if self.name not in self.sizes:
            raise NameError("Register name not in dictionary: "+self.name)
        self.size, self.reg_type = self.sizes[self.name]

    def __str__(self):
        '''returns string representation'''
        opmask = "{opmask}" if self.mask else ""
        return self.reg_type+opmask
|
||||
367
osaca/testcase.py
Executable file
367
osaca/testcase.py
Executable file
@@ -0,0 +1,367 @@
|
||||
#!/apps/python/3.5-anaconda/bin/python
|
||||
|
||||
import os
|
||||
from subprocess import call
|
||||
from math import ceil
|
||||
from param import *
|
||||
|
||||
class Testcase(object):
    """
    Generator for assembly benchmark files of a single instruction form.

    For a mnemonic and its operands a latency testcase (``<name>.S``) and a
    throughput testcase (``<name>-TP.S``) can be written to the ``testcases``
    directory next to the package directory.
    """

    ##------------------Constant variables--------------------------
    # Lookup tables for regs
    gprs64 = ['rax', 'rbx', 'rcx', 'rdx', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15']
    gprs32 = ['eax', 'ebx', 'ecx', 'edx', 'r9d', 'r10d', 'r11d', 'r12d', 'r13d', 'r14d', 'r15d']
    gprs16 = ['ax', 'bx', 'cx', 'dx', 'r9w', 'r10w', 'r11w', 'r12w', 'r13w', 'r14w', 'r15w']
    gprs8 = ['al', 'bl', 'cl', 'dl', 'r9l', 'r10l', 'r11l', 'r12l', 'r13l', 'r14l', 'r15l']
    fpus = ['st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6', 'st7']
    mmxs = ['mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', 'mm7']
    ks = ['k0', 'k1', 'k2', 'k3', 'k4', 'k5', 'k6', 'k7']
    # Only four bounds registers exist (bnd0-bnd3)
    bnds = ['bnd0', 'bnd1', 'bnd2', 'bnd3']
    xmms = ['xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'xmm8', 'xmm9',
            'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14', 'xmm15']
    ymms = ['ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5', 'ymm6', 'ymm7', 'ymm8', 'ymm9',
            'ymm10', 'ymm11', 'ymm12', 'ymm13', 'ymm14', 'ymm15']
    zmms = ['zmm0', 'zmm1', 'zmm2', 'zmm3', 'zmm4', 'zmm5', 'zmm6', 'zmm7', 'zmm8', 'zmm9',
            'zmm10', 'zmm11', 'zmm12', 'zmm13', 'zmm14', 'zmm15']
    # Lookup table for memory
    mems = ['[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]',
            '[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]']
    # Lookup table for immediates
    imds = ['1', '2', '13', '22', '8', '78', '159', '222',
            '3', '9', '5', '55', '173', '317', '254', '255']
    # TODO Differentiate between AVX512 (with additional xmm16-31) and the rest
    # ...
    # ...
    # end TODO

    # Operand type -> lookup table
    ops = {'gpr64': gprs64, 'gpr32': gprs32, 'gpr16': gprs16, 'gpr8': gprs8, 'fpu': fpus,
           'mmx': mmxs, 'k': ks, 'bnd': bnds, 'xmm': xmms, 'ymm': ymms, 'zmm': zmms,
           'mem': mems, 'imd': imds}

    # Create Single Precision 1.0 (currently not used by write_testcase)
    sp1 = ('\t\t# create SP 1.0\n'
           '\t\tvpcmpeqw xmm0, xmm0, xmm0\n'
           '\t\tvpslld xmm0, xmm0, 25\t\t\t# logical left shift: 11111110..0 (25=32-(8-1))\n'
           '\t\tvpsrld xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading mantissa bit is zero\n'
           '\t\t# copy SP 1.0\n')
    # Create Double Precision 1.0
    dp1 = ('\t\t# create DP 1.0\n'
           '\t\tvpcmpeqw xmm0, xmm0, xmm0\t\t# all ones\n'
           '\t\tvpsllq xmm0, xmm0, 54\t\t\t# logical left shift: 11111110..0 (54=64-(10-1))\n'
           '\t\tvpsrlq xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading mantissa bit is zero\n')
    # Create epilogue
    done = ('done:\n'
            '\t\tmov\trsp, rbp\n'
            '\t\tpop\trbp\n'
            '\t\tret\n'
            '.size latency, .-latency')
    ##----------------------------------------------------------------

    # Constructor
    def __init__(self, _mnemonic, _param_list, _num_instr='32'):
        """
        Prepare all parts of the testcases for one instruction form.

        Parameters
        ----------
        _mnemonic : str
            Mnemonic of the instruction (stored lower-cased)
        _param_list : list
            Operands of the instruction (Register, MemAddr or Parameter)
        _num_instr : str or int
            Number of instructions per loop, rounded up to an even number
            (default '32')
        """
        self.instr = _mnemonic.lower()
        self.param_list = _param_list
        # num_instr must be an even number
        self.num_instr = str(ceil(int(_num_instr)/2)*2)
        # Check for the number of operands and initialise the GPRs if necessary
        (self.op_a, self.op_b, self.op_c, self.gprPush, self.gprPop, self.zeroGPR,
         self.copy) = self.__define_operands()
        self.num_operands = len(self.param_list)

        # Create asm header
        self.def_instr, self.ninstr, self.init, self.expand = self.__define_header()
        # Create latency and throughput loop
        self.loop_lat = self.__define_loop_lat()
        self.loop_thrpt = self.__define_loop_thrpt()

        # Create extension for testcase name
        def short(op):
            # 'gpr64' is abbreviated to 'r64', all other types are kept
            return op if 'gpr' not in op else 'r'+op[3:]

        sep1 = '_' if self.num_operands > 1 else ''
        sep2 = '_' if self.num_operands > 2 else ''
        self.extension = ('-' + short(self.op_a) + sep1 + short(self.op_b)
                          + sep2 + short(self.op_c))

    def __testcase_dir(self):
        """Return the directory the testcase files are stored in."""
        # abspath guards against an empty dirname, which would otherwise
        # turn the result into a path relative to the file system root
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'testcases')

    def write_testcase(self, TP=True, LT=True):
        """
        Write testcase for class attributes in a file.

        Parameters
        ----------
        TP : bool
            Controls if throughput testcase should be written
            (default True)

        LT : bool
            Controls if latency testcase should be written
            (default True)
        """
        if not (TP or LT):
            return
        out_dir = self.__testcase_dir()
        # Needed for both kinds of testcases, not only for the latency one
        os.makedirs(out_dir, exist_ok=True)
        base_name = os.path.join(out_dir, self.instr+self.extension)
        # Everything in front of and behind the loop is shared by both files
        prologue = (self.def_instr+self.ninstr+self.init+self.dp1+self.expand+self.gprPush
                    + self.zeroGPR+self.copy)
        epilogue = self.gprPop+self.done
        if LT:
            # Write latency file
            with open(base_name+'.S', 'w') as f:
                f.write(prologue+self.loop_lat+epilogue)
        if TP:
            # Write throughput file
            with open(base_name+'-TP.S', 'w') as f:
                f.write(prologue+self.loop_thrpt+epilogue)

    def __operand_type(self, operand):
        """Return the lookup key of ``ops`` for an operand ('' if unknown)."""
        if isinstance(operand, Register):
            op_type = operand.reg_type.lower()
            # General purpose registers are distinguished by their size
            if op_type == 'gpr':
                op_type += str(operand.size)
            return op_type
        if isinstance(operand, MemAddr):
            return 'mem'
        if isinstance(operand, Parameter) and str(operand) == 'IMD':
            return 'imd'
        return ''

    # Check operands
    def __define_operands(self):
        """
        Check for the number of operands and initialise the GPRs if necessary.

        Returns
        -------
        (str, str, str, str, str, str, str)
            String tuple containing types of operands and if needed push/pop operations, the
            initialisation of general purpose regs and the copy if registers.
        """
        oprnds = self.param_list
        op_a = self.__operand_type(oprnds[0])
        op_b = self.__operand_type(oprnds[1]) if len(oprnds) > 1 else ''
        op_c = self.__operand_type(oprnds[2]) if len(oprnds) == 3 else ''
        gprPush, gprPop, zeroGPR = ('', '', '')
        # Save and zero the GPRs as soon as any operand is a GPR
        if any('gpr' in op for op in (op_a, op_b, op_c)):
            gprPush, gprPop, zeroGPR = self.__initialise_gprs()
        if len(oprnds) == 1 and isinstance(oprnds[0], Register):
            copy = self.__copy_regs(oprnds[0])
        elif len(oprnds) > 1 and isinstance(oprnds[1], Register):
            copy = self.__copy_regs(oprnds[1])
        elif len(oprnds) > 2 and isinstance(oprnds[2], Register):
            # The second operand is no register in this branch, so the third
            # one has to be used
            copy = self.__copy_regs(oprnds[2])
        else:
            copy = ''
        return (op_a, op_b, op_c, gprPush, gprPop, zeroGPR, copy)

    def __initialise_gprs(self):
        """
        Initialise eleven general purpose registers and set them to zero.

        Returns
        -------
        (str, str, str)
            String tuple for push, pop and initalisation operations
        """
        gprPush = ''.join('\t\tpush {}\n'.format(reg) for reg in self.gprs64)
        # Registers are restored in reverse order of the pushes
        gprPop = ''.join('\t\tpop {}\n'.format(reg) for reg in reversed(self.gprs64))
        zeroGPR = ''.join('\t\txor {}, {}\n'.format(reg, reg) for reg in self.gprs64)
        return (gprPush, gprPop, zeroGPR)

    # Copy created values in specific register
    def __copy_regs(self, reg):
        """
        Copy created values in specific register.

        Parameters
        ----------
        reg : Register
            Register for copying the value

        Returns
        -------
        str
            String containing the copy instructions ('' for register types
            other than GPR, MMX, XMM, YMM and ZMM)
        """
        copy = '\t\t# copy DP 1.0\n'
        # Different handling for GPR, MMX and SSE/AVX registers
        if reg.reg_type == 'GPR':
            gpr = self.ops['gpr64']
            copy += '\t\tvmovq {}, xmm0\n'.format(gpr[0])
            copy += '\t\tvmovq {}, xmm0\n'.format(gpr[1])
            copy += '\t\t# Create DP 2.0\n'
            copy += '\t\tadd {}, {}\n'.format(gpr[1], gpr[0])
            copy += '\t\t# Create DP 0.5\n'
            copy += '\t\tdiv {}\n'.format(gpr[0])
            copy += '\t\tmovq {}, {}\n'.format(gpr[2], gpr[0])
            copy += '\t\tvmovq {}, xmm0\n'.format(gpr[0])
        elif reg.reg_type == 'MMX':
            mmx = self.ops['mmx']
            gpr = self.ops['gpr64']
            copy += '\t\tvmovq {}, xmm0\n'.format(mmx[0])
            copy += '\t\tvmovq {}, xmm0\n'.format(mmx[1])
            copy += '\t\tvmovq {}, xmm0\n'.format(gpr[0])
            copy += '\t\t# Create DP 2.0\n'
            # The lookup table is a class attribute and needs the self prefix
            copy += '\t\tadd {}, {}\n'.format(mmx[1], mmx[0])
            copy += '\t\t# Create DP 0.5\n'
            copy += '\t\tdiv {}\n'.format(gpr[0])
            copy += '\t\tmovq {}, {}\n'.format(mmx[2], gpr[0])
        elif reg.reg_type in ('XMM', 'YMM', 'ZMM'):
            vec = self.ops[reg.reg_type.lower()]
            copy += '\t\tvmovaps {}, {}\n'.format(vec[0], vec[0])
            copy += '\t\tvmovaps {}, {}\n'.format(vec[1], vec[0])
            copy += '\t\t# Create DP 2.0\n'
            copy += '\t\tvaddpd {}, {}, {}\n'.format(vec[1], vec[1], vec[1])
            copy += '\t\t# Create DP 0.5\n'
            copy += '\t\tvdivpd {}, {}, {}\n'.format(vec[2], vec[0], vec[1])
        else:
            copy = ''
        return copy

    def __define_header(self):
        """
        Define header.

        Returns
        -------
        (str, str, str, str)
            String tuple containing the header, value initalisations and extensions
        """
        def_instr = '#define INSTR '+self.instr+'\n'
        ninstr = '#define NINST '+self.num_instr+'\n'
        pi = ('PI:\n'
              '.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, '  # 128 bit
              '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, '        # 256 bit
              '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, '        # 384 bit
              '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9\n')       # 512 bit
        init = ('#define N edi\n'
                '#define i r8d\n\n\n'
                '.intel_syntax noprefix\n'
                '.globl ninst\n'
                '.data\n'
                'ninst:\n'
                '.long NINST\n'
                '.align 32\n'
                + pi +
                '.text\n'
                '.globl latency\n'
                '.type latency, @function\n'
                '.align 32\n'
                'latency:\n'
                '\t\tpush rbp\n'
                '\t\tmov rbp, rsp\n'
                '\t\txor i, i\n'
                '\t\ttest N, N\n'
                '\t\tjle done\n')
        # Expand to AVX(512) if necessary
        op_types = (self.op_a, self.op_b, self.op_c)
        expand = ''
        if 'ymm' in op_types:
            expand = ('\t\t# expand from SSE to AVX\n'
                      '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n')
        # A zmm operand takes precedence over a ymm operand
        if 'zmm' in op_types:
            expand = ('\t\t# expand from SSE to AVX\n'
                      '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n'
                      '\t\t# expand from AVX to AVX512\n'
                      '\t\tvinsert64x4 zmm0, zmm0, ymm0, 0x1\n')
        return (def_instr, ninstr, init, expand)

    def __define_loop_lat(self):
        """
        Create latency loop.

        Returns
        -------
        str
            Latency loop as string
        """
        ops = self.ops
        num_instr = int(self.num_instr)
        loop_lat = ('loop:\n'
                    '\t\tinc i\n')
        # NOTE(review): three operands where the first operand type matches
        # neither the second nor the third one produce a loop without any
        # INSTR line -- confirm whether that is intended.
        if self.num_operands == 1:
            for _ in range(0, num_instr):
                loop_lat += '\t\tINSTR {}\n'.format(ops[self.op_a][0])
        elif self.num_operands == 2 and self.op_a == self.op_b:
            for _ in range(0, num_instr, 2):
                loop_lat += '\t\tINSTR {}, {}\n'.format(ops[self.op_a][0], ops[self.op_b][1])
                loop_lat += '\t\tINSTR {}, {}\n'.format(ops[self.op_b][1], ops[self.op_b][0])
        elif self.num_operands == 2 and self.op_a != self.op_b:
            for _ in range(0, num_instr, 2):
                loop_lat += '\t\tINSTR {}, {}\n'.format(ops[self.op_a][0], ops[self.op_b][0])
                loop_lat += '\t\tINSTR {}, {}\n'.format(ops[self.op_a][0], ops[self.op_b][0])
        elif self.num_operands == 3 and self.op_a == self.op_b:
            for _ in range(0, num_instr, 2):
                loop_lat += '\t\tINSTR {}, {}, {}\n'.format(
                    ops[self.op_a][0], ops[self.op_b][1], ops[self.op_c][0])
                loop_lat += '\t\tINSTR {}, {}, {}\n'.format(
                    ops[self.op_a][1], ops[self.op_b][0], ops[self.op_c][0])
        elif self.num_operands == 3 and self.op_a == self.op_c:
            for _ in range(0, num_instr, 2):
                loop_lat += '\t\tINSTR {}, {}, {}\n'.format(
                    ops[self.op_a][0], ops[self.op_b][0], ops[self.op_c][0])
                loop_lat += '\t\tINSTR {}, {}, {}\n'.format(
                    ops[self.op_a][1], ops[self.op_b][0], ops[self.op_c][0])
        loop_lat += ('\t\tcmp i, N\n'
                     '\t\tjl loop\n')
        return loop_lat

    def __define_loop_thrpt(self):
        """
        Create throughput loop.

        Returns
        -------
        str
            Throughput loop as string
        """
        loop_thrpt = ('loop:\n'
                      '\t\tinc i\n')
        has_second = self.num_operands in (2, 3)
        has_third = self.num_operands == 3
        dst_regs = self.ops[self.op_a]
        ext = ''
        for i in range(0, int(self.num_instr)):
            # Source operands cycle through the first three table entries
            if has_second:
                ext = ', {}'.format(self.ops[self.op_b][i % 3])
            if has_third:
                ext += ', {}'.format(self.ops[self.op_c][i % 3])
            # Destinations cycle through the remaining entries (index 3 and up)
            regNum = (i % (len(dst_regs)-3))+3
            loop_thrpt += '\t\tINSTR {}{}\n'.format(dst_regs[regNum], ext)
        loop_thrpt += ('\t\tcmp i, N\n'
                       '\t\tjl loop\n')
        return loop_thrpt

    def __is_in_dir(self):
        """
        Check if testcases with the same name already exist in testcase
        directory.

        Returns
        -------
        (bool, bool)
            True if file is in directory
            False if file is not in directory
            While the first value stands for the throughput testcase
            and the second value stands for the latency testcase
        """
        TP = False
        LT = False
        name = self.instr+self.extension
        # Search the directory write_testcase writes to (including subdirectories)
        for root, dirs, files in os.walk(self.__testcase_dir()):
            if (name+'-TP.S') in files:
                TP = True
            if (name+'.S') in files:
                LT = True
        return (TP, LT)
|
||||
Reference in New Issue
Block a user