From ea0a4e6cb352c897f67d5b75f4276cf9536c681a Mon Sep 17 00:00:00 2001 From: Jan Laukemann Date: Sat, 22 Jul 2017 18:29:28 +0200 Subject: [PATCH] added memory testcase functionality --- Testcase.py | 137 +++++++++++++++++++++---------------- create_testcase.py | 40 +++++++++++ data/ivb_throughput.csv | 90 +++++++++++++----------- get_instr.py | 148 ++++++++-------------------------------- osaca.py | 13 +++- 5 files changed, 207 insertions(+), 221 deletions(-) create mode 100755 create_testcase.py diff --git a/Testcase.py b/Testcase.py index a70965d..e5be1ec 100755 --- a/Testcase.py +++ b/Testcase.py @@ -3,7 +3,7 @@ import os from subprocess import call from math import ceil -from Params import Register +from Params import * class Testcase(object): @@ -23,12 +23,14 @@ class Testcase(object): 'ymm10', 'ymm11', 'ymm12', 'ymm13', 'ymm14', 'ymm15'] zmms = ['zmm0', 'zmm1', 'zmm2', 'zmm3', 'zmm4', 'zmm5', 'zmm6', 'zmm7', 'zmm8', 'zmm9', 'zmm10', 'zmm11', 'zmm12', 'zmm13', 'zmm14', 'zmm15'] +# Lookup table for memory + mems = ['[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]'] # TODO Differentiate between AVX512 (with additional xmm16-31) and the rest # ... # ... # end TODO - ops = {'gpr64':gprs64, 'gpr32':gprs32, 'gpr16':gprs16, 'gpr8':gprs8, 'fpu':fpus, 'mmx':mmxs, 'k':ks, 'bnd':bnds, 'xmm':xmms, 'ymm':ymms, 'zmm':zmms} + ops = {'gpr64':gprs64, 'gpr32':gprs32, 'gpr16':gprs16, 'gpr8':gprs8, 'fpu':fpus, 'mmx':mmxs, 'k':ks, 'bnd':bnds, 'xmm':xmms, 'ymm':ymms, 'zmm':zmms, 'mem':mems} # Create Single Precision 1.0 sp1 = '\t\t# create SP 1.0\n' @@ -56,8 +58,8 @@ class Testcase(object): # num_instr must be an even number self.num_instr = str(ceil(int(_num_instr)/2)*2) # Check for the number of operands and initialise the GPRs if necessary - self.reg_a, self.reg_b, self.reg_c, self.gprPush, self.gprPop, self.zeroGPR, self.copy = self.__define_regs() - self.num_regs = len(self.param_list) + self.op_a, self.op_b, self.op_c, self.gprPush, self.gprPop, self.zeroGPR, self.copy = self.__define_operands() + self.num_operands = len(self.param_list) # Create asm header self.def_instr, self.ninstr, self.init, self.expand = self.__define_header() @@ -70,11 +72,11 @@ class Testcase(object): regs = self.param_list extension = '' # Add operands - sep1 = '_' if (self.num_regs > 1) else '' - sep2 = '_' if (self.num_regs > 2) else '' - extension += ('-'+(self.reg_a if ('gpr' not in self.reg_a) else 'r'+self.reg_a[3:]) + sep1 + - (self.reg_b if ('gpr' not in self.reg_b) else 'r'+self.reg_b[3:]) + sep2 + - (self.reg_c if ('gpr' not in self.reg_c) else 'r'+self.reg_c[3:])) + sep1 = '_' if (self.num_operands > 1) else '' + sep2 = '_' if (self.num_operands > 2) else '' + extension += ('-'+(self.op_a if ('gpr' not in self.op_a) else 'r'+self.op_a[3:]) + sep1 + + (self.op_b if ('gpr' not in self.op_b) else 'r'+self.op_b[3:]) + sep2 + + (self.op_c if ('gpr' not in self.op_c) else 'r'+self.op_c[3:])) # Write latency file call(['mkdir', '-p', 'testcases']) f = open('./testcases/'+self.instr+extension+'.S', 'w') @@ -88,33 +90,45 @@ class Testcase(object): f.close() -# Check register - def __define_regs(self): - regs = self.param_list - reg_a, reg_b, reg_c = ('', '', '') +# Check operands + def __define_operands(self): + oprnds = self.param_list + op_a, op_b, op_c = ('', '', '') gprPush, gprPop, zeroGPR = ('', '', '') - reg_a = regs[0].reg_type.lower() - if(reg_a == 'gpr'): + if(isinstance(oprnds[0], Register)): + op_a = oprnds[0].reg_type.lower() + elif(isinstance(oprnds[0], MemAddr)): + op_a = 'mem' + if(op_a == 'gpr'): gprPush, gprPop, zeroGPR = self.__initialise_gprs() - reg_a += str(regs[0].size) - if(len(regs) > 1): - reg_b = regs[1].reg_type.lower() - if(reg_b == 'gpr'): - reg_b += str(regs[1].size) - if('gpr' not in reg_a): + op_a += str(oprnds[0].size) + if(len(oprnds) > 1): + if(isinstance(oprnds[1], Register)): + op_b = oprnds[1].reg_type.lower() + elif(isinstance(oprnds[1], MemAddr)): + op_b = 'mem' + if(op_b == 'gpr'): + op_b += str(oprnds[1].size) + if('gpr' not in op_a): gprPush, gprPop, zeroGPR = self.__initialise_gprs() - if(len(regs) == 3): - reg_c = regs[2].reg_type.lower() - if(reg_c == 'gpr'): - reg_c += str(regs[2].size) - if(('gpr' not in reg_a) and ('gpr'not in reg_b)): + if(len(oprnds) == 3): + if(isinstance(oprnds[2], Register)): + op_c = oprnds[2].reg_type.lower() + elif(isinstance(oprnds[2], MemAddr)): + op_c = 'mem' + if(op_c == 'gpr'): + op_c += str(oprnds[2].size) + if(('gpr' not in op_a) and ('gpr'not in op_b)): gprPush, gprPop, zeroGPR = self.__initialise_gprs() - if(len(regs) == 1): - copy = self.__copy_regs(regs[0]) + if(len(oprnds) == 1 and isinstance(oprnds[0], Register)): + copy = self.__copy_regs(oprnds[0]) + elif(len(oprnds) > 1 and isinstance(oprnds[1], Register)): + copy = self.__copy_regs(oprnds[1]) + elif(len(oprnds) > 2 and isinstance(oprnds[2], Register)): + copy = self.__copy_regs(oprnds[1]) else: - copy = self.__copy_regs(regs[1]) - return (reg_a, reg_b, reg_c, gprPush, gprPop, zeroGPR, copy) - + copy = '' + return (op_a, op_b, op_c, gprPush, gprPop, zeroGPR, copy) # Initialise 11 general purpose registers and set them to zero def __initialise_gprs(self): @@ -168,6 +182,11 @@ class Testcase(object): def __define_header(self): def_instr = '#define INSTR '+self.instr+'\n' ninstr = '#define NINST '+self.num_instr+'\n' + pi = ('PI:\n' + '.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' #128 bit + '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' #256 bit + '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' #384 bit + '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9\n') #512 bit init = ('#define N edi\n' \ '#define i r8d\n\n\n' '.intel_syntax noprefix\n' @@ -175,22 +194,24 @@ class Testcase(object): '.data\n' 'ninst:\n' '.long NINST\n' + '.align 32\n' + +pi+ '.text\n' '.globl latency\n' '.type latency, @function\n' '.align 32\n' 'latency:\n' - '\t\tpush\trbp\n' - '\t\tmov\trbp, rsp\n' - '\t\txor\ti, i\n' - '\t\ttest\tN, N\n' - '\t\tjle\tdone\n') + '\t\tpush rbp\n' + '\t\tmov rbp, rsp\n' + '\t\txor i, i\n' + '\t\ttest N, N\n' + '\t\tjle done\n') # Expand to AVX(512) if necessary expand = '' - if(self.reg_a == 'ymm' or self.reg_b == 'ymm' or self.reg_c == 'ymm'): + if(self.op_a == 'ymm' or self.op_b == 'ymm' or self.op_c == 'ymm'): expand = ('\t\t# expand from SSE to AVX\n' '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n') - if(self.reg_a == 'zmm' or self.reg_b == 'zmm' or self.reg_c == 'zmm'): + if(self.op_a == 'zmm' or self.op_b == 'zmm' or self.op_c == 'zmm'): expand = ('\t\t# expand from SSE to AVX\n' '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n' '\t\t# expand from AVX to AVX512\n' @@ -201,25 +222,25 @@ class Testcase(object): def __define_loop_lat(self): loop_lat = ('loop:\n' '\t\tinc i\n') - if(self.num_regs == 1): + if(self.num_operands == 1): for i in range(0, int(self.num_instr)): - loop_lat += '\t\tINSTR {}\n'.format(self.ops[self.reg_a][0]) - elif(self.num_regs == 2 and self.reg_a == self.reg_b): + loop_lat += '\t\tINSTR {}\n'.format(self.ops[self.op_a][0]) + elif(self.num_operands == 2 and self.op_a == self.op_b): for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.reg_a][0], self.ops[self.reg_b][1]) - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.reg_b][1], self.ops[self.reg_b][0]) - elif(self.num_regs == 2 and self.reg_a != self.reg_b): + loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][1]) + loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_b][1], self.ops[self.op_b][0]) + elif(self.num_operands == 2 and self.op_a != self.op_b): for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.reg_a][0], self.ops[self.reg_b][0]) - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.reg_a][0], self.ops[self.reg_b][0]) - elif(self.num_regs == 3 and self.reg_a == self.reg_b): + loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0]) + loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0]) + elif(self.num_operands == 3 and self.op_a == self.op_b): for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.reg_a][0], self.ops[self.reg_b][1], self.ops[self.reg_c][0]) - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.reg_a][1], self.ops[self.reg_b][0], self.ops[self.reg_c][0]) - elif(self.num_regs == 3 and self.reg_a == self.reg_c): + loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][1], self.ops[self.op_c][0]) + loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1], self.ops[self.op_b][0], self.ops[self.op_c][0]) + elif(self.num_operands == 3 and self.op_a == self.op_c): for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.reg_a][0], self.ops[self.reg_b][0], self.ops[self.reg_c][0]) - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.reg_a][1], self.ops[self.reg_b][0], self.ops[self.reg_c][0]) + loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0], self.ops[self.op_c][0]) + loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1], self.ops[self.op_b][0], self.ops[self.op_c][0]) loop_lat += ('\t\tcmp i, N\n' '\t\tjl loop\n') return loop_lat @@ -231,18 +252,18 @@ class Testcase(object): ext = '' ext1 = False ext2 = False - if(self.num_regs == 2): + if(self.num_operands == 2): ext1 = True - if(self.num_regs == 3): + if(self.num_operands == 3): ext1 = True ext2 = True for i in range(0, int(self.num_instr)): if(ext1): - ext = ', {}'.format(self.ops[self.reg_b][i%3]) + ext = ', {}'.format(self.ops[self.op_b][i%3]) if(ext2): - ext += ', {}'.format(self.ops[self.reg_c][i%3]) - regNum = i%len(self.ops[self.reg_a]) if (i > 2) else (i+3)%len(self.ops[self.reg_a]) - loop_thrpt += '\t\tINSTR {}{}\n'.format(self.ops[self.reg_a][regNum], ext) + ext += ', {}'.format(self.ops[self.op_c][i%3]) + regNum = (i%(len(self.ops[self.op_a])-3))+3 + loop_thrpt += '\t\tINSTR {}{}\n'.format(self.ops[self.op_a][regNum], ext) loop_thrpt += ('\t\tcmp i, N\n' '\t\tjl loop\n') return loop_thrpt diff --git a/create_testcase.py b/create_testcase.py new file mode 100755 index 0000000..a349f0a --- /dev/null +++ b/create_testcase.py @@ -0,0 +1,40 @@ +#!/apps/python/3.5-anaconda/bin/python + +from Params import * +from Testcase import * + +# Choose out of various operands +reg8 = Register('al') +reg16 = Register('ax') +reg32 = Register('eax') +reg64 = Register('rax') +xmm = Register('xmm0') +ymm = Register('ymm0') +zmm = Register('zmm0') +mem0 = MemAddr('(%rax, %esi, 4)') + + +#----------------------------------------------- +#-USER INPUT------------------------------------ +#----------------------------------------------- +# Enter your mnemonic +mnemonic = 'vmovupd' + +# Define your operands. If you don't need it, just type in None +dst = xmm +op1 = mem0 +op2 = None + +# Define the number of instructions per loop (default: 12) +per_loop = '12' + +#----------------------------------------------- +#----------------------------------------------- + +# Start +operands = [x for x in [dst, op1, op2] if x is not None] +opListStr = ', '.join([x.print() for x in operands]) +print('Create Testcase for {} {}'.format(mnemonic, opListStr ), end='') +tc = Testcase(mnemonic, operands, per_loop) +tc.write_testcase() +print(' --------> SUCCEEDED') diff --git a/data/ivb_throughput.csv b/data/ivb_throughput.csv index f2f89b2..3c347e5 100644 --- a/data/ivb_throughput.csv +++ b/data/ivb_throughput.csv @@ -1,42 +1,50 @@ instr,clock_cycles -vmovapd-TP,0.84 -vaddsd-TP,1.016 -inc-TP,0.446 -cmp-TP,0.447 -inc-rrxmm-TP,0.446 -cmp-rrxmm-TP,0.446 -vmovq-TP,1.17 -vmovsd-TP,1.17 -xor-TP,0.336 -vxorpd-avx-TP,0.335 -vmovq-rxmmxmm-TP,1.004 -vxorps-TP,0.336 -vunpckhpd-TP,1.177 -test-TP,0.446 -vmulsd-TP,1.0170000000000001 -test-rrxmm-TP,0.446 -add-TP,0.47200000000000003 -neg-TP,0.447 -add-rrxmm-TP,0.47100000000000003 -mov-TP,0.386 -mov-rrxmm-TP,0.37 -vaddpd-avx-TP,1.016 -xor-rrxmm-TP,0.336 -sub-TP,0.335 -sub-rrxmm-TP,0.336 -vxorpd-TP,0.336 -vmovapd-avx-TP,0.8370000000000001 -vmulpd-avx-TP,1.021 -vsubsd-TP,1.014 -vmovaps-TP,0.836 -vaddpd-TP,1.015 -vsubpd-avx-TP,1.014 -dec-TP,0.447 -lea-TP,0.5 -jb-TP,0.447 -vmulss-xmmxmmxmm-TP,1.0 -vaddss-xmmxmmxmm-TP,1.0 -vcvtsi2ss-xmmxmmr-TP,1.0859999999999999 -xor-rr-TP,0.413 -vxorps-xmmxmmxmm-TP,0.3333333333333333 -inc-rxmmxmm-TP,0.390 +jmp-lbl-TP,0.0 +jo-lbl-TP,0.0 +jno-lbl-TP,0.0 +js-lbl-TP,0.0 +jns-lbl-TP,0.0 +je-lbl-TP,0.0 +jz-lbl-TP,0.0 +jne-lbl-TP,0.0 +jnz-lbl-TP,0.0 +jb-lbl-TP,0.0 +jnae-lbl-TP,0.0 +jc-lbl-TP,0.0 +jnb-lbl-TP,0.0 +jae-lbl-TP,0.0 +jnc-lbl-TP,0.0 +jbe-lbl-TP,0.0 +jna-lbl-TP,0.0 +ja-lbl-TP,0.0 +jnbe-lbl-TP,0.0 +jl-lbl-TP,0.0 +jnge-lbl-TP,0.0 +jge-lbl-TP,0.0 +jnl-lbl-TP,0.0 +jle-lbl-TP,0.0 +jng-lbl-TP,0.0 +jg-lbl-TP,0.0 +jnle-lbl-TP,0.0 +jp-lbl-TP,0.0 +jpe-lbl-TP,0.0 +jnp-lbl-TP,0.0 +jpo-lbl-TP,0.0 +jcxz-lbl-TP,0.0 +jecxz-lbl-TP,0.0 +jo-lbl-TP,0.0 +jno-lbl-TP,0.0 +js-lbl-TP,0.0 +jns-lbl-TP,0.0 +vmulss-xmm_xmm_xmm-TP,1.0 +vaddss-xmm_xmm_xmm-TP,1.0 +vxorps-xmm_xmm_xmm-TP,0.3333333333333333 +inc-r64-TP,0.3333333333333333 +xor-r32_r32-TP,0.3333333333333333 +vcvtsi2ss-xmm_xmm_r32-TP,1.0 +vaddss-xmm_xmm_mem-TP,1.0 +vmovupd-load-avx-TP,1.0 +lea-r32_mem-TP,1.0 +vmovss-xmm_mem-TP,0.5 +vmovss-mem_xmm-TP,1.0 +vmovupd-store-avx-TP,2.0 diff --git a/get_instr.py b/get_instr.py index 740bca9..e29cecd 100755 --- a/get_instr.py +++ b/get_instr.py @@ -2,6 +2,7 @@ import sys import re from Testcase import * +from Params import * marker = r'//STARTLOOP' asm_line = re.compile(r'\s[0-9a-f]+[:]') @@ -94,7 +95,7 @@ def check_instr(instr): return #Check if there's one or more operand and store all in a list param_list = flatten(separate_params(params)) - regList = list(param_list) + opList = list(param_list) #Check operands and seperate them by IMMEDIATE (IMD), REGISTER (REG), MEMORY (MEM) or LABEL (LBL) for i in range(len(param_list)): op = param_list[i] @@ -114,7 +115,7 @@ def check_instr(instr): else: op = MemAddr(op) param_list[i] = op.print() - regList[i] = op + opList[i] = op #Join mnemonic and operand(s) to an instruction form if(len(mnemonic) > 7): tabs = "\t" @@ -128,20 +129,30 @@ def check_instr(instr): db[instr_form] = 1 #Create testcase for instruction form, since it is the first appearance of it #But (as far as now) only for instr forms with only registers as operands - is_Reg = True - for par in regList: +# is_Reg = True +# for par in opList: # print(par.print()+" is Register: "+str(isinstance(par, Register))) - if(not isinstance(par, Register)): - is_Reg = False - if(is_Reg): +# if(not isinstance(par, Register)): +# is_Reg = False +# if(is_Reg): #print(mnemonic) # print("create testcase for "+mnemonic+" with params:") -# for p in regList: +# for p in opList: # print(p.print(),end=", ") # print() + + +#Only create benchmark if no label (LBL) is part of the operands +# And for now only for instr forms without immediates (will be implemented soon) + do_bench = True + for par in opList: + if(not isinstance(par, Register) and not isinstance(par, MemAddr)): + do_bench = False + if(do_bench): #Create testcase with reversed param list, due to the fact its intel syntax! -# create_testcase(mnemonic, list(reversed(regList))) - tc = Testcase(mnemonic, list(reversed(regList)), '12') +# create_testcase(mnemonic, list(reversed(opList))) +# print('menmonic: '+mnemonic+' ops: '+str(list(reversed(opList)))) + tc = Testcase(mnemonic, list(reversed(opList)), '32') tc.write_testcase() # print("-----------") @@ -219,118 +230,15 @@ def flatten(l): return l[:1] + flatten(l[1:]) - - -class Parameter(object): - type_list = ["REG", "MEM", "IMD", "LBL", "NONE"] - def __init__(self, ptype, name=""): - self.ptype = ptype.upper() - if(self.ptype not in self.type_list): - raise NameError("Type not supported: "+ptype) - - def print(self): - if(self.ptype == "NONE"): - return "" - else: - return self.ptype - -class MemAddr(Parameter): - segment_regs = ["CS", "DS", "SS", "ES", "FS", "GS"] - scales = [1, 2, 4, 8] - def __init__(self, name): - self.sreg = False - self.offset = False - self.base = False - self.index = False - self.scale = False - if(':' in name): - if(name[1:name.index(':')].upper() not in self.segment_regs): - raise NameError("Type not supported: "+name) - self.sreg = True - self.offset = True - if('(' not in name or ('(' in name and name.index('(') != 0)): - self.offset = True - if('(' in name): - self.parentheses = name[name.index('(')+1:-1] - self.commacnt = self.parentheses.count(',') - if(self.commacnt == 0): - self.base = True - elif(self.commacnt == 2 and int(self.parentheses[-1:]) in self.scales): - self.base = True - self.index = True - self.scale = True - else: - raise NameError("Type not supported: "+name) - - def print(self): - self.mem_format = "MEM(" - if(self.sreg): - self.mem_format += "sreg:" - if(self.offset): - self.mem_format += "offset" - if(self.base and not self.index): - self.mem_format += "(base)" - elif(self.base and self.index and self.scale): - self.mem_format += "(base, index, scale)" - self.mem_format += ")" - return self.mem_format - - - -class Register(Parameter): - sizes = { -#General Purpose Registers - "AH":(8,"GPR"), "AL":(8,"GPR"), "BH":(8,"GPR"), "BL":(8,"GPR"), "CH":(8,"GPR"), "CL":(8,"GPR"), "DH":(8,"GPR"), "DL":(8,"GPR"), "BPL":(8,"GPR"), "SIL":(8,"GPR"), "DIL":(8,"GPR"), "SPL":(8,"GPR"), "R8L":(8,"GPR"), "R9L":(8,"GPR"), "R10L":(8,"GPR"), "R11L":(8,"GPR"), "R12L":(8,"GPR"), "R13L":(8,"GPR"), "R14L":(8,"GPR"), "R15L":(8,"GPR"), - "R8B":(8,"GPR"),"R9B":(8,"GPR"),"R10B":(8,"GPR"),"R11B":(8,"GPR"),"R12B":(8,"GPR"),"R13B":(8,"GPR"),"R14B":(8,"GPR"),"R15B":(8,"GPR"), - "AX":(16,"GPR"), "BC":(16,"GPR"), "CX":(16,"GPR"), "DX":(16,"GPR"), "BP":(16,"GPR"), "SI":(16,"GPR"), "DI":(16,"GPR"), "SP":(16,"GPR"), "R8W":(16,"GPR"), "R9W":(16,"GPR"), "R10W":(16,"GPR"), "R11W":(16,"GPR"), "R12W":(16,"GPR"), "R13W":(16,"GPR"), "R14W":(16,"GPR"), "R15W":(16,"GPR"), - "EAX":(32,"GPR"), "EBX":(32,"GPR"), "ECX":(32,"GPR"), "EDX":(32,"GPR"), "EBP":(32,"GPR"), "ESI":(32,"GPR"), "EDI":(32,"GPR"), "ESP":(32,"GPR"), "R8D":(32,"GPR"), "R9D":(32,"GPR"), "R10D":(32,"GPR"), "R11D":(32,"GPR"), "R12D":(32,"GPR"), "R13D":(32,"GPR"), "R14D":(32,"GPR"), "R15D":(32,"GPR"), - "RAX":(64,"GPR"), "RBX":(64,"GPR"), "RCX":(64,"GPR"), "RDX":(64,"GPR"), "RBP":(64,"GPR"), "RSI":(64,"GPR"), "RDI":(64,"GPR"), "RSP":(64,"GPR"), "R8":(64,"GPR"), "R9":(64,"GPR"), "R10":(64,"GPR"), "R11":(64,"GPR"), "R12":(64,"GPR"), "R13":(64,"GPR"), "R14":(64,"GPR"), "R15":(64,"GPR"), - "CS":(16,"GPR"), "DS":(16,"GPR"), "SS":(16,"GPR"), "ES":(16,"GPR"), "FS":(16,"GPR"), "GS":(16,"GPR"), - "EFLAGS":(32,"GPR"), "RFLAGS":(64,"GPR"), "EIP":(32,"GPR"), "RIP":(64,"GPR"), -#FPU Registers - "ST0":(80,"FPU"),"ST1":(80,"FPU"),"ST2":(80,"FPU"),"ST3":(80,"FPU"),"ST4":(80,"FPU"),"ST5":(80,"FPU"),"ST6":(80,"FPU"),"ST7":(80,"FPU"), -#MMX Registers - "MM0":(64,"MMX"),"MM1":(64,"MMX"),"MM2":(64,"MMX"),"MM3":(64,"MMX"),"MM4":(64,"MMX"),"MM5":(64,"MMX"),"MM6":(64,"MMX"),"MM7":(64,"MMX"), -#XMM Registers - "XMM0":(128,"XMM"),"XMM1":(128,"XMM"),"XMM2":(128,"XMM"),"XMM3":(128,"XMM"),"XMM4":(128,"XMM"),"XMM5":(128,"XMM"),"XMM6":(128,"XMM"),"XMM7":(128,"XMM"), "XMM8":(128,"XMM"), "XMM9":(128,"XMM"), "XMM10":(128,"XMM"), "XMM11":(128,"XMM"), "XMM12":(128,"XMM"), "XMM13":(128,"XMM"), "XMM14":(128,"XMM"), "XMM15":(128,"XMM"), "XMM16":(128,"XMM"), "XMM17":(128,"XMM"), "XMM18":(128,"XMM"), "XMM19":(128,"XMM"), "XMM20":(128,"XMM"), "XMM21":(128,"XMM"), "XMM22":(128,"XMM"), "XMM23":(128,"XMM"), "XMM24":(128,"XMM"), "XMM25":(128,"XMM"), "XMM26":(128,"XMM"), "XMM27":(128,"XMM"), "XMM28":(128,"XMM"), "XMM29":(128,"XMM"), "XMM30":(128,"XMM"), "XMM31":(128,"XMM"), -#YMM Registers - "YMM0":(256,"YMM"),"YMM1":(256,"YMM"),"YMM2":(256,"YMM"),"YMM3":(256,"YMM"),"YMM4":(256,"YMM"),"YMM5":(256,"YMM"),"YMM6":(256,"YMM"),"YMM7":(256,"YMM"), "YMM8":(256,"YMM"), "YMM9":(256,"YMM"), "YMM10":(256,"YMM"), "YMM11":(256,"YMM"), "YMM12":(256,"YMM"), "YMM13":(256,"YMM"), "YMM14":(256,"YMM"), "YMM15":(256,"YMM"), "YMM16":(256,"YMM"), "YMM17":(256,"YMM"), "YMM18":(256,"YMM"), "YMM19":(256,"YMM"), "YMM20":(256,"YMM"), "YMM21":(256,"YMM"), "YMM22":(256,"YMM"), "YMM23":(256,"YMM"), "YMM24":(256,"YMM"), "YMM25":(256,"YMM"), "YMM26":(256,"YMM"), "YMM27":(256,"YMM"), "YMM28":(256,"YMM"), "YMM29":(256,"YMM"), "YMM30":(256,"YMM"), "YMM31":(256,"YMM"), -#ZMM Registers - "ZMM0":(512,"ZMM"),"ZMM1":(512,"ZMM"),"ZMM2":(512,"ZMM"),"ZMM3":(512,"ZMM"),"ZMM4":(512,"ZMM"),"ZMM5":(512,"ZMM"),"ZMM6":(512,"ZMM"),"ZMM7":(512,"ZMM"), "ZMM8":(512,"ZMM"), "ZMM9":(512,"ZMM"), "ZMM10":(512,"ZMM"), "ZMM11":(512,"ZMM"), "ZMM12":(512,"ZMM"), "ZMM13":(512,"ZMM"), "ZMM14":(512,"ZMM"), "ZMM15":(512,"ZMM"), "ZMM16":(512,"ZMM"), "ZMM17":(512,"ZMM"), "ZMM18":(512,"ZMM"), "ZMM19":(512,"ZMM"), "ZMM20":(512,"ZMM"), "ZMM21":(512,"ZMM"), "ZMM22":(512,"ZMM"), "ZMM23":(512,"ZMM"), "ZMM24":(512,"ZMM"), "ZMM25":(512,"ZMM"), "ZMM26":(512,"ZMM"), "ZMM27":(512,"ZMM"), "ZMM28":(512,"ZMM"), "ZMM29":(512,"ZMM"), "ZMM30":(512,"ZMM"), "ZMM31":(512,"ZMM"), -#Opmask Register - "K0":(64,"K"), "K1":(64,"K"), "K2":(64,"K"), "K3":(64,"K"), "K4":(64,"K"), "K5":(64,"K"), "K6":(64,"K"), "K7":(64,"K"), -#Bounds Registers - "BND0":(128,"BND"),"BND1":(128,"BND"),"BND2":(128,"BND"),"BND3":(128,"BND") - } - - def __init__(self,name,mask=False): - self.name = name.upper() - self.mask = mask -# try: - if[name in self.sizes]: - self.size = self.sizes[self.name][0] - self.reg_type = self.sizes[self.name][1] - else: - print(lncnt) - raise NameError("Register name not in dictionary: "+self.name) -# except KeyError: -# print(lncnt) - - def print(self): - opmask = "" - if(self.mask): - opmask = "{opmask}" - return(self.reg_type+str(self.size)+opmask) - - - if __name__ == "__main__": # load_db() - r0 = Register("ymm0") - r1 = Register("xmm0") - r2 = Register("rax") - mem0 = MemAddr('(%rax, %esi, 4)') -# Testcase("ADD", [mem0,r1]) +# r0 = Register("ymm0") +# r1 = Register("xmm0") +# r64 = Register("rax") +# r32 = Register("eax") +# mem0 = MemAddr('(%rax, %esi, 4)') +# tc = Testcase("XOR", [r32, r32], '64') +# tc.write_testcase() # create_testcase("VADDPD", [r0, r0, r0]) if(len(sys.argv) > 1): for i in range(1,len(sys.argv)): diff --git a/osaca.py b/osaca.py index 6cfd496..cf743c7 100755 --- a/osaca.py +++ b/osaca.py @@ -8,7 +8,7 @@ import re from Params import * import pandas as pd from datetime import datetime - +import numpy as np #----------Global variables-------------- arch = '' @@ -373,12 +373,15 @@ def include_ibench(): print('Everything seems fine! Let\'s start checking!') newData = [] + addedValues = 0 for line in srcCode: if('TP' in line): # We found a command with a throughput value. Get instruction and the number of clock cycles instr = line.split()[0][:-1] clkC = line.split()[1] + clkC_tmp = clkC clkC = validate_TP(clkC, instr) + txtOutput = True if (clkC_tmp == clkC) else False tp = -1 new = False try: @@ -387,14 +390,20 @@ def include_ibench(): # Instruction not in database yet --> add it newData.append([instr,clkC]) new = True + addedValues += 1 pass - if(not new and tp != clkC): + if(not new and abs((tp/np.float64(clkC))-1) > 0.05): print('Different measurement for {}: {}(old) vs. {}(new)\nPlease check for correctness (no changes were made).'.format(instr, tp, clkC)) + txtOutput = True + if(txtOutput): + print() + txtOutput = False # Now merge the DataFrames and write new csv file df = df.append(pd.DataFrame(newData, columns=['instr','clock_cycles']), ignore_index=True) csv = df.to_csv(index=False) write_csv(csv) print('ibench output {} successfully in database included.'.format(filepath.split('/')[-1])) + print('{} values were added.'.format(addedValues)) # main function of the tool