From 561e0aab733734bd4d5942c3f280b6118d70ea3d Mon Sep 17 00:00:00 2001 From: Jan Laukemann Date: Mon, 2 Oct 2017 09:32:17 +0200 Subject: [PATCH] style update --- osaca/osaca.py | 5 +- osaca/testcase.py | 183 +++++++++++++++++++++++----------------------- 2 files changed, 94 insertions(+), 94 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index c95dc17..5b2d0cc 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -78,7 +78,7 @@ class Osaca(object): clk_cyc = line.split()[1] clk_cyc_tmp = clk_cyc clk_cyc = self.validate_val(clk_cyc, instr, True if (clmn == 'TP') else False, - cyc_list, reci_list) + cyc_list, reci_list) txt_output = True if (clk_cyc_tmp == clk_cyc) else False val = -2 new = False @@ -357,7 +357,8 @@ class Osaca(object): # Value is probably correct, so round it to the estimated value return cyc_list[i] # Check reciprocal only if it is a throughput value - elif(is_tp and reci_list[i]*1.05 > float(clk_cyc) and reci_list[i]*0.95 < float(clk_cyc)): + elif(is_tp and reci_list[i]*1.05 > float(clk_cyc) + and reci_list[i]*0.95 < float(clk_cyc)): # Value is probably correct, so round it to the estimated value return reci_list[i] # No value close to an integer or its reciprocal found, we assume the diff --git a/osaca/testcase.py b/osaca/testcase.py index 3efcd5a..c6aa16e 100755 --- a/osaca/testcase.py +++ b/osaca/testcase.py @@ -5,9 +5,9 @@ from subprocess import call from math import ceil from param import Register, MemAddr, Parameter + class Testcase(object): - - ##------------------Constant variables-------------------------- + # ------------------Constant variables-------------------------- # Lookup tables for regs gprs64 = ['rax', 'rbx', 'rcx', 'rdx', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15'] gprs32 = ['eax', 'ebx', 'ecx', 'edx', 'r9d', 'r10d', 'r11d', 'r12d', 'r13d', 'r14d', 'r15d'] @@ -18,44 +18,46 @@ class Testcase(object): ks = ['k0', 'k1', 'k2', 'k3', 'k4', 'k5', 'k6', 'k7'] bnds = ['bnd0', 'bnd1', 'bnd2', 'bnd3', 'bnd4', 'bnd5', 'bnd6', 'bnd7'] xmms = ['xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'xmm8', 'xmm9', - 'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14', 'xmm15'] + 'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14', 'xmm15'] ymms = ['ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5', 'ymm6', 'ymm7', 'ymm8', 'ymm9', - 'ymm10', 'ymm11', 'ymm12', 'ymm13', 'ymm14', 'ymm15'] + 'ymm10', 'ymm11', 'ymm12', 'ymm13', 'ymm14', 'ymm15'] zmms = ['zmm0', 'zmm1', 'zmm2', 'zmm3', 'zmm4', 'zmm5', 'zmm6', 'zmm7', 'zmm8', 'zmm9', - 'zmm10', 'zmm11', 'zmm12', 'zmm13', 'zmm14', 'zmm15'] + 'zmm10', 'zmm11', 'zmm12', 'zmm13', 'zmm14', 'zmm15'] # Lookup table for memory - mems = ['[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]'] + mems = ['[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]', '[rip+PI]', + '[rip+PI]'] # Lookup table for immediates - imds = ['1', '2', '13', '22', '8', '78', '159', '222', '3', '9', '5', '55', '173', '317', + imds = ['1', '2', '13', '22', '8', '78', '159', '222', '3', '9', '5', '55', '173', '317', '254', '255'] # TODO Differentiate between AVX512 (with additional xmm16-31) and the rest # ... # ... # end TODO - ops = {'gpr64':gprs64, 'gpr32':gprs32, 'gpr16':gprs16, 'gpr8':gprs8, 'fpu':fpus, 'mmx':mmxs, - 'k':ks, 'bnd':bnds, 'xmm':xmms, 'ymm':ymms, 'zmm':zmms, 'mem':mems, 'imd':imds} + ops = {'gpr64': gprs64, 'gpr32': gprs32, 'gpr16': gprs16, 'gpr8': gprs8, 'fpu': fpus, + 'mmx': mmxs, 'k': ks, 'bnd': bnds, 'xmm': xmms, 'ymm': ymms, 'zmm': zmms, 'mem': mems, + 'imd': imds} # Create Single Precision 1.0 - sp1 = '\t\t# create SP 1.0\n' - sp1 += '\t\tvpcmpeqw xmm0, xmm0, xmm0\n' - sp1 += '\t\tvpslld xmm0, xmm0, 25\t\t\t# logical left shift: 11111110..0 (25=32-(8-1))\n' - sp1 += ('\t\tvpsrld xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading ' - + 'mantissa bit is zero\n') - sp1 += '\t\t# copy SP 1.0\n' + sp1 = ('\t\t# create SP 1.0\n' + '\t\tvpcmpeqw xmm0, xmm0, xmm0\n' + '\t\tvpslld xmm0, xmm0, 25\t\t\t# logical left shift: 11111110..0 (25=32-(8-1))\n' + '\t\tvpsrld xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading ' + 'mantissa bit is zero\n' + '\t\t# copy SP 1.0\n') # Create Double Precision 1.0 - dp1 = '\t\t# create DP 1.0\n' - dp1 += '\t\tvpcmpeqw xmm0, xmm0, xmm0\t\t# all ones\n' - dp1 += '\t\tvpsllq xmm0, xmm0, 54\t\t\t# logical left shift: 11111110..0 (54=64-(10-1))\n' - dp1 += ('\t\tvpsrlq xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading ' - + 'mantissa bit is zero\n') + dp1 = ('\t\t# create DP 1.0\n' + '\t\tvpcmpeqw xmm0, xmm0, xmm0\t\t# all ones\n' + '\t\tvpsllq xmm0, xmm0, 54\t\t\t# logical left shift: 11111110..0 (54=64-(10-1))\n' + '\t\tvpsrlq xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading ' + 'mantissa bit is zero\n') # Create epilogue done = ('done:\n' '\t\tmov\trsp, rbp\n' '\t\tpop\trbp\n' '\t\tret\n' '.size latency, .-latency') - ##---------------------------------------------------------------- + # ---------------------------------------------------------------- # Constructor def __init__(self, _mnemonic, _param_list, _num_instr='32'): @@ -67,7 +69,7 @@ class Testcase(object): self.op_a, self.op_b, self.op_c, self.gprPush, self.gprPop, self.zeroGPR, self.copy = self.__define_operands() self.num_operands = len(self.param_list) - # Create asm header + # Create asm header self.def_instr, self.ninstr, self.init, self.expand = self.__define_header() # Create latency and throughput loop self.loop_lat = self.__define_loop_lat() @@ -75,10 +77,9 @@ class Testcase(object): # Create extension for testcase name sep1 = '_' if (self.num_operands > 1) else '' sep2 = '_' if (self.num_operands > 2) else '' - self.extension = ('-'+(self.op_a if ('gpr' not in self.op_a) else 'r' + self.op_a[3:]) - + sep1 + (self.op_b if ('gpr' not in self.op_b) else 'r'+self.op_b[3:]) - + sep2 + (self.op_c if ('gpr' not in self.op_c) else 'r'+self.op_c[3:])) - + self.extension = ('-' + (self.op_a if ('gpr' not in self.op_a) else 'r' + self.op_a[3:]) + + sep1 + (self.op_b if ('gpr' not in self.op_b) else 'r' + self.op_b[3:]) + + sep2 + (self.op_c if ('gpr' not in self.op_c) else 'r' + self.op_c[3:])) def write_testcase(self, TP=True, LT=True): """ @@ -98,20 +99,19 @@ class Testcase(object): # Write latency file call(['mkdir', '-p', os.path.dirname(__file__)+'/../testcases']) f = open(os.path.dirname(__file__)+'/../testcases/'+self.instr+self.extension+'.S', 'w') - data = (self.def_instr+self.ninstr+self.init+self.dp1+self.expand+self.gprPush - +self.zeroGPR+self.copy+self.loop_lat+self.gprPop+self.done) + data = (self.def_instr + self.ninstr + self.init + self.dp1 + self.expand + self.gprPush + + self.zeroGPR + self.copy + self.loop_lat + self.gprPop + self.done) f.write(data) f.close() if(TP): # Write throughput file - f = open(os.path.dirname(__file__)+'/../testcases/'+self.instr+self.extension - +'-TP.S', 'w') - data = (self.def_instr+self.ninstr+self.init+self.dp1+self.expand+self.gprPush - +self.zeroGPR+self.copy+self.loop_thrpt+self.gprPop+self.done) + f = open(os.path.dirname(__file__) + '/../testcases/' + self.instr + self.extension + + '-TP.S', 'w') + data = (self.def_instr + self.ninstr + self.init + self.dp1 + self.expand + self.gprPush + + self.zeroGPR + self.copy + self.loop_thrpt + self.gprPop + self.done) f.write(data) f.close() - # Check operands def __define_operands(self): """ @@ -120,7 +120,7 @@ class Testcase(object): Returns ------- (str, str, str, str, str, str) - String tuple containing types of operands and if needed push/pop operations, the + String tuple containing types of operands and if needed push/pop operations, the initialisation of general purpose regs and the copy if registers. """ oprnds = self.param_list @@ -139,7 +139,7 @@ class Testcase(object): if(isinstance(oprnds[1], Register)): op_b = oprnds[1].reg_type.lower() elif(isinstance(oprnds[1], MemAddr)): - op_b = 'mem' + op_b = 'mem' elif(isinstance(oprnds[1], Parameter) and str(oprnds[1]) == 'IMD'): op_b = 'imd' if(op_b == 'gpr'): @@ -165,8 +165,7 @@ class Testcase(object): copy = self.__copy_regs(oprnds[1]) else: copy = '' - return (op_a, op_b, op_c, gprPush, gprPop, zeroGPR, copy) - + return (op_a, op_b, op_c, gprPush, gprPop, zeroGPR, copy) def __initialise_gprs(self): """ @@ -184,7 +183,7 @@ class Testcase(object): for reg in self.gprs64: gprPush += '\t\tpush {}\n'.format(reg) for reg in reversed(self.gprs64): - gprPop += '\t\tpop {}\n'.format(reg) + gprPop += '\t\tpop {}\n'.format(reg) for reg in self.gprs64: zeroGPR += '\t\txor {}, {}\n'.format(reg, reg) return (gprPush, gprPop, zeroGPR) @@ -199,7 +198,7 @@ class Testcase(object): ---------- reg : Register Register for copying the value - + Returns ------- str @@ -208,16 +207,16 @@ class Testcase(object): copy = '\t\t# copy DP 1.0\n' # Different handling for GPR, MMX and SSE/AVX registers if(reg.reg_type == 'GPR'): - copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) + copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][1]) copy += '\t\t# Create DP 2.0\n' copy += '\t\tadd {}, {}\n'.format(self.ops['gpr64'][1], self.ops['gpr64'][0]) copy += '\t\t# Create DP 0.5\n' copy += '\t\tdiv {}\n'.format(self.ops['gpr64'][0]) copy += '\t\tmovq {}, {}\n'.format(self.ops['gpr64'][2], self.ops['gpr64'][0]) - copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) + copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) elif(reg.reg_type == 'MMX'): - copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][0]) + copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][0]) copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][1]) copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) copy += '\t\t# Create DP 2.0\n' @@ -227,19 +226,18 @@ class Testcase(object): copy += '\t\tmovq {}, {}\n'.format(self.ops['mmx'][2], self.ops['gpr64'][0]) elif(reg.reg_type == 'XMM' or reg.reg_type == 'YMM' or reg.reg_type == 'ZMM'): key = reg.reg_type.lower() - copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][0], self.ops[key][0]) + copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][0], self.ops[key][0]) copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][1], self.ops[key][0]) copy += '\t\t# Create DP 2.0\n' - copy += '\t\tvaddpd {}, {}, {}\n'.format(self.ops[key][1], self.ops[key][1], - self.ops[key][1]) + copy += '\t\tvaddpd {}, {}, {}\n'.format(self.ops[key][1], self.ops[key][1], + self.ops[key][1]) copy += '\t\t# Create DP 0.5\n' - copy += '\t\tvdivpd {}, {}, {}\n'.format(self.ops[key][2], self.ops[key][0], - self.ops[key][1]) + copy += '\t\tvdivpd {}, {}, {}\n'.format(self.ops[key][2], self.ops[key][0], + self.ops[key][1]) else: copy = '' return copy - def __define_header(self): """ Define header. @@ -252,30 +250,30 @@ class Testcase(object): def_instr = '#define INSTR '+self.instr+'\n' ninstr = '#define NINST '+self.num_instr+'\n' pi = ('PI:\n' - '.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' #128 bit - '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' #256 bit - '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' #384 bit - '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9\n') #512 bit - init = ('#define N edi\n' \ - '#define i r8d\n\n\n' - '.intel_syntax noprefix\n' - '.globl ninst\n' - '.data\n' - 'ninst:\n' - '.long NINST\n' - '.align 32\n' - +pi+ - '.text\n' - '.globl latency\n' - '.type latency, @function\n' - '.align 32\n' - 'latency:\n' - '\t\tpush rbp\n' - '\t\tmov rbp, rsp\n' - '\t\txor i, i\n' - '\t\ttest N, N\n' - '\t\tjle done\n') -# Expand to AVX(512) if necessary + '.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' # 128 bit + '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' # 256 bit + '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' # 384 bit + '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9\n') # 512 bit + init = ('#define N edi\n' + '#define i r8d\n\n\n' + '.intel_syntax noprefix\n' + '.globl ninst\n' + '.data\n' + 'ninst:\n' + '.long NINST\n' + '.align 32\n' + + pi + + '.text\n' + '.globl latency\n' + '.type latency, @function\n' + '.align 32\n' + 'latency:\n' + '\t\tpush rbp\n' + '\t\tmov rbp, rsp\n' + '\t\txor i, i\n' + '\t\ttest N, N\n' + '\t\tjle done\n') + # Expand to AVX(512) if necessary expand = '' if(self.op_a == 'ymm' or self.op_b == 'ymm' or self.op_c == 'ymm'): expand = ('\t\t# expand from SSE to AVX\n' @@ -287,7 +285,6 @@ class Testcase(object): '\t\tvinsert64x4 zmm0, zmm0, ymm0, 0x1\n') return (def_instr, ninstr, init, expand) - def __define_loop_lat(self): """ Create latency loop. @@ -304,33 +301,36 @@ class Testcase(object): loop_lat += '\t\tINSTR {}\n'.format(self.ops[self.op_a][0]) elif(self.num_operands == 2 and self.op_a == self.op_b): for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], + loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][1]) - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_b][1], + loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_b][1], self.ops[self.op_b][0]) elif(self.num_operands == 2 and self.op_a != self.op_b): for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], + loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0]) - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], + loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0]) elif(self.num_operands == 3 and self.op_a == self.op_b): for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], - self.ops[self.op_b][1], self.ops[self.op_c][0]) - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1], - self.ops[self.op_b][0], self.ops[self.op_c][0]) + loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], + self.ops[self.op_b][1], + self.ops[self.op_c][0]) + loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1], + self.ops[self.op_b][0], + self.ops[self.op_c][0]) elif(self.num_operands == 3 and self.op_a == self.op_c): for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], - self.ops[self.op_b][0], self.ops[self.op_c][0]) - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1], - self.ops[self.op_b][0], self.ops[self.op_c][0]) + loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], + self.ops[self.op_b][0], + self.ops[self.op_c][0]) + loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1], + self.ops[self.op_b][0], + self.ops[self.op_c][0]) loop_lat += ('\t\tcmp i, N\n' '\t\tjl loop\n') return loop_lat - def __define_loop_thrpt(self): """ Create throughput loop. @@ -352,16 +352,15 @@ class Testcase(object): ext2 = True for i in range(0, int(self.num_instr)): if(ext1): - ext = ', {}'.format(self.ops[self.op_b][i%3]) + ext = ', {}'.format(self.ops[self.op_b][i % 3]) if(ext2): - ext += ', {}'.format(self.ops[self.op_c][i%3]) - regNum = (i%(len(self.ops[self.op_a])-3))+3 + ext += ', {}'.format(self.ops[self.op_c][i % 3]) + regNum = (i % (len(self.ops[self.op_a]) - 3)) + 3 loop_thrpt += '\t\tINSTR {}{}\n'.format(self.ops[self.op_a][regNum], ext) loop_thrpt += ('\t\tcmp i, N\n' - '\t\tjl loop\n') + '\t\tjl loop\n') return loop_thrpt - def is_in_dir(self): """ Check if testcases with the same name already exist in testcase @@ -383,4 +382,4 @@ class Testcase(object): TP = True if name+'.S' in files: LT = True - return (TP,LT) + return (TP, LT)