diff --git a/EUsched.py b/EUsched.py deleted file mode 100755 index f9faa20..0000000 --- a/EUsched.py +++ /dev/null @@ -1,331 +0,0 @@ -#!/apps/python/3.5-anaconda/bin/python - -import sys -import os -import math -import ast -from Params import * -from operator import add -import pandas as pd - -class Scheduler(object): - arch_dict = {'SNB':6, 'IVB':6, 'HSW':8, 'BDW':8, 'SKL':8} - ports = None #type: int - instrList = None #type: list> - # instr, operand(s), instr form - df = None #type: DataFrame - - - def __init__(self, arch, instructionList): - arch = arch.upper() - try: - self.ports = self.arch_dict[arch] - except KeyError: - print('Architecture not supportet for EU scheduling.') - sys.exit() - self.instrList = instructionList - currDir = os.path.realpath(__file__)[:-10] - self.df = pd.read_csv(currDir+'data/'+arch.lower()+'_data.csv', quotechar='"', converters={'ports':ast.literal_eval}) - - - def schedule(self): - ''' - Schedules Instruction Form list and calculates port bindings. - - Returns - ------- - (str, [int, ...]) - A tuple containing the graphic output of the schedule as string and - the port bindings as list of ints. - ''' - sched = self.get_head() -# Initialize ports -# groups = [[] for x in range(len(set(portOccurances))-1)] - occ_ports = [[0]*self.ports for x in range(len(self.instrList))] -# occ_ports = [[0]*self.ports]*len(self.instrList) - port_bndgs = [0]*self.ports -# Check if there's a port occupation stored in the CSV, otherwise leave the -# occ_port list item empty - for i,instrForm in enumerate(self.instrList): - try: - searchString = instrForm[0]+'-'+self.get_operand_suffix(instrForm) - entry = self.df.loc[lambda df: df.instr == searchString,'TP':'ports'] - tup = entry.ports.values[0] - if(len(tup) == 1 and tup[0][0] == -1): - raise IndexError() - except IndexError: -# Instruction form not in CSV - sched += self.get_line(occ_ports[i], '* '+instrForm[-1]) - continue -# Get the occurance of each port from the occupation list - portOccurances = self.get_port_occurances(tup) -# Get 'occurance groups' - occuranceGroups = self.get_occurance_groups(portOccurances) -# Calculate port dependent throughput - TPGes = entry.TP.values[0]*len(occuranceGroups[0]) - for occGroup in occuranceGroups: - for port in occGroup: - occ_ports[i][port] = TPGes/len(occGroup) -# Write schedule line - sched += self.get_line(occ_ports[i], instrForm[-1]) -# Add throughput to total port binding - port_bndgs = list(map(add, port_bndgs, occ_ports[i])) - return (sched, port_bndgs) - - - def schedule_FCFS(self): - ''' - Schedules Instruction Form list for a single run with latencies. - - Returns - ------- - (str, int) - A tuple containing the graphic output as string and the total throughput time as int. - ''' - sched = self.get_head() - total = 0 -# Initialize ports - occ_ports = [0]*self.ports - for i,instrForm in enumerate(self.instrList): - try: - searchString = instrForm[0]+'-'+self.get_operand_suffix(instrForm) - entry = self.df.loc[lambda df: df.instr == searchString,'LT':'ports'] - tup = entry.ports.values[0] - if(len(tup) == 1 and tup[0][0] == -1): - raise IndexError() - except IndexError: -# Instruction form not in CSV - sched += self.get_line([0]*self.ports,'* '+instrForm[-1]) - continue - found = False - while(not found): - for portOcc in tup: -# Test if chosen instruction form port occupation suits the current CPU port occupation - if(self.test_ports_FCFS(occ_ports, portOcc)): -# Current port occupation fits for chosen port occupation of the instruction! - found = True - good = [entry.LT.values[0] if (j in portOcc) else 0 for j in range(0,self.ports)] - sched += self.get_line(good, instrForm[-1]) -# Add new occupation - occ_ports = [occ_ports[j]+good[j] for j in range(0, self.ports)] - break -# Step - occ_ports = [j-1 if (j > 0) else 0 for j in occ_ports] - if(entry.LT.values[0] != 0): - total += 1 - total += max(occ_ports) - return (sched, total) - - - def get_occurance_groups(self, portOccurances): - ''' - Groups ports in groups by the number of their occurance and sorts - groups by cardinality - - Parameters - ---------- - portOccurances : [int, ...] - List with the length of ports containing the number of occurances - of each port - - Returns - ------- - [[int, ...], ...] - List of lists with all occurance groups sorted by cardinality - (smallest group first) - ''' - groups = [[] for x in range(len(set(portOccurances))-1)] - for i,groupInd in enumerate(range(min(list(filter(lambda x: x > 0, portOccurances))),max(portOccurances)+1)): - for p, occurs in enumerate(portOccurances): - if groupInd == occurs: - groups[i].append(p) -# Sort groups by cardinality - groups.sort(key=len) - return groups - - - def get_port_occurances(self, tups): - ''' - Returns the number of each port occurance for the possible port - occupations - - Parameters - ---------- - tups : ((int, ...), ...) - Tuple of tuples of possible port occupations - - Returns - ------- - [int, ...] - List in the length of the number of ports for the current architecture, - containing the amount of occurances for each port - ''' - ports = [0]*self.ports - for tup in tups: - for elem in tup: - ports[elem] += 1 - return ports - - - def test_ports_FCFS(self, occ_ports, needed_ports): - ''' - Test if current configuration of ports is possible and returns boolean - - Parameters - ---------- - occ_ports : [int] - Tuple to inspect for current port occupation - needed_ports : (int) - Tuple with needed port(s) for particular instruction form - - Returns - ------- - bool - True if needed ports can get scheduled on current port occupation - False if not - ''' - for port in needed_ports: - if(occ_ports[port] != 0): - return False - return True - - - def get_report_info(self): - ''' - Creates Report information including all needed annotations. - - Returns - ------- - str - String containing the report information - ''' - analysis = 'Throughput Analysis Report\n'+('-'*26)+'\n' - annotations = ( '* - No information for this instruction in database\n' - '\n') - return analysis+annotations - - - def get_head(self): - ''' - Creates right heading for CPU architecture. - - Returns - ------- - str - String containing the header - ''' - horizLine = '-'*7*self.ports+'-\n' - portAnno = ' '*(math.floor((len(horizLine)-24)/2))+'Ports Pressure in cycles'+' '*(math.ceil((len(horizLine)-24)/2))+'\n' - portLine = '' - for i in range(0,self.ports): - portLine += '| {} '.format(i) - portLine += '|\n' - head = portAnno+portLine+horizLine - return head - - - def get_line(self, occ_ports, instrName): - ''' - Create line with port occupation for output. - - Parameters - ---------- - occ_ports : (int, ...) - Integer tuple containing needed ports - instrName : str - Name of instruction form for output - - Returns - ------- - str - String for output containing port scheduling for instrName - ''' - line = '' - for i in occ_ports: - cycles = ' ' if (i == 0) else '%.2f' % float(i) - line += '| '+cycles+' ' - line += '| '+instrName+'\n' - return line - - - def get_port_binding(self, port_bndg): - ''' - Creates port binding out of scheduling result. - - Parameters - ---------- - port_bndg : [int, ...] - Integer list containing port bindings - - Returns - ------- - str - String containing the port binding graphical output - ''' - header = 'Port Binding in Cycles Per Iteration:\n' - horizLine = '-'*10+'-'*6*self.ports+'\n' - portLine = '| Port |' - for i in range(0, self.ports): - portLine += ' {} |'.format(i) - portLine += '\n' - cycLine = '| Cycles |' - for i in range(len(port_bndg)): - cycLine += ' {} |'.format(round(port_bndg[i], 2)) - cycLine += '\n' - binding = header+horizLine+portLine+horizLine+cycLine+horizLine - return binding - - - def get_operand_suffix(self, instrForm): - ''' - Creates operand suffix out of list of Parameters. - - Parameters - ---------- - instrForm : [str, Parameter, ..., Parameter, str] - Instruction Form data structure - - Returns - ------- - str - Operand suffix for searching in database - ''' - extension = '' - opExt = [] - for i in range(1, len(instrForm)-1): - optmp = '' - if(isinstance(instrForm[i], Register) and instrForm[i].reg_type == 'GPR'): - optmp = 'r'+str(instrForm[i].size) - elif(isinstance(instrForm[i], MemAddr)): - optmp = 'mem' - else: - optmp = str(instrForm[i]).lower() - opExt.append(optmp) - operands = '_'.join(opExt) - return operands - - -if __name__ == '__main__': - data = [ - ['lea',Register('RAX'),MemAddr('%edx,(%rax,%rax,1)'),'lea 0x1(%rax,%rax,1),%edx'], - ['vcvtsi2ss',Register('XMM0'),Register('XMM0'),Register('RAX'),'vcvtsi2ss %edx,%xmm2,%xmm2'], - ['vmulss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vmulss %xmm2,%xmm0, %xmm3'], - ['lea',Register('RAX'),MemAddr('%edx,(%rax,%rax,1)'),'lea 0x2(%rax,%rax,1),%ecx'], - ['vaddss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vaddss %xmm3,%xmm1,%xmm4'], - ['vxorps',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vxorps %xmm1, %xmm1,%xmm1'], - ['vcvtsi2ss',Register('XMM0'),Register('XMM0'),Register('RAX'),'vcvtsi2ss %ecx,%xmm1, %xmm1'], - ['vmulss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vmulss %xmm1,%xmm0,%xmm5'], - ['vmovss',MemAddr('%edx,(%rax,%rax,1)'),Register('XMM0'),'vmovss %xmm4,0x4(%rsp,%rax,8)'], - ['vaddss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vaddss %xmm5,%xmm4,%xmm1'], - ['vmovss',MemAddr('%edx,(%rax,%rax,1)'),Register('XMM0'),'vmovss %xmm1,0x8(%rsp,%rax,8)'], - ['inc',Register('RAX'),'inc %rax'], - ['cmp',Register('RAX'),Parameter('IMD'),'cmp $0x1f3,%rax'], - ['jb',Parameter('LBL'),'jb 400bc2 '] - ] - - sched = Scheduler('ivb', data) - output,binding = sched.schedule() - print(sched.get_port_binding(binding)) - print(sched.get_report_info(),end='') - print(output) - print('Block Throughput: {}'.format(round(max(binding),2))) diff --git a/Params.py b/Params.py deleted file mode 100755 index afde59d..0000000 --- a/Params.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/apps/python/3.5-anaconda/bin/python -class Parameter(object): - type_list = ["REG", "MEM", "IMD", "LBL", "NONE"] - def __init__(self, ptype, name="NONE"): - self.ptype = ptype.upper() - if(self.ptype not in self.type_list): - raise NameError("Type not supported: "+ptype) - - def __str__(self): - '''returns string representation''' - if(self.ptype == "NONE"): - return "" - else: - return self.ptype - -class MemAddr(Parameter): - segment_regs = ["CS", "DS", "SS", "ES", "FS", "GS"] - scales = [1, 2, 4, 8] - def __init__(self, name): - self.sreg = False - self.offset = False - self.base = False - self.index = False - self.scale = False - if(':' in name): - if(name[1:name.index(':')].upper() not in self.segment_regs): - raise NameError("Type not supported: "+name) - self.sreg = True - self.offset = True - if('(' not in name or ('(' in name and name.index('(') != 0)): - self.offset = True - if('(' in name): - self.parentheses = name[name.index('(')+1:-1] - self.commacnt = self.parentheses.count(',') - if(self.commacnt == 0): - self.base = True - elif(self.commacnt == 1 or self.commacnt == 2 and int(self.parentheses[-1:]) == 1): - self.base = True - self.index = True - elif(self.commacnt == 2 and int(self.parentheses[-1:]) in self.scales): - self.base = True - self.index = True - self.scale = True - else: - raise NameError("Type not supported: "+name) - - def __str__(self): - '''returns string representation''' - mem_format = "MEM(" - if(self.sreg): - mem_format += "sreg:" - if(self.offset): - mem_format += "offset" - if(self.base and not self.index): - mem_format += "(base)" - elif(self.base and self.index and self.scale): - mem_format += "(base, index, scale)" - mem_format += ")" - return mem_format - - - -class Register(Parameter): - sizes = { -#General Purpose Registers - "AH":(8,"GPR"), "AL":(8,"GPR"), "BH":(8,"GPR"), "BL":(8,"GPR"), "CH":(8,"GPR"), "CL":(8,"GPR"), "DH":(8,"GPR"), "DL":(8,"GPR"), "BPL":(8,"GPR"), "SIL":(8,"GPR"), "DIL":(8,"GPR"), "SPL":(8,"GPR"), "R8L":(8,"GPR"), "R9L":(8,"GPR"), "R10L":(8,"GPR"), "R11L":(8,"GPR"), "R12L":(8,"GPR"), "R13L":(8,"GPR"), "R14L":(8,"GPR"), "R15L":(8,"GPR"), - "R8B":(8,"GPR"),"R9B":(8,"GPR"),"R10B":(8,"GPR"),"R11B":(8,"GPR"),"R12B":(8,"GPR"),"R13B":(8,"GPR"),"R14B":(8,"GPR"),"R15B":(8,"GPR"), - "AX":(16,"GPR"), "BC":(16,"GPR"), "CX":(16,"GPR"), "DX":(16,"GPR"), "BP":(16,"GPR"), "SI":(16,"GPR"), "DI":(16,"GPR"), "SP":(16,"GPR"), "R8W":(16,"GPR"), "R9W":(16,"GPR"), "R10W":(16,"GPR"), "R11W":(16,"GPR"), "R12W":(16,"GPR"), "R13W":(16,"GPR"), "R14W":(16,"GPR"), "R15W":(16,"GPR"), - "EAX":(32,"GPR"), "EBX":(32,"GPR"), "ECX":(32,"GPR"), "EDX":(32,"GPR"), "EBP":(32,"GPR"), "ESI":(32,"GPR"), "EDI":(32,"GPR"), "ESP":(32,"GPR"), "R8D":(32,"GPR"), "R9D":(32,"GPR"), "R10D":(32,"GPR"), "R11D":(32,"GPR"), "R12D":(32,"GPR"), "R13D":(32,"GPR"), "R14D":(32,"GPR"), "R15D":(32,"GPR"), - "RAX":(64,"GPR"), "RBX":(64,"GPR"), "RCX":(64,"GPR"), "RDX":(64,"GPR"), "RBP":(64,"GPR"), "RSI":(64,"GPR"), "RDI":(64,"GPR"), "RSP":(64,"GPR"), "R8":(64,"GPR"), "R9":(64,"GPR"), "R10":(64,"GPR"), "R11":(64,"GPR"), "R12":(64,"GPR"), "R13":(64,"GPR"), "R14":(64,"GPR"), "R15":(64,"GPR"), - "CS":(16,"GPR"), "DS":(16,"GPR"), "SS":(16,"GPR"), "ES":(16,"GPR"), "FS":(16,"GPR"), "GS":(16,"GPR"), - "EFLAGS":(32,"GPR"), "RFLAGS":(64,"GPR"), "EIP":(32,"GPR"), "RIP":(64,"GPR"), -#FPU Registers - "ST0":(80,"FPU"),"ST1":(80,"FPU"),"ST2":(80,"FPU"),"ST3":(80,"FPU"),"ST4":(80,"FPU"),"ST5":(80,"FPU"),"ST6":(80,"FPU"),"ST7":(80,"FPU"), -#MMX Registers - "MM0":(64,"MMX"),"MM1":(64,"MMX"),"MM2":(64,"MMX"),"MM3":(64,"MMX"),"MM4":(64,"MMX"),"MM5":(64,"MMX"),"MM6":(64,"MMX"),"MM7":(64,"MMX"), -#XMM Registers - "XMM0":(128,"XMM"),"XMM1":(128,"XMM"),"XMM2":(128,"XMM"),"XMM3":(128,"XMM"),"XMM4":(128,"XMM"),"XMM5":(128,"XMM"),"XMM6":(128,"XMM"),"XMM7":(128,"XMM"), "XMM8":(128,"XMM"), "XMM9":(128,"XMM"), "XMM10":(128,"XMM"), "XMM11":(128,"XMM"), "XMM12":(128,"XMM"), "XMM13":(128,"XMM"), "XMM14":(128,"XMM"), "XMM15":(128,"XMM"), "XMM16":(128,"XMM"), "XMM17":(128,"XMM"), "XMM18":(128,"XMM"), "XMM19":(128,"XMM"), "XMM20":(128,"XMM"), "XMM21":(128,"XMM"), "XMM22":(128,"XMM"), "XMM23":(128,"XMM"), "XMM24":(128,"XMM"), "XMM25":(128,"XMM"), "XMM26":(128,"XMM"), "XMM27":(128,"XMM"), "XMM28":(128,"XMM"), "XMM29":(128,"XMM"), "XMM30":(128,"XMM"), "XMM31":(128,"XMM"), -#YMM Registers - "YMM0":(256,"YMM"),"YMM1":(256,"YMM"),"YMM2":(256,"YMM"),"YMM3":(256,"YMM"),"YMM4":(256,"YMM"),"YMM5":(256,"YMM"),"YMM6":(256,"YMM"),"YMM7":(256,"YMM"), "YMM8":(256,"YMM"), "YMM9":(256,"YMM"), "YMM10":(256,"YMM"), "YMM11":(256,"YMM"), "YMM12":(256,"YMM"), "YMM13":(256,"YMM"), "YMM14":(256,"YMM"), "YMM15":(256,"YMM"), "YMM16":(256,"YMM"), "YMM17":(256,"YMM"), "YMM18":(256,"YMM"), "YMM19":(256,"YMM"), "YMM20":(256,"YMM"), "YMM21":(256,"YMM"), "YMM22":(256,"YMM"), "YMM23":(256,"YMM"), "YMM24":(256,"YMM"), "YMM25":(256,"YMM"), "YMM26":(256,"YMM"), "YMM27":(256,"YMM"), "YMM28":(256,"YMM"), "YMM29":(256,"YMM"), "YMM30":(256,"YMM"), "YMM31":(256,"YMM"), -#ZMM Registers - "ZMM0":(512,"ZMM"),"ZMM1":(512,"ZMM"),"ZMM2":(512,"ZMM"),"ZMM3":(512,"ZMM"),"ZMM4":(512,"ZMM"),"ZMM5":(512,"ZMM"),"ZMM6":(512,"ZMM"),"ZMM7":(512,"ZMM"), "ZMM8":(512,"ZMM"), "ZMM9":(512,"ZMM"), "ZMM10":(512,"ZMM"), "ZMM11":(512,"ZMM"), "ZMM12":(512,"ZMM"), "ZMM13":(512,"ZMM"), "ZMM14":(512,"ZMM"), "ZMM15":(512,"ZMM"), "ZMM16":(512,"ZMM"), "ZMM17":(512,"ZMM"), "ZMM18":(512,"ZMM"), "ZMM19":(512,"ZMM"), "ZMM20":(512,"ZMM"), "ZMM21":(512,"ZMM"), "ZMM22":(512,"ZMM"), "ZMM23":(512,"ZMM"), "ZMM24":(512,"ZMM"), "ZMM25":(512,"ZMM"), "ZMM26":(512,"ZMM"), "ZMM27":(512,"ZMM"), "ZMM28":(512,"ZMM"), "ZMM29":(512,"ZMM"), "ZMM30":(512,"ZMM"), "ZMM31":(512,"ZMM"), -#Opmask Register - "K0":(64,"K"), "K1":(64,"K"), "K2":(64,"K"), "K3":(64,"K"), "K4":(64,"K"), "K5":(64,"K"), "K6":(64,"K"), "K7":(64,"K"), -#Bounds Registers - "BND0":(128,"BND"),"BND1":(128,"BND"),"BND2":(128,"BND"),"BND3":(128,"BND"), -#Registers in gerneral - "R8":(8,"GPR"), "R16":(16,"GPR"), "R32":(32,"GPR"), "R64":(64,"GPR"), "FPU":(80,"FPU"), "MMX":(64,"MMX"), "XMM":(128,"XMM"), "YMM":(256,"YMM"), "ZMM":(512,"ZMM"), "K":(64,"K"), "BND":(128,"BND") - } - - def __init__(self,name,mask=False): - self.name = name.upper() - self.mask = mask -# try: - if[name in self.sizes]: - self.size = self.sizes[self.name][0] - self.reg_type = self.sizes[self.name][1] - else: - print(lncnt) - raise NameError("Register name not in dictionary: "+self.name) -# except KeyError: -# print(lncnt) - - def __str__(self): - '''returns string representation''' - opmask = "" - if(self.mask): - opmask = "{opmask}" - return(self.reg_type+opmask) diff --git a/Testcase.py b/Testcase.py deleted file mode 100755 index f3276e3..0000000 --- a/Testcase.py +++ /dev/null @@ -1,367 +0,0 @@ -#!/apps/python/3.5-anaconda/bin/python - -import os -from subprocess import call -from math import ceil -from Params import * - -class Testcase(object): - -##------------------Constant variables-------------------------- -# Lookup tables for regs - gprs64 = ['rax', 'rbx', 'rcx', 'rdx', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15'] - gprs32 = ['eax', 'ebx', 'ecx', 'edx', 'r9d', 'r10d', 'r11d', 'r12d', 'r13d', 'r14d', 'r15d'] - gprs16 = ['ax', 'bx', 'cx', 'dx', 'r9w', 'r10w', 'r11w', 'r12w', 'r13w', 'r14w', 'r15w'] - gprs8 = ['al', 'bl', 'cl', 'dl', 'r9l', 'r10l', 'r11l', 'r12l', 'r13l', 'r14l', 'r15l'] - fpus = ['st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6', 'st7'] - mmxs = ['mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', 'mm7'] - ks = ['k0', 'k1', 'k2', 'k3', 'k4', 'k5', 'k6', 'k7'] - bnds = ['bnd0', 'bnd1', 'bnd2', 'bnd3', 'bnd4', 'bnd5', 'bnd6', 'bnd7'] - xmms = ['xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'xmm8', 'xmm9', - 'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14', 'xmm15'] - ymms = ['ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5', 'ymm6', 'ymm7', 'ymm8', 'ymm9', - 'ymm10', 'ymm11', 'ymm12', 'ymm13', 'ymm14', 'ymm15'] - zmms = ['zmm0', 'zmm1', 'zmm2', 'zmm3', 'zmm4', 'zmm5', 'zmm6', 'zmm7', 'zmm8', 'zmm9', - 'zmm10', 'zmm11', 'zmm12', 'zmm13', 'zmm14', 'zmm15'] -# Lookup table for memory - mems = ['[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]','[rip+PI]'] -# Lookup table for immediates - imds = ['1', '2', '13', '22', '8', '78', '159', '222', '3', '9', '5', '55', '173', '317', '254', '255'] -# TODO Differentiate between AVX512 (with additional xmm16-31) and the rest -# ... -# ... -# end TODO - - ops = {'gpr64':gprs64, 'gpr32':gprs32, 'gpr16':gprs16, 'gpr8':gprs8, 'fpu':fpus, 'mmx':mmxs, 'k':ks, 'bnd':bnds, 'xmm':xmms, 'ymm':ymms, 'zmm':zmms, 'mem':mems, 'imd':imds} - -# Create Single Precision 1.0 - sp1 = '\t\t# create SP 1.0\n' - sp1 += '\t\tvpcmpeqw xmm0, xmm0, xmm0\n' - sp1 += '\t\tvpslld xmm0, xmm0, 25\t\t\t# logical left shift: 11111110..0 (25=32-(8-1))\n' - sp1 += '\t\tvpsrld xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading mantissa bit is zero\n' - sp1 += '\t\t# copy SP 1.0\n' -# Create Double Precision 1.0 - dp1 = '\t\t# create DP 1.0\n' - dp1 += '\t\tvpcmpeqw xmm0, xmm0, xmm0\t\t# all ones\n' - dp1 += '\t\tvpsllq xmm0, xmm0, 54\t\t\t# logical left shift: 11111110..0 (54=64-(10-1))\n' - dp1 += '\t\tvpsrlq xmm0, xmm0, 2\t\t\t# logical right shift: 1 bit for sign; leading mantissa bit is zero\n' -# Create epilogue - done = ('done:\n' - '\t\tmov\trsp, rbp\n' - '\t\tpop\trbp\n' - '\t\tret\n' - '.size latency, .-latency') -##---------------------------------------------------------------- - -# Constructor - def __init__(self, _mnemonic, _param_list, _num_instr='32'): - self.instr = _mnemonic.lower() - self.param_list = _param_list -# num_instr must be an even number - self.num_instr = str(ceil(int(_num_instr)/2)*2) -# Check for the number of operands and initialise the GPRs if necessary - self.op_a, self.op_b, self.op_c, self.gprPush, self.gprPop, self.zeroGPR, self.copy = self.__define_operands() - self.num_operands = len(self.param_list) - -# Create asm header - self.def_instr, self.ninstr, self.init, self.expand = self.__define_header() -# Create latency and throughput loop - self.loop_lat = self.__define_loop_lat() - self.loop_thrpt = self.__define_loop_thrpt() -# Create extension for testcase name - sep1 = '_' if (self.num_operands > 1) else '' - sep2 = '_' if (self.num_operands > 2) else '' - self.extension = ('-'+(self.op_a if ('gpr' not in self.op_a) else 'r' + self.op_a[3:]) + sep1 + (self.op_b if ('gpr' not in self.op_b) else 'r'+self.op_b[3:]) + sep2 + (self.op_c if ('gpr' not in self.op_c) else 'r'+self.op_c[3:])) - - - def write_testcase(self, TP=True, LT=True): - """ - Write testcase for class attributes in a file. - - Parameters - ---------- - TP : bool - Controls if throughput testcase should be written - (default True) - - LT : bool - Controls if latency testcase should be written - (default True) - """ - if(LT): -# Write latency file - call(['mkdir', '-p', 'testcases']) - f = open(os.path.dirname(__file__)+'/testcases/'+self.instr+self.extension+'.S', 'w') - data = (self.def_instr+self.ninstr+self.init+self.dp1+self.expand+self.gprPush+self.zeroGPR+self.copy+self.loop_lat+self.gprPop+self.done) - f.write(data) - f.close() - if(TP): -# Write throughput file - f = open(os.path.dirname(__file__)+'/testcases/'+self.instr+self.extension+'-TP.S', 'w') - data = (self.def_instr+self.ninstr+self.init+self.dp1+self.expand+self.gprPush+self.zeroGPR+self.copy+self.loop_thrpt+self.gprPop+self.done) - f.write(data) - f.close() - - -# Check operands - def __define_operands(self): - """ - Check for the number of operands and initialise the GPRs if necessary. - - Returns - ------- - (str, str, str, str, str, str) - String tuple containing types of operands and if needed push/pop operations, the - initialisation of general purpose regs and the copy if registers. - """ - oprnds = self.param_list - op_a, op_b, op_c = ('', '', '') - gprPush, gprPop, zeroGPR = ('', '', '') - if(isinstance(oprnds[0], Register)): - op_a = oprnds[0].reg_type.lower() - elif(isinstance(oprnds[0], MemAddr)): - op_a = 'mem' - elif(isinstance(oprnds[0], Parameter) and str(oprnds[0]) == 'IMD'): - op_a = 'imd' - if(op_a == 'gpr'): - gprPush, gprPop, zeroGPR = self.__initialise_gprs() - op_a += str(oprnds[0].size) - if(len(oprnds) > 1): - if(isinstance(oprnds[1], Register)): - op_b = oprnds[1].reg_type.lower() - elif(isinstance(oprnds[1], MemAddr)): - op_b = 'mem' - elif(isinstance(oprnds[1], Parameter) and str(oprnds[1]) == 'IMD'): - op_b = 'imd' - if(op_b == 'gpr'): - op_b += str(oprnds[1].size) - if('gpr' not in op_a): - gprPush, gprPop, zeroGPR = self.__initialise_gprs() - if(len(oprnds) == 3): - if(isinstance(oprnds[2], Register)): - op_c = oprnds[2].reg_type.lower() - elif(isinstance(oprnds[2], MemAddr)): - op_c = 'mem' - elif(isinstance(oprnds[2], Parameter) and str(oprnds[2]) == 'IMD'): - op_c = 'imd' - if(op_c == 'gpr'): - op_c += str(oprnds[2].size) - if(('gpr' not in op_a) and ('gpr'not in op_b)): - gprPush, gprPop, zeroGPR = self.__initialise_gprs() - if(len(oprnds) == 1 and isinstance(oprnds[0], Register)): - copy = self.__copy_regs(oprnds[0]) - elif(len(oprnds) > 1 and isinstance(oprnds[1], Register)): - copy = self.__copy_regs(oprnds[1]) - elif(len(oprnds) > 2 and isinstance(oprnds[2], Register)): - copy = self.__copy_regs(oprnds[1]) - else: - copy = '' - return (op_a, op_b, op_c, gprPush, gprPop, zeroGPR, copy) - - - def __initialise_gprs(self): - """ - Initialise eleven general purpose registers and set them to zero. - - Returns - ------- - (str, str, str) - String tuple for push, pop and initalisation operations - """ - - gprPush = '' - gprPop = '' - zeroGPR = '' - for reg in self.gprs64: - gprPush += '\t\tpush {}\n'.format(reg) - for reg in reversed(self.gprs64): - gprPop += '\t\tpop {}\n'.format(reg) - for reg in self.gprs64: - zeroGPR += '\t\txor {}, {}\n'.format(reg, reg) - return (gprPush, gprPop, zeroGPR) - - -# Copy created values in specific register - def __copy_regs(self, reg): - """ - Copy created values in specific register. - - Parameters - ---------- - reg : Register - Register for copying the value - - Returns - ------- - str - String containing the copy instructions - """ - copy = '\t\t# copy DP 1.0\n' -# Different handling for GPR, MMX and SSE/AVX registers - if(reg.reg_type == 'GPR'): - copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) - copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][1]) - copy += '\t\t# Create DP 2.0\n' - copy += '\t\tadd {}, {}\n'.format(self.ops['gpr64'][1], self.ops['gpr64'][0]) - copy += '\t\t# Create DP 0.5\n' - copy += '\t\tdiv {}\n'.format(self.ops['gpr64'][0]) - copy += '\t\tmovq {}, {}\n'.format(self.ops['gpr64'][2], self.ops['gpr64'][0]) - copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) - elif(reg.reg_type == 'MMX'): - copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][0]) - copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][1]) - copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) - copy += '\t\t# Create DP 2.0\n' - copy += '\t\tadd {}, {}\n'.format(ops['mmx'][1], ops['mmx'][0]) - copy += '\t\t# Create DP 0.5\n' - copy += '\t\tdiv {}\n'.format(self.ops['gpr64'][0]) - copy += '\t\tmovq {}, {}\n'.format(self.ops['mmx'][2], self.ops['gpr64'][0]) - elif(reg.reg_type == 'XMM' or reg.reg_type == 'YMM' or reg.reg_type == 'ZMM'): - key = reg.reg_type.lower() - copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][0], self.ops[key][0]) - copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][1], self.ops[key][0]) - copy += '\t\t# Create DP 2.0\n' - copy += '\t\tvaddpd {}, {}, {}\n'.format(self.ops[key][1], self.ops[key][1], self.ops[key][1]) - copy += '\t\t# Create DP 0.5\n' - copy += '\t\tvdivpd {}, {}, {}\n'.format(self.ops[key][2], self.ops[key][0], self.ops[key][1]) - else: - copy = '' - return copy - - - def __define_header(self): - """ - Define header. - - Returns - ------- - (str, str, str, str) - String tuple containing the header, value initalisations and extensions - """ - def_instr = '#define INSTR '+self.instr+'\n' - ninstr = '#define NINST '+self.num_instr+'\n' - pi = ('PI:\n' - '.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' #128 bit - '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' #256 bit - '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, ' #384 bit - '0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9\n') #512 bit - init = ('#define N edi\n' \ - '#define i r8d\n\n\n' - '.intel_syntax noprefix\n' - '.globl ninst\n' - '.data\n' - 'ninst:\n' - '.long NINST\n' - '.align 32\n' - +pi+ - '.text\n' - '.globl latency\n' - '.type latency, @function\n' - '.align 32\n' - 'latency:\n' - '\t\tpush rbp\n' - '\t\tmov rbp, rsp\n' - '\t\txor i, i\n' - '\t\ttest N, N\n' - '\t\tjle done\n') -# Expand to AVX(512) if necessary - expand = '' - if(self.op_a == 'ymm' or self.op_b == 'ymm' or self.op_c == 'ymm'): - expand = ('\t\t# expand from SSE to AVX\n' - '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n') - if(self.op_a == 'zmm' or self.op_b == 'zmm' or self.op_c == 'zmm'): - expand = ('\t\t# expand from SSE to AVX\n' - '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n' - '\t\t# expand from AVX to AVX512\n' - '\t\tvinsert64x4 zmm0, zmm0, ymm0, 0x1\n') - return (def_instr, ninstr, init, expand) - - - def __define_loop_lat(self): - """ - Create latency loop. - - Returns - ------- - str - Latency loop as string - """ - loop_lat = ('loop:\n' - '\t\tinc i\n') - if(self.num_operands == 1): - for i in range(0, int(self.num_instr)): - loop_lat += '\t\tINSTR {}\n'.format(self.ops[self.op_a][0]) - elif(self.num_operands == 2 and self.op_a == self.op_b): - for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][1]) - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_b][1], self.ops[self.op_b][0]) - elif(self.num_operands == 2 and self.op_a != self.op_b): - for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0]) - loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0]) - elif(self.num_operands == 3 and self.op_a == self.op_b): - for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][1], self.ops[self.op_c][0]) - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1], self.ops[self.op_b][0], self.ops[self.op_c][0]) - elif(self.num_operands == 3 and self.op_a == self.op_c): - for i in range(0, int(self.num_instr), 2): - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0], self.ops[self.op_c][0]) - loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1], self.ops[self.op_b][0], self.ops[self.op_c][0]) - loop_lat += ('\t\tcmp i, N\n' - '\t\tjl loop\n') - return loop_lat - - - def __define_loop_thrpt(self): - """ - Create throughput loop. - - Returns - ------- - str - Throughput loop as string - """ - loop_thrpt = ('loop:\n' - '\t\tinc i\n') - ext = '' - ext1 = False - ext2 = False - if(self.num_operands == 2): - ext1 = True - if(self.num_operands == 3): - ext1 = True - ext2 = True - for i in range(0, int(self.num_instr)): - if(ext1): - ext = ', {}'.format(self.ops[self.op_b][i%3]) - if(ext2): - ext += ', {}'.format(self.ops[self.op_c][i%3]) - regNum = (i%(len(self.ops[self.op_a])-3))+3 - loop_thrpt += '\t\tINSTR {}{}\n'.format(self.ops[self.op_a][regNum], ext) - loop_thrpt += ('\t\tcmp i, N\n' - '\t\tjl loop\n') - return loop_thrpt - - - def __is_in_dir(self): - """ - Check if testcases with the same name already exist in testcase - directory. - - Returns - ------- - (bool, bool) - True if file is in directory - False if file is not in directory - While the first value stands for the throughput testcase - and the second value stands for the latency testcase - """ - TP = False - LT = False - name = self.instr+self.extension - for root, dirs, files in os.walk(os.path.dirname(__file__)+'/testcases'): - if((name+'-TP.S') in files): - TP = True - if name+'.S' in files: - LT = True - return (TP,LT) diff --git a/create_testcase.py b/create_testcase.py deleted file mode 100755 index 134bfa2..0000000 --- a/create_testcase.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/apps/python/3.5-anaconda/bin/python - -from Params import * -from Testcase import * - -# Choose out of various operands -reg8 = Register('al') -reg16 = Register('ax') -reg32 = Register('eax') -reg64 = Register('rax') -xmm = Register('xmm0') -ymm = Register('ymm0') -zmm = Register('zmm0') -mem0 = MemAddr('(%rax, %esi, 4)') -imd1 = Parameter('IMD') - - -#----------------------------------------------- -#-USER INPUT------------------------------------ -#----------------------------------------------- -# Enter your mnemonic -mnemonic = 'vxorpd' - -# Define your operands. If you don't need it, just type in None -dst = xmm -op1 = xmm -op2 = xmm - -# Define the number of instructions per loop (default: 12) -per_loop = '128' - -#----------------------------------------------- -#----------------------------------------------- - -# Start -operands = [x for x in [dst, op1, op2] if x is not None] -opListStr = ', '.join([str(x) for x in operands]) -print('Create Testcase for {} {}'.format(mnemonic, opListStr ), end='') -tc = Testcase(mnemonic, operands, per_loop) -tc.write_testcase() -print(' --------> SUCCEEDED') diff --git a/get_instr.py b/get_instr.py deleted file mode 100755 index c181f25..0000000 --- a/get_instr.py +++ /dev/null @@ -1,247 +0,0 @@ -#!/apps/python/3.5-anaconda/bin/python -import sys -import re -from Testcase import * -from Params import * - -marker = r'//STARTLOOP' -asm_line = re.compile(r'\s[0-9a-f]+[:]') -numSeps = 0 -sem = 0 -db = {} -sorted_db = [] -lncnt = 1 -#cnt=0 -fname = "" -cntChar = '' -first = True - -def extract_instr(asmFile): - global once - global lncnt - global fname - fname = asmFile -#Check if parameter is in the correct file format - if(asmFile[-4:] != ".log"): - print("Invalid argument") - sys.exit() -#Open file - try: - f=open(asmFile, "r") - except IOError: - print("IOError: File not found") -#Analyse code line by line and check the instructions - lncnt = 1 - for line in f: - check_line(line) - lncnt += 1 - f.close() - - -def check_line(line): - global numSeps - global sem - global first -#Check if marker is in line and count the number of whitespaces if so - if(marker in line): -#But first, check if high level code ist indented with whitespaces or tabs - if(first): - set_counter_char(line) - first = False - numSeps = (re.split(marker,line)[0]).count(cntChar) - sem = 2; - elif(sem > 0): -#We're in the marked code snipped -#Check if the line is ASM code and - if not - check if we're still in the loop - match = re.search(asm_line, line) - if(match): -#Further analysis of instructions -# print("".join(re.split(r'\t',line)[-1:]),end="") -#Check if there are commetns in line - if(r'//' in line): - return - check_instr("".join(re.split(r'\t',line)[-1:])) - elif((re.split(r'\S',line)[0]).count(cntChar) <= numSeps): -#Not in the loop anymore - or yet - so we decrement the semaphore - sem = sem-1 - -#Check if seperator is either tabulator or whitespace -def set_counter_char(line): - global cntChar - numSpaces = (re.split(marker,line)[0]).count(" ") - numTabs = (re.split(marker,line)[0]).count("\t") - if(numSpaces != 0 and numTabs == 0): - cntChar = ' ' - elif(numSpaces == 0 and numTabs != 0): - cntChar = '\t' - else: - raise NotImplementedError("Indentation of code is only supported for whitespaces and tabs.") - - -def check_instr(instr): - global db - global lncnt - global cnt - global fname -#Check for strange clang padding bytes - while(instr.startswith("data32")): - instr = instr[7:] -#Seperate mnemonic and operands - mnemonic = instr.split()[0] - params = "".join(instr.split()[1:]) -#Check if line is not only a byte - empty_byte = re.compile(r'[0-9a-f]{2}') - if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2): - return -#Check if there's one or more operand and store all in a list - param_list = flatten(separate_params(params)) - opList = list(param_list) -#Check operands and seperate them by IMMEDIATE (IMD), REGISTER (REG), MEMORY (MEM) or LABEL (LBL) - for i in range(len(param_list)): - op = param_list[i] - if(len(op) <= 0): - op = Parameter("NONE") - elif(op[0] == '$'): - op = Parameter("IMD") - elif(op[0] == '%' and '(' not in op): - j = len(op) - opmask = False - if('{' in op): - j = op.index('{') - opmask = True - op = Register(op[1:j], opmask) - elif('<' in op): - op = Parameter("LBL") - else: - op = MemAddr(op) - param_list[i] = str(op) - opList[i] = op -#Join mnemonic and operand(s) to an instruction form - if(len(mnemonic) > 7): - tabs = "\t" - else: - tabs = "\t\t" - instr_form = mnemonic+tabs+(" ".join(param_list)) -#Check in database for instruction form and increment the counter - if(instr_form in db): - db[instr_form] = db[instr_form]+1 - else: - db[instr_form] = 1 -#Create testcase for instruction form, since it is the first appearance of it -#But (as far as now) only for instr forms with only registers as operands -# is_Reg = True -# for par in opList: -# print(par.print()+" is Register: "+str(isinstance(par, Register))) -# if(not isinstance(par, Register)): -# is_Reg = False -# if(is_Reg): - #print(mnemonic) -# print("create testcase for "+mnemonic+" with params:") -# for p in opList: -# print(p.print(),end=", ") -# print() - - -#Only create benchmark if no label (LBL) is part of the operands - do_bench = True - for par in opList: - if(str(par) == 'LBL' or str(par) == ''): - do_bench = False - if(do_bench): -#Create testcase with reversed param list, due to the fact its intel syntax! -# create_testcase(mnemonic, list(reversed(opList))) -# print('menmonic: '+mnemonic+' ops: '+str(list(reversed(opList)))) - tc = Testcase(mnemonic, list(reversed(opList)), '64') - tc.write_testcase() -# print("-----------") - -def separate_params(params): - param_list = [params] - if(',' in params): - if(')' in params): - if(params.index(')') < len(params)-1 and params[params.index(')')+1] == ','): - i = params.index(')')+1 - elif(params.index('(') < params.index(',')): - return param_list - else: - i = params.index(',') - else: - i = params.index(',') - param_list = [params[:i],separate_params(params[i+1:])] - elif('#' in params): - i = params.index('#') - param_list = [params[:i]] - return param_list - - -def sort_db(): - global sorted_db - sorted_db=sorted(db.items(), key=lambda x:x[1], reverse=True) - - -def print_sorted_db(): - sort_db() - sum = 0 - print("Number of\tmnemonic") - print("calls\n") - for i in range(len(sorted_db)): - print(str(sorted_db[i][1])+"\t\t"+sorted_db[i][0]) - sum += sorted_db[i][1] - print("\nCumulated number of instructions: "+str(sum)) - - -def save_db(): - global db - file = open(".cnt_asm_ops.db","w") - for i in db.items(): - file.write(i[0]+"\t"+str(i[1])+"\n") - file.close() - - -def load_db(): - global db - try: - file = open(".cnt_asm_ops.db", "r") - except FileNotFoundError: - print("no database found in current directory") - return - for line in file: - mnemonic = line.split('\t')[0] -#Join mnemonic and operand(s) to an instruction form - if(len(mnemonic) > 7): - tabs = "\t" - params = line.split('\t')[1] - numCalls = line.split("\t")[2][:-1] - else: - tabs = "\t\t" - params = line.split('\t')[2] - numCalls = line.split("\t")[3][:-1] - instr_form = mnemonic+tabs+params - db[instr_form] = int(numCalls) - file.close() - - -def flatten(l): - if l == []: - return l - if(isinstance(l[0], list)): - return flatten(l[0]) + flatten(l[1:]) - return l[:1] + flatten(l[1:]) - - -if __name__ == "__main__": -# load_db() -# r0 = Register("ymm0") -# r1 = Register("xmm0") -# r64 = Register("rax") -# r32 = Register("eax") -# mem0 = MemAddr('(%rax, %esi, 4)') -# tc = Testcase("XOR", [r32, r32], '64') -# tc.write_testcase() -# create_testcase("VADDPD", [r0, r0, r0]) - if(len(sys.argv) > 1): - for i in range(1,len(sys.argv)): - extract_instr(sys.argv[i]) - print_sorted_db() - -# save_db() diff --git a/osaca.py b/osaca.py deleted file mode 100755 index 37bfa73..0000000 --- a/osaca.py +++ /dev/null @@ -1,826 +0,0 @@ -#!/apps/python/3.5-anaconda/bin/python - -import argparse -import sys -import subprocess -import os -import re -from Params import * -from EUsched import * -from Testcase import * -import pandas as pd -from datetime import datetime -import numpy as np - - -class Osaca(object): - arch = None - filepath = None - srcCode = None - df = None - instrForms = None -# Variables for checking lines - numSeps = 0 - indentChar = '' - sem = 0 - marker = r'//STARTLOOP' - -# Variables for creating output - longestInstr = 30 -# Constants - ASM_LINE = re.compile(r'\s[0-9a-f]+[:]') -# Matches every variation of the IACA start marker - IACA_SM = re.compile(r'\s*movl[ \t]+\$111[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ \t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') -# Matches every variation of the IACA end marker - IACA_EM = re.compile(r'\s*movl[ \t]+\$222[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ \t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') - - def __init__(self, _arch, _filepath): - self.arch = _arch - self.filepath = _filepath - self.instrForms = [] - - -##-------------------main functions depending on arguments---------------------- - def include_ibench(self): - """ - Reads ibench output and includes it in the architecture specific csv - file. - """ -# Check args and exit program if something's wrong - if(not self.check_arch()): - print('Invalid microarchitecture.') - sys.exit() - if(not self.check_file()): - print('Invalid file path or file format.') - sys.exit() -# Check for database for the chosen architecture - self.df = self.read_csv() -# Create sequence of numbers and their reciprokals for validate the measurements - cycList,reciList = self.create_sequences() - print('Everything seems fine! Let\'s start!') - newData = [] - addedValues = 0 - for line in self.srcCode: - if('Using frequency' in line or len(line) == 0): - continue - clmn = 'LT' - instr = line.split()[0][:-1] - if('TP' in line): -# We found a command with a throughput value. Get instruction and the number of -# clock cycles and remove the '-TP' suffix. - clmn = 'TP' - instr = instr[:-3] -# Otherwise it is a latency value. Nothing to do. - clkC = line.split()[1] - clkC_tmp = clkC - clkC = self.validate_val(clkC, instr, True if (clmn == 'TP') else False, cycList, reciList) - txtOutput = True if (clkC_tmp == clkC) else False - val = -2 - new = False - try: - entry = self.df.loc[lambda df: df.instr == instr,clmn] - val = entry.values[0] - except IndexError: -# Instruction not in database yet --> add it - new = True -# First check if LT or TP value has already been added before - for i,item in enumerate(newData): - if(instr in item): - if(clmn == 'TP'): - newData[i][1] = clkC - elif(clmn == 'LT'): - newData[i][2] = clkC - new = False - break - if(new and clmn == 'TP'): - newData.append([instr,clkC,'-1',((-1,),)]) - elif(new and clmn == 'LT'): - newData.append([instr,'-1',clkC,((-1,),)]) - new = True - addedValues += 1 - pass -# If val is -1 (= not filled with a valid value) add it immediately - if(val == -1): - self.df.set_value(entry.index[0], clmn, clkC) - addedValues += 1 - continue - if(not new and abs((val/np.float64(clkC))-1) > 0.05): - print('Different measurement for {} ({}): {}(old) vs. {}(new)\nPlease check for correctness (no changes were made).'.format(instr, clmn, val, clkC)) - txtOutput = True - if(txtOutput): - print() - txtOutput = False -# Now merge the DataFrames and write new csv file - self.df = self.df.append(pd.DataFrame(newData, columns=['instr','TP','LT','ports']), ignore_index=True) - csv = self.df.to_csv(index=False) - self.write_csv(csv) - print('ibench output {} successfully in database included.'.format(self.filepath.split('/')[-1])) - print('{} values were added.'.format(addedValues)) - - - def inspect_binary(self): - """ - Main function of OSACA. Inspect binary file and create analysis. - """ -# Check args and exit program if something's wrong - if(not self.check_arch()): - print('Invalid microarchitecture.') - sys.exit() - if(not self.check_elffile()): - print('Invalid file path or file format.') - sys.exit() -# Finally check for database for the chosen architecture - self.read_csv() - - print('Everything seems fine! Let\'s start checking!') - for i,line in enumerate(self.srcCode): - if(i == 0): - self.check_line(line, True) - else: - self.check_line(line) - output = self.create_output() - print(output) - - - def inspect_with_iaca(self): - """ - Main function of OSACA with IACA markers instead of OSACA marker. - Inspect binary file and create analysis. - """ -# Check args and exit program if something's wrong - if(not self.check_arch()): - print('Invalid microarchitecture.') - sys.exit() -# Check if input file is a binary or assembly file - try: - binaryFile = True - if(not self.check_elffile()): - print('Invalid file path or file format.') - sys.exit() - except (TypeError,IndexError): - binaryFile = False - if(not self.check_file(True)): - print('Invalid file path or file format.') - sys.exit() -# Finally check for database for the chosen architecture - self.read_csv() - - print('Everything seems fine! Let\'s start checking!') - if(binaryFile): - self.iaca_bin() - else: - self.iaca_asm() - output = self.create_output() - print(output) - -##------------------------------------------------------------------------------ - - def check_arch(self): - """ - Check if the architecture is valid. - - Returns - ------- - bool - True if arch is supported - False if arch is not supported - - """ - archList = ['SNB','IVB','HSW', 'BDW', 'SKL'] - if(self.arch in archList): - return True - else: - return False - - - def check_elffile(self): - """ - Check if the given filepath exists, if the format is the needed elf64 - and store file data in attribute srcCode. - - Returns - ------- - bool - True if file is expected elf64 file - False if file does not exist or is not an elf64 file - - """ - if(os.path.isfile(self.filepath)): - self.store_srcCode_elf() - if('file format elf64' in self.srcCode[1]): - return True - return False - - - def check_file(self,iacaFlag=False): - """ - Check if the given filepath exists and store file data in attribute - srcCode. - - Parameters - ---------- - iacaFlag : bool - store file data as a string in attribute srcCode if True, - store it as a list of strings (lines) if False (default False) - - Returns - ------- - bool - True if file exists - False if file does not exist - - """ - if(os.path.isfile(self.filepath)): - self.store_srcCode(iacaFlag) - return True - return False - - def store_srcCode_elf(self): - """ - Load binary file compiled with '-g' in class attribute srcCode and - separate by line. - """ - self.srcCode = subprocess.run(['objdump', '--source', self.filepath], stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n') - - - def store_srcCode(self,iacaFlag=False): - """ - Load arbitrary file in class attribute srcCode. - - Parameters - ---------- - iacaFlag : bool - store file data as a string in attribute srcCode if True, - store it as a list of strings (lines) if False (default False) - """ - try: - f = open(self.filepath, 'r') - except IOError: - print('IOError: file \'{}\' not found'.format(self.filepath)) - self.srcCode = '' - for line in f: - self.srcCode += line - f.close() - if(iacaFlag): - return - self.srcCode = self.srcCode.split('\n') - - - def read_csv(self): - """ - Reads architecture dependent CSV from data directory. - - Returns - ------- - DataFrame - CSV as DataFrame object - """ - currDir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) - df = pd.read_csv(currDir+'/data/'+self.arch.lower()+'_data.csv') - return df - - - def write_csv(self,csv): - """ - Writes architecture dependent CSV into data directory. - - Parameters - ---------- - csv : str - CSV data as string - """ - try: - f = open('data/'+self.arch.lower()+'_data.csv', 'w') - except IOError: - print('IOError: file \'{}\' not found in ./data'.format(self.arch.lower()+'_data.csv')) - f.write(csv) - f.close() - - - - def create_sequences(self,end=101): - """ - Creates list of integers from 1 to end and list of their reciprocals. - - Parameters - ---------- - end : int - End value for list of integers (default 101) - - Returns - ------- - [int] - cycList of integers - [float] - reciList of floats - """ - cycList = [] - reciList = [] - for i in range(1, end): - cycList.append(i) - reciList.append(1/i) - return cycList,reciList - - - def validate_val(self,clkC, instr, isTP, cycList, reciList): - """ - Validate given clock cycle clkC and return rounded value in case of - success. - - A succeeded validation means the clock cycle clkC is only 5% higher or - lower than an integer value from cycList or - if clkC is a throughput - value - 5% higher or lower than a reciprocal from the reciList. - - Parameters - ---------- - clkC : float - Clock cycle to validate - instr : str - Instruction for warning output - isTP : bool - True if a throughput value is to check, False for a latency value - cycList : [int] - Cycle list for validating - reciList : [float] - Reciprocal cycle list for validating - - Returns - ------- - float - Clock cycle, either rounded to an integer or its reciprocal or the - given clkC parameter - """ - clmn = 'LT' - if(isTP): - clmn = 'TP' - for i in range(0, len(cycList)): - if(cycList[i]*1.05 > float(clkC) and cycList[i]*0.95 < float(clkC)): -# Value is probably correct, so round it to the estimated value - return cycList[i] -# Check reciprocal only if it is a throughput value - elif(isTP and reciList[i]*1.05 > float(clkC) and reciList[i]*0.95 < float(clkC)): -# Value is probably correct, so round it to the estimated value - return reciList[i] -# No value close to an integer or its reciprocal found, we assume the -# measurement is incorrect - print('Your measurement for {} ({}) is probably wrong. Please inspect your benchmark!'.format(instr, clmn)) - print('The program will continue with the given value') - return clkC - - - def check_line(self,line,firstAppearance=False): - """ - Inspect line of source code and process it if inside the marked snippet. - - Parameter - --------- - line : str - Line of source code - firstAppearance : bool - Necessary for setting indenting character (default False) - """ -# Check if marker is in line - if(self.marker in line): -# First, check if high level code in indented with whitespaces or tabs - if(firstAppearance): - self.indentChar = self.get_indent_chars(line) -# Now count the number of whitespaces - self.numSeps = (re.split(self.marker, line)[0]).count(self.indentChar) - self.sem = 2 - elif(self.sem > 0): -# We're in the marked code snippet -# Check if the line is ASM code and - if not - check if we're still in the loop - match = re.search(self.ASM_LINE, line) - if(match): -# Further analysis of instructions -# Check if there are comments in line - if(r'//' in line): - return - self.check_instr(''.join(re.split(r'\t', line)[-1:])) - elif((re.split(r'\S', line)[0]).count(self.indentChar) <= self.numSeps): -# Not in the loop anymore - or yet. We decrement the semaphore - self.sem = self.sem-1 - - - def get_indent_chars(self,line): - """ - Check if indentation characters are either tabulators or whitespaces - - Parameters - ---------- - line : str - Line with start marker in it - - Returns - ------- - str - Indentation character as string - """ - numSpaces = (re.split(self.marker, line)[0]).count(' ') - numTabs = (re.split(self.marker, line)[0]).count('\t') - if(numSpaces != 0 and numTabs == 0): - return ' ' - elif(numSpaces == 0 and numTabs != 0): - return '\t' - else: - raise NotImplementedError('Indentation of code is only supported for whitespaces and tabs.') - - - def iaca_bin(self): - """ - Extract instruction forms out of binary file using IACA markers. - """ - self.marker = r'fs addr32 nop' - for line in self.srcCode: -# Check if marker is in line - if(self.marker in line): - self.sem += 1 - elif(self.sem == 1): -# We're in the marked code snippet -# Check if the line is ASM code - match = re.search(self.ASM_LINE, line) - if(match): -# Further analysis of instructions -# Check if there are comments in line - if(r'//' in line): - continue -# Do the same instruction check as for the OSACA marker line check - self.check_instr(''.join(re.split(r'\t', line)[-1:])) - elif(self.sem == 2): -# Not in the loop anymore. Due to the fact it's the IACA marker we can stop here -# After removing the last line which belongs to the IACA marker - del self.instrForms[-1:] - return - - - def iaca_asm(self): - """ - Extract instruction forms out of assembly file using IACA markers. - """ -# Extract the code snippet surround by the IACA markers - code = self.srcCode -# Search for the start marker - match = re.match(self.IACA_SM, code) - while(not match): - code = code.split('\n',1)[1] - match = re.match(self.IACA_SM, code) -# Search for the end marker - code = (code.split('144',1)[1]).split('\n',1)[1] - res = '' - match = re.match(self.IACA_EM, code) - while(not match): - res += code.split('\n',1)[0]+'\n' - code = code.split('\n',1)[1] - match = re.match(self.IACA_EM, code) -# Split the result by line go on like with OSACA markers - res = res.split('\n') - for line in res: - line = line.split('#')[0] - line = line.lstrip() - if(len(line) == 0 or '//' in line or line.startswith('..')): - continue - self.check_instr(line) - - - def check_instr(self,instr): - """ - Inspect instruction for its parameters and add it to the instruction forms - pool instrForm. - - Parameters - ---------- - instr : str - Instruction as string - """ -# Check for strange clang padding bytes - while(instr.startswith('data32')): - instr = instr[7:] -# Separate mnemonic and operands - mnemonic = instr.split()[0] - params = ''.join(instr.split()[1:]) -# Check if line is not only a byte - empty_byte = re.compile(r'[0-9a-f]{2}') - if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2): - return -# Check if there's one or more operands and store all in a list - param_list = self.flatten(self.separate_params(params)) - param_list_types = list(param_list) -# Check operands and separate them by IMMEDIATE (IMD), REGISTER (REG), -# MEMORY (MEM) or LABEL(LBL) - for i in range(len(param_list)): - op = param_list[i] - if(len(op) <= 0): - op = Parameter('NONE') - elif(op[0] == '$'): - op = Parameter('IMD') - elif(op[0] == '%' and '(' not in op): - j = len(op) - opmask = False - if('{' in op): - j = op.index('{') - opmask = True - op = Register(op[1:j], opmask) - elif('<' in op or op.startswith('.')): - op = Parameter('LBL') - else: - op = MemAddr(op) - param_list[i] = str(op) - param_list_types[i] = op -# Add to list - if(len(instr) > self.longestInstr): - self.longestInstr = len(instr) - instrForm = [mnemonic]+list(reversed(param_list_types))+[instr] - self.instrForms.append(instrForm) -# If flag is set, create testcase for instruction form -# Do this in reversed param list order, du to the fact it's intel syntax -# Only create benchmark if no label (LBL) is part of the operands - if('LBL' in param_list or '' in param_list): - return - tc = Testcase(mnemonic, list(reversed(param_list_types)), '64') -# Only write a testcase if it not already exists - writeTP, writeLT = tc._Testcase__is_in_dir() - tc.write_testcase(not writeTP, not writeLT) - - - def separate_params(self,params): - """ - Delete comments, separates parameters and return them as a list. - - Parameters - ---------- - params : str - Splitted line after mnemonic - - Returns - ------- - [[...[str]]] - Nested list of strings. The number of nest levels depend on the - number of parametes given. - """ - param_list = [params] - if(',' in params): - if(')' in params): - if(params.index(')') < len(params)-1 and params[params.index(')')+1] == ','): - i = params.index(')')+1 - elif(params.index('(') < params.index(',')): - return param_list - else: - i = params.index(',') - else: - i = params.index(',') - param_list = [params[:i],self.separate_params(params[i+1:])] - elif('#' in params): - i = params.index('#') - param_list = [params[:i]] - return param_list - - def flatten(self,l): - """ - Flatten a nested list of strings. - - Parameters - ---------- - l : [[...[str]]] - Nested list of strings - - Returns - ------- - [str] - List of strings - """ - if l == []: - return l - if(isinstance(l[0], list)): - return self.flatten(l[0]) + self.flatten(l[1:]) - return l[:1] + self.flatten(l[1:]) - - - def create_output(self,tp_list=False,pr_sched=True): - """ - Creates output of analysed file including a time stamp. - - Parameters - ---------- - tp_list : bool - Boolean for indicating the need for the throughput list as output - (default False) - pr_sched : bool - Boolean for indicating the need for predicting a scheduling - (default True) - - Returns - ------- - str - OSACA output - """ -# Check the output alignment depending on the longest instruction - if(self.longestInstr > 70): - self.longestInstr = 70 - horizLine = self.create_horiz_sep() - ws = ' '*(len(horizLine)-23) -# Write general information about the benchmark - output = ( '--'+horizLine+'\n' - '| Analyzing of file:\t'+os.path.abspath(self.filepath)+'\n' - '| Architecture:\t\t'+self.arch+'\n' - '| Timestamp:\t\t'+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'\n') - - if(tp_list): - output += self.create_TP_list(horizLine) - if(pr_sched): - output += '\n\n' - sched = Scheduler(self.arch, self.instrForms) - schedOutput,portBinding = sched.schedule() - binding = sched.get_port_binding(portBinding) - output += sched.get_report_info()+'\n'+binding+'\n\n'+schedOutput - blockTP = round(max(portBinding), 2) - output += 'Total number of estimated throughput: '+str(blockTP) - return output - - - def create_horiz_sep(self): - """ - Calculate and return horizontal separator line. - - Returns - ------- - str - Horizontal separator line - """ - return '-'*(self.longestInstr+8) - - - def create_TP_list(self,horizLine): - """ - Create list of instruction forms with the proper throughput value. - - Parameter - --------- - horizLine : str - Calculated horizontal line for nice alignement - - Returns - ------- - str - Throughput list output for printing - """ - warning = False - ws = ' '*(len(horizLine)-23) - - output = ('\n| INSTRUCTION'+ws+'CLOCK CYCLES\n' - '| '+horizLine+'\n|\n') -# Check for the throughput data in CSV - for elem in self.instrForms: - extension = '' - opExt = [] - for i in range(1, len(elem)-1): - optmp = '' - if(isinstance(elem[i], Register) and elem[i].reg_type == 'GPR'): - optmp = 'r'+str(elem[i].size) - elif(isinstance(elem[i], MemAddr)): - optmp = 'mem' - else: - optmp = str(elem[i]).lower() - opExt.append(optmp) - operands = '_'.join(opExt) -# Now look up the value in the dataframe -# Check if there is a stored throughput value in database - import warnings - warnings.filterwarnings("ignore", 'This pattern has match groups') - series = self.df['instr'].str.contains(elem[0]+'-'+operands) - if( True in series.values): -# It's a match! - notFound = False - try: - tp = self.df[self.df.instr == elem[0]+'-'+operands].TP.values[0] - except IndexError: -# Something went wrong - print('Error while fetching data from database') - continue -# Did not found the exact instruction form. -# Try to find the instruction form for register operands only - else: - opExtRegs = [] - for operand in opExt: - try: - regTmp = Register(operand) - opExtRegs.append(True) - except KeyError: - opExtRegs.append(False) - pass - if(not True in opExtRegs): -# No register in whole instruction form. How can I find out what regsize we need? - print('Feature not included yet: ', end='') - print(elem[0]+' for '+operands) - tp = 0 - notFound = True - warning = True - - numWhitespaces = self.longestInstr-len(elem[-1]) - ws = ' '*numWhitespaces+'| ' - n_f = ' '*(5-len(str(tp)))+'*' - data = '| '+elem[-1]+ws+str(tp)+n_f+'\n' - output += data - continue - if(opExtRegs[0] == False): -# Instruction stores result in memory. Check for storing in register instead. - if(len(opExt) > 1): - if(opExtRegs[1] == True): - opExt[0] = opExt[1] - elif(len(optExt > 2)): - if(opExtRegs[2] == True): - opExt[0] = opExt[2] - if(len(opExtRegs) == 2 and opExtRegs[1] == False): -# Instruction loads value from memory and has only two operands. Check for -# loading from register instead - if(opExtRegs[0] == True): - opExt[1] = opExt[0] - if(len(opExtRegs) == 3 and opExtRegs[2] == False): -# Instruction loads value from memory and has three operands. Check for loading -# from register instead - opExt[2] = opExt[0] - operands = '_'.join(opExt) -# Check for register equivalent instruction - series = self.df['instr'].str.contains(elem[0]+'-'+operands) - if( True in series.values): -# It's a match! - notFound = False - try: - tp = self.df[self.df.instr == elem[0]+'-'+operands].TP.values[0] - - except IndexError: -# Something went wrong - print('Error while fetching data from database') - continue -# Did not found the register instruction form. Set warning and go on with -# throughput 0 - else: - tp = 0 - notFound = True - warning = True -# Check the alignement again - numWhitespaces = self.longestInstr-len(elem[-1]) - ws = ' '*numWhitespaces+'| ' - n_f = '' - if(notFound): - n_f = ' '*(5-len(str(tp)))+'*' - data = '| '+elem[-1]+ws+'{:3.2f}'.format(tp)+n_f+'\n' - output += data -# Finally end the list of throughput values - numWhitespaces = self.longestInstr-27 - ws = ' '+' '*numWhitespaces - output += '| '+horizLine+'\n' - if(warning): - output += ('\n\n* There was no throughput value found ' - 'for the specific instruction form.' - '\n Please create a testcase via the create_testcase-method ' - 'or add a value manually.') - return output - - -##------------------------------------------------------------------------------ -##------------Main method-------------- -def main(): -# Parse args - parser = argparse.ArgumentParser(description='Analyzes a marked innermost loop snippet for a given architecture type and prints out the estimated average throughput') - parser.add_argument('-V', '--version', action='version', version='%(prog)s 0.1') - parser.add_argument('--arch', dest='arch', type=str, help='define architecture (SNB, IVB, HSW, BDW, SKL)') - parser.add_argument('filepath', type=str, help='path to object (Binary, ASM, CSV)') - group = parser.add_mutually_exclusive_group(required=False) - group.add_argument('-i', '--include-ibench', dest='incl', action='store_true', help='includes the given values in form of the output of ibench in the database') - group.add_argument('--iaca', dest='iaca', action='store_true', help='search for IACA markers instead the OSACA marker') - group.add_argument('-m', '--insert-marker', dest='insert_marker', action='store_true', help='try to find blocks probably corresponding to loops in assembly and insert IACA marker') - -# Store args in global variables - inp = parser.parse_args() - if(inp.arch is None and inp.insert_marker is None): - raise ValueError('Please specify an architecture') - if(inp.arch is not None): - arch = inp.arch.upper() - filepath = inp.filepath - inclIbench = inp.incl - iacaFlag = inp.iaca - insert_m = inp.insert_marker - -# Create Osaca object - if(inp.arch is not None): - osaca = Osaca(arch, filepath) - - if(inclIbench): - osaca.include_ibench() - elif(iacaFlag): - osaca.inspect_with_iaca() - elif(insert_m): - try: - from kerncraft import iaca - except ImportError: - print('ImportError: Module kerncraft not installed. Use \'pip install --user kerncraft\' for installation.\nFor more information see https://github.com/RRZE-HPC/kerncraft') - sys.exit() - iaca.iaca_instrumentation(input_file=filepath, output_file=filepath, - block_selection='manual', pointer_increment=1) - else: - osaca.inspect_binary() - - -##------------Main method-------------- -if __name__ == '__main__': - main()