diff --git a/EUsched.py b/EUsched.py index 9bfd863..9e5f9e0 100755 --- a/EUsched.py +++ b/EUsched.py @@ -30,7 +30,11 @@ class Scheduler(object): def schedule_FCFS(self): ''' Schedules Instruction Form list via First Come First Serve algorithm. - Returns a tuple containing the graphic output as string and the total throughput time as int + + Returns + ------- + (str, int) + A tuple containing the graphic output as string and the total throughput time as int. ''' sched = '' total = 0 @@ -41,7 +45,7 @@ class Scheduler(object): searchString = instrForm[0]+'-'+self.get_operand_suffix(instrForm) entry = self.df.loc[lambda df: df.instr == searchString,'LT':'ports'] tup = entry.ports.values[0] - if(len(tup) == 1 and tup[0] == -1): + if(len(tup) == 1 and tup[0][0] == -1): raise IndexError() except IndexError: # Instruction form not in CSV @@ -68,7 +72,20 @@ class Scheduler(object): def test_ports_FCFS(self, occ_ports, needed_ports): ''' - tests if current configuration of ports is possible and returns boolean + Test if current configuration of ports is possible and returns boolean + + Parameters + ---------- + occ_ports : [int] + Tuple to inspect for current port occupation + needed_ports : (int) + Tuple with needed port(s) for particular instruction form + + Returns + ------- + bool + True if needed ports can get scheduled on current port occupation + False if not ''' for port in needed_ports: if(occ_ports[port] != 0): @@ -76,10 +93,21 @@ class Scheduler(object): return True def schedule_Tomasulo(self): + ''' + Not implement yet. Schedules Instruction Form list via Tomasulo algorithm. + ''' print('Scheduling with Tomasulo algorithm...') return '' def get_head(self): + ''' + Creates right heading for CPU architecture. 
+ + Returns + ------- + str + String containing the header + ''' analysis = 'Throughput Analysis Report\n'+('-'*26)+'\n' annotations = ( '* - No information for this instruction in database\n' '\n') @@ -93,6 +121,21 @@ class Scheduler(object): return head def get_line(self, occ_ports, instrName): + ''' + Create line with port occupation for output. + + Parameters + ---------- + occ_ports : (int) + Integer tuple containing needed ports + instrName : str + Name of instruction form for output + + Returns + ------- + str + String for output containing port scheduling for instrName + ''' line = '' for i in occ_ports: cycles = ' ' if (i == 0) else float(i) @@ -102,6 +145,19 @@ class Scheduler(object): def get_operand_suffix(self, instrForm): + ''' + Creates operand suffix out of list of Parameters. + + Parameters + ---------- + instrForm : [str, Parameter, ..., Parameter, str] + Instruction Form data structure + + Returns + ------- + str + Operand suffix for searching in database + ''' extension = '' opExt = [] for i in range(1, len(instrForm)-1): diff --git a/Params.py b/Params.py index 1cd146d..dc5f306 100755 --- a/Params.py +++ b/Params.py @@ -7,6 +7,14 @@ class Parameter(object): raise NameError("Type not supported: "+ptype) def print(self): + ''' + Prints Parameter. + + Returns + ------- + str + Parameter as string + ''' if(self.ptype == "NONE"): return "" else: @@ -44,6 +52,14 @@ class MemAddr(Parameter): raise NameError("Type not supported: "+name) def print(self): + ''' + Prints MemAddr. + + Returns + ------- + str + MemAddr as string + ''' mem_format = "MEM(" if(self.sreg): mem_format += "sreg:" @@ -100,6 +116,14 @@ class Register(Parameter): # print(lncnt) def print(self): + ''' + Prints Register. 
+ + Returns + ------- + str + Register as string + ''' opmask = "" if(self.mask): opmask = "{opmask}" diff --git a/Testcase.py b/Testcase.py index 50cccd2..0917d7a 100755 --- a/Testcase.py +++ b/Testcase.py @@ -54,7 +54,7 @@ class Testcase(object): ##---------------------------------------------------------------- # Constructor - def __init__(self, _mnemonic, _param_list, _num_instr='12'): + def __init__(self, _mnemonic, _param_list, _num_instr='32'): self.instr = _mnemonic.lower() self.param_list = _param_list # num_instr must be an even number @@ -71,6 +71,9 @@ class Testcase(object): def write_testcase(self): + """ + Write testcase for class attributes in a file. + """ regs = self.param_list extension = '' # Add operands @@ -94,6 +97,15 @@ class Testcase(object): # Check operands def __define_operands(self): + """ + Check for the number of operands and initialise the GPRs if necessary. + + Returns + ------- + (str, str, str, str, str, str) + String tuple containing types of operands and if needed push/pop operations, the + initialisation of general purpose regs and the copy if registers. + """ oprnds = self.param_list op_a, op_b, op_c = ('', '', '') gprPush, gprPop, zeroGPR = ('', '', '') @@ -138,8 +150,17 @@ class Testcase(object): copy = '' return (op_a, op_b, op_c, gprPush, gprPop, zeroGPR, copy) -# Initialise 11 general purpose registers and set them to zero + def __initialise_gprs(self): + """ + Initialise eleven general purpose registers and set them to zero. + + Returns + ------- + (str, str, str) + String tuple for push, pop and initalisation operations + """ + gprPush = '' gprPop = '' zeroGPR = '' @@ -154,6 +175,19 @@ class Testcase(object): # Copy created values in specific register def __copy_regs(self, reg): + """ + Copy created values in specific register. 
+ + Parameters + ---------- + reg : Register + Register for copying the value + + Returns + ------- + str + String containing the copy instructions + """ copy = '\t\t# copy DP 1.0\n' # Different handling for GPR, MMX and SSE/AVX registers if(reg.reg_type == 'GPR'): @@ -188,6 +222,14 @@ class Testcase(object): def __define_header(self): + """ + Define header. + + Returns + ------- + (str, str, str, str) + String tuple containing the header, value initalisations and extensions + """ def_instr = '#define INSTR '+self.instr+'\n' ninstr = '#define NINST '+self.num_instr+'\n' pi = ('PI:\n' @@ -226,8 +268,16 @@ class Testcase(object): '\t\tvinsert64x4 zmm0, zmm0, ymm0, 0x1\n') return (def_instr, ninstr, init, expand) -# Create latency loop + def __define_loop_lat(self): + """ + Create latency loop. + + Returns + ------- + str + Latency loop as string + """ loop_lat = ('loop:\n' '\t\tinc i\n') if(self.num_operands == 1): @@ -253,8 +303,16 @@ class Testcase(object): '\t\tjl loop\n') return loop_lat -# Create throughput loop + def __define_loop_thrpt(self): + """ + Create throughput loop. + + Returns + ------- + str + Throughput loop as string + """ loop_thrpt = ('loop:\n' '\t\tinc i\n') ext = '' @@ -278,6 +336,22 @@ class Testcase(object): def __is_in_dir(self, name, path): + """ + Check if file with the name name is in directory path. 
+ + Parameters + ---------- + name : str + Name of file + path : str + Path of directory + + Returns + ------- + bool + True if file is in directory + False if file is not in directory + """ for root, dirs, files in os.walk(path): if name in files: return True diff --git a/osaca.py b/osaca.py index 1fbf33d..c14551f 100755 --- a/osaca.py +++ b/osaca.py @@ -11,550 +11,763 @@ import pandas as pd from datetime import datetime import numpy as np -#----------Global variables-------------- -arch = '' -archList = ['SNB','IVB','HSW', 'BDW', 'SKL'] -filepath = '' -srcCode = '' -marker = r'//STARTLOOP' -asm_line = re.compile(r'\s[0-9a-f]+[:]') -numSeps = 0 -sem = 0 -firstAppearance = True -instrForms = list() -df = '' -horizontalSeparator = '' -longestInstr = 30 -cycList = [] -reciList = [] + +class Osaca(object): + arch = None + filepath = None + srcCode = None + df = None + instrForms = None +# Variables for checking lines + numSeps = 0 + indentChar = '' + sem = 0 + marker = r'//STARTLOOP' + +# Variables for creating output + longestInstr = 30 +# Constants + ASM_LINE = re.compile(r'\s[0-9a-f]+[:]') # Matches every variation of the IACA start marker -iaca_sm = re.compile(r'\s*movl[ \t]+\$111[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ \t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') + IACA_SM = re.compile(r'\s*movl[ \t]+\$111[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ \t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') # Matches every variation of the IACA end marker -iaca_em = re.compile(r'\s*movl[ \t]+\$222[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ \t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') -#--------------------------------------- + IACA_EM = re.compile(r'\s*movl[ \t]+\$222[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ 
\t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') -# Check if the architecture arg is valid -def check_arch(): - if(arch in archList): - return True - else: - return False - -# Check if the given filepath exists and if the format is the needed elf64 -def check_elffile(): - if(os.path.isfile(filepath)): - create_elffile() - if('file format elf64' in srcCode[1]): - return True - return False - -# Check if the given filepath exists -def check_file(iacaFlag=False): - if(os.path.isfile(filepath)): - get_file(iacaFlag) - return True - return False - -# Load binary file in variable srcCode and separate by line -def create_elffile(): - global srcCode - srcCode = subprocess.run(['objdump', '--source', filepath], stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n') - -# Load arbitrary file in variable srcCode and separate by line -def get_file(iacaFlag): - global srcCode - srcCode = '' - try: - f = open(filepath, 'r') - except IOError: - print('IOError: file \'{}\' not found'.format(filepath)) - for line in f: - srcCode += line - f.close() - if(iacaFlag): - return - srcCode = srcCode.split('\n') + def __init__(self, _arch, _filepath): + self.arch = _arch + self.filepath = _filepath + self.instrForms = [] -def check_line(line): - global numSeps - global sem - global firstAppearance -# Check if marker is in line - if(marker in line): -# First, check if high level code in indented with whitespaces or tabs - if(firstAppearance): - set_char_counter(line) - firstAppearance = False -# Now count the number of whitespaces - numSeps = (re.split(marker, line)[0]).count(cntChar) - sem = 2 - elif(sem > 0): -# We're in the marked code snippet -# Check if the line is ASM code and - if not - check if we're still in the loop - match = re.search(asm_line, line) - if(match): -# Further analysis of instructions -# Check if there are comments in line - if(r'//' in line): - return - check_instr(''.join(re.split(r'\t', 
line)[-1:])) - elif((re.split(r'\S', line)[0]).count(cntChar) <= numSeps): -# Not in the loop anymore - or yet. We decrement the semaphore - sem = sem-1 +##-------------------main functions depending on arguments---------------------- + def include_ibench(self): + """ + Reads ibench output and includes it in the architecture specific csv + file. + """ +# Check args and exit program if something's wrong + if(not self.check_arch()): + print('Invalid microarchitecture.') + sys.exit() + if(not self.check_file()): + print('Invalid file path or file format.') + sys.exit() +# Check for database for the chosen architecture + self.df = self.read_csv() +# Create sequence of numbers and their reciprokals for validate the measurements + cycList,reciList = self.create_sequences() + print('Everything seems fine! Let\'s start!') + newData = [] + addedValues = 0 + for line in self.srcCode: + if('Using frequency' in line or len(line) == 0): + continue + clmn = 'LT' + instr = line.split()[0][:-1] + if('TP' in line): +# We found a command with a throughput value. Get instruction and the number of +# clock cycles and remove the '-TP' suffix. + clmn = 'TP' + instr = instr[:-3] +# Otherwise it is a latency value. Nothing to do. 
+ clkC = line.split()[1] + clkC_tmp = clkC + clkC = self.validate_val(clkC, instr, True if (clmn == 'TP') else False, cycList, reciList) + txtOutput = True if (clkC_tmp == clkC) else False + val = -2 + new = False + try: + entry = self.df.loc[lambda df: df.instr == instr,clmn] + val = entry.values[0] + except IndexError: +# Instruction not in database yet --> add it + new = True +# First check if LT or TP value has already been added before + for i,item in enumerate(newData): + if(instr in item): + if(clmn == 'TP'): + newData[i][1] = clkC + elif(clmn == 'LT'): + newData[i][2] = clkC + new = False + break + if(new and clmn == 'TP'): + newData.append([instr,clkC,'-1',((-1,),)]) + elif(new and clmn == 'LT'): + newData.append([instr,'-1',clkC,((-1,),)]) + new = True + addedValues += 1 + pass +# If val is -1 (= not filled with a valid value) add it immediately + if(val == -1): + self.df.set_value(entry.index[0], clmn, clkC) + addedValues += 1 + continue + if(not new and abs((val/np.float64(clkC))-1) > 0.05): + print('Different measurement for {} ({}): {}(old) vs. {}(new)\nPlease check for correctness (no changes were made).'.format(instr, clmn, val, clkC)) + txtOutput = True + if(txtOutput): + print() + txtOutput = False +# Now merge the DataFrames and write new csv file + self.df = self.df.append(pd.DataFrame(newData, columns=['instr','TP','LT','ports']), ignore_index=True) + csv = self.df.to_csv(index=False) + self.write_csv(csv) + print('ibench output {} successfully in database included.'.format(self.filepath.split('/')[-1])) + print('{} values were added.'.format(addedValues)) + + + def inspect_binary(self): + """ + Main function of OSACA. Inspect binary file and create analysis. 
+ """ +# Check args and exit program if something's wrong + if(not self.check_arch()): + print('Invalid microarchitecture.') + sys.exit() + if(not self.check_elffile()): + print('Invalid file path or file format.') + sys.exit() +# Finally check for database for the chosen architecture + self.read_csv() + + print('Everything seems fine! Let\'s start checking!') + for i,line in enumerate(self.srcCode): + if(i == 0): + self.check_line(line, True) + else: + self.check_line(line) + output = self.create_output() + print(output) -# Check if separators are either tabulators or whitespaces -def set_char_counter(line): - global cntChar - numSpaces = (re.split(marker, line)[0]).count(' ') - numTabs = (re.split(marker, line)[0]).count('\t') - if(numSpaces != 0 and numTabs == 0): - cntChar = ' ' - elif(numSpaces == 0 and numTabs != 0): - cntChar = '\t' - else: - raise NotImplementedError('Indentation of code is only supported for whitespaces and tabs.') - - -def check_instr(instr): - global instrForms - global longestInstr -# Check for strange clang padding bytes - while(instr.startswith('data32')): - instr = instr[7:] -# Separate mnemonic and operands - mnemonic = instr.split()[0] - params = ''.join(instr.split()[1:]) -# Check if line is not only a byte - empty_byte = re.compile(r'[0-9a-f]{2}') - if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2): - return -# Check if there's one or more operands and store all in a list - param_list = flatten(separate_params(params)) - param_list_types = list(param_list) -# check operands and separate them by IMMEDIATE (IMD), REGISTER (REG). 
MEMORY (MEM) or LABEL(LBL) - for i in range(len(param_list)): - op = param_list[i] - if(len(op) <= 0): - op = Parameter('NONE') - elif(op[0] == '$'): - op = Parameter('IMD') - elif(op[0] == '%' and '(' not in op): - j = len(op) - opmask = False - if('{' in op): - j = op.index('{') - opmask = True - op = Register(op[1:j], opmask) - elif('<' in op or op.startswith('.')): - op = Parameter('LBL') + def inspect_with_iaca(self): + """ + Main function of OSACA with IACA markers instead of OSACA marker. + Inspect binary file and create analysis. + """ +# Check args and exit program if something's wrong + if(not self.check_arch()): + print('Invalid microarchitecture.') + sys.exit() +# Check if input file is a binary or assembly file + try: + binaryFile = True + if(not self.check_elffile()): + print('Invalid file path or file format.') + sys.exit() + except (TypeError,IndexError): + binaryFile = False + if(not self.check_file(True)): + print('Invalid file path or file format.') + sys.exit() +# Finally check for database for the chosen architecture + self.read_csv() + + print('Everything seems fine! Let\'s start checking!') + if(binaryFile): + self.iaca_bin() else: - op = MemAddr(op) - param_list[i] = op.print() - param_list_types[i] = op -#Add to list - if(len(instr) > longestInstr): - longestInstr = len(instr) - instrForm = [mnemonic]+list(reversed(param_list_types))+[instr] - instrForms.append(instrForm) + self.iaca_asm() + output = self.create_output() + print(output) + +##------------------------------------------------------------------------------ + + def check_arch(self): + """ + Check if the architecture is valid. 
+ + Returns + ------- + bool + True if arch is supported + False if arch is not supported + + """ + archList = ['SNB','IVB','HSW', 'BDW', 'SKL'] + if(self.arch in archList): + return True + else: + return False + + + def check_elffile(self): + """ + Check if the given filepath exists, if the format is the needed elf64 + and store file data in attribute srcCode. + + Returns + ------- + bool + True if file is expected elf64 file + False if file does not exist or is not an elf64 file + + """ + if(os.path.isfile(self.filepath)): + self.store_srcCode_elf() + if('file format elf64' in self.srcCode[1]): + return True + return False -# Extract instruction forms out of binary file -def iaca_bin(): - global marker - global sem - global instrForms + def check_file(self,iacaFlag=False): + """ + Check if the given filepath exists and store file data in attribute + srcCode. + + Parameters + ---------- + iacaFlag : bool + store file data as a string in attribute srcCode if True, + store it as a list of strings (lines) if False (default False) + + Returns + ------- + bool + True if file exists + False if file does not exist + + """ + if(os.path.isfile(self.filepath)): + self.store_srcCode(iacaFlag) + return True + return False + + def store_srcCode_elf(self): + """ + Load binary file compiled with '-g' in class attribute srcCode and + separate by line. + """ + self.srcCode = subprocess.run(['objdump', '--source', self.filepath], stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n') + + + def store_srcCode(self,iacaFlag=False): + """ + Load arbitrary file in class attribute srcCode. 
+ + Parameters + ---------- + iacaFlag : bool + store file data as a string in attribute srcCode if True, + store it as a list of strings (lines) if False (default False) + """ + try: + f = open(self.filepath, 'r') + except IOError: + print('IOError: file \'{}\' not found'.format(self.filepath)) + self.srcCode = '' + for line in f: + self.srcCode += line + f.close() + if(iacaFlag): + return + self.srcCode = self.srcCode.split('\n') - marker = r'fs addr32 nop' - for line in srcCode: + + def read_csv(self): + """ + Reads architecture dependent CSV from data directory. + + Returns + ------- + DataFrame + CSV as DataFrame object + """ + currDir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) + df = pd.read_csv(currDir+'/data/'+self.arch.lower()+'_data.csv') + return df + + + def write_csv(self,csv): + """ + Writes architecture dependent CSV into data directory. + + Parameters + ---------- + csv : str + CSV data as string + """ + try: + f = open('data/'+self.arch.lower()+'_data.csv', 'w') + except IOError: + print('IOError: file \'{}\' not found in ./data'.format(self.arch.lower()+'_data.csv')) + f.write(csv) + f.close() + + + + def create_sequences(self,end=101): + """ + Creates list of integers from 1 to end and list of their reciprocals. + + Parameters + ---------- + end : int + End value for list of integers (default 101) + + Returns + ------- + [int] + cycList of integers + [float] + reciList of floats + """ + cycList = [] + reciList = [] + for i in range(1, end): + cycList.append(i) + reciList.append(1/i) + return cycList,reciList + + + def validate_val(self,clkC, instr, isTP, cycList, reciList): + """ + Validate given clock cycle clkC and return rounded value in case of + success. + + A succeeded validation means the clock cycle clkC is only 5% higher or + lower than an integer value from cycList or - if clkC is a throughput + value - 5% higher or lower than a reciprocal from the reciList. 
+ + Parameters + ---------- + clkC : float + Clock cycle to validate + instr : str + Instruction for warning output + isTP : bool + True if a throughput value is to check, False for a latency value + cycList : [int] + Cycle list for validating + reciList : [float] + Reciprocal cycle list for validating + + Returns + ------- + float + Clock cycle, either rounded to an integer or its reciprocal or the + given clkC parameter + """ + clmn = 'LT' + if(isTP): + clmn = 'TP' + for i in range(0, len(cycList)): + if(cycList[i]*1.05 > float(clkC) and cycList[i]*0.95 < float(clkC)): +# Value is probably correct, so round it to the estimated value + return cycList[i] +# Check reciprocal only if it is a throughput value + elif(isTP and reciList[i]*1.05 > float(clkC) and reciList[i]*0.95 < float(clkC)): +# Value is probably correct, so round it to the estimated value + return reciList[i] +# No value close to an integer or its reciprocal found, we assume the +# measurement is incorrect + print('Your measurement for {} ({}) is probably wrong. Please inspect your benchmark!'.format(instr, clmn)) + print('The program will continue with the given value') + return clkC + + + def check_line(self,line,firstAppearance=False): + """ + Inspect line of source code and process it if inside the marked snippet. 
+ + Parameter + --------- + line : str + Line of source code + firstAppearance : bool + Necessary for setting indenting character (default False) + """ # Check if marker is in line - if(marker in line): - sem += 1 - elif(sem == 1): + if(self.marker in line): +# First, check if high level code in indented with whitespaces or tabs + if(firstAppearance): + self.indentChar = self.get_indent_chars(line) +# Now count the number of whitespaces + self.numSeps = (re.split(self.marker, line)[0]).count(self.indentChar) + self.sem = 2 + elif(self.sem > 0): # We're in the marked code snippet -# Check if the line is ASM code - match = re.search(asm_line, line) +# Check if the line is ASM code and - if not - check if we're still in the loop + match = re.search(self.ASM_LINE, line) if(match): # Further analysis of instructions # Check if there are comments in line if(r'//' in line): - continue + return + self.check_instr(''.join(re.split(r'\t', line)[-1:])) + elif((re.split(r'\S', line)[0]).count(self.indentChar) <= self.numSeps): +# Not in the loop anymore - or yet. We decrement the semaphore + self.sem = self.sem-1 + + + def get_indent_chars(self,line): + """ + Check if indentation characters are either tabulators or whitespaces + + Parameters + ---------- + line : str + Line with start marker in it + + Returns + ------- + str + Indentation character as string + """ + numSpaces = (re.split(self.marker, line)[0]).count(' ') + numTabs = (re.split(self.marker, line)[0]).count('\t') + if(numSpaces != 0 and numTabs == 0): + return ' ' + elif(numSpaces == 0 and numTabs != 0): + return '\t' + else: + raise NotImplementedError('Indentation of code is only supported for whitespaces and tabs.') + + + def iaca_bin(self): + """ + Extract instruction forms out of binary file using IACA markers. 
+ """ + self.marker = r'fs addr32 nop' + for line in self.srcCode: +# Check if marker is in line + if(self.marker in line): + self.sem += 1 + elif(self.sem == 1): +# We're in the marked code snippet +# Check if the line is ASM code + match = re.search(self.ASM_LINE, line) + if(match): +# Further analysis of instructions +# Check if there are comments in line + if(r'//' in line): + continue # Do the same instruction check as for the OSACA marker line check - check_instr(''.join(re.split(r'\t', line)[-1:])) - elif(sem == 2): + self.check_instr(''.join(re.split(r'\t', line)[-1:])) + elif(self.sem == 2): # Not in the loop anymore. Due to the fact it's the IACA marker we can stop here # After removing the last line which belongs to the IACA marker - del instrForms[-1:] - return - - -# Extract instruction forms out of assembly file -def iaca_asm(): + del self.instrForms[-1:] + return + + + def iaca_asm(self): + """ + Extract instruction forms out of assembly file using IACA markers. + """ # Extract the code snippet surround by the IACA markers - code = srcCode + code = self.srcCode # Search for the start marker - match = re.match(iaca_sm, code) - while(not match): - code = code.split('\n',1)[1] - match = re.match(iaca_sm, code) + match = re.match(self.IACA_SM, code) + while(not match): + code = code.split('\n',1)[1] + match = re.match(self.IACA_SM, code) # Search for the end marker - code = (code.split('144',1)[1]).split('\n',1)[1] - res = '' - match = re.match(iaca_em, code) - while(not match): - res += code.split('\n',1)[0]+'\n' - code = code.split('\n',1)[1] - match = re.match(iaca_em, code) + code = (code.split('144',1)[1]).split('\n',1)[1] + res = '' + match = re.match(self.IACA_EM, code) + while(not match): + res += code.split('\n',1)[0]+'\n' + code = code.split('\n',1)[1] + match = re.match(self.IACA_EM, code) # Split the result by line go on like with OSACA markers - res = res.split('\n') - for line in res: - line = line.split('#')[0] - line = line.lstrip() - 
if(len(line) == 0 or '//' in line or line.startswith('..')): - continue - check_instr(line) + res = res.split('\n') + for line in res: + line = line.split('#')[0] + line = line.lstrip() + if(len(line) == 0 or '//' in line or line.startswith('..')): + continue + self.check_instr(line) + + def check_instr(self,instr): + """ + Inspect instruction for its parameters and add it to the instruction forms + pool instrForm. + + Parameters + ---------- + instr : str + Instruction as string + """ +# Check for strange clang padding bytes + while(instr.startswith('data32')): + instr = instr[7:] +# Separate mnemonic and operands + mnemonic = instr.split()[0] + params = ''.join(instr.split()[1:]) +# Check if line is not only a byte + empty_byte = re.compile(r'[0-9a-f]{2}') + if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2): + return +# Check if there's one or more operands and store all in a list + param_list = self.flatten(self.separate_params(params)) + param_list_types = list(param_list) +# Check operands and separate them by IMMEDIATE (IMD), REGISTER (REG), +# MEMORY (MEM) or LABEL(LBL) + for i in range(len(param_list)): + op = param_list[i] + if(len(op) <= 0): + op = Parameter('NONE') + elif(op[0] == '$'): + op = Parameter('IMD') + elif(op[0] == '%' and '(' not in op): + j = len(op) + opmask = False + if('{' in op): + j = op.index('{') + opmask = True + op = Register(op[1:j], opmask) + elif('<' in op or op.startswith('.')): + op = Parameter('LBL') + else: + op = MemAddr(op) + param_list[i] = op.print() + param_list_types[i] = op +# Add to list + if(len(instr) > self.longestInstr): + self.longestInstr = len(instr) + instrForm = [mnemonic]+list(reversed(param_list_types))+[instr] + self.instrForms.append(instrForm) + -def separate_params(params): - param_list = [params] - if(',' in params): - if(')' in params): - if(params.index(')') < len(params)-1 and params[params.index(')')+1] == ','): - i = params.index(')')+1 - elif(params.index('(') < params.index(',')): - 
return param_list + def separate_params(self,params): + """ + Delete comments, separates parameters and return them as a list. + + Parameters + ---------- + params : str + Splitted line after mnemonic + + Returns + ------- + [[...[str]]] + Nested list of strings. The number of nest levels depend on the + number of parametes given. + """ + param_list = [params] + if(',' in params): + if(')' in params): + if(params.index(')') < len(params)-1 and params[params.index(')')+1] == ','): + i = params.index(')')+1 + elif(params.index('(') < params.index(',')): + return param_list + else: + i = params.index(',') else: i = params.index(',') - else: - i = params.index(',') - param_list = [params[:i],separate_params(params[i+1:])] - elif('#' in params): - i = params.index('#') - param_list = [params[:i]] - return param_list - -def flatten(l): - if l == []: - return l - if(isinstance(l[0], list)): - return flatten(l[0]) + flatten(l[1:]) - return l[:1] + flatten(l[1:]) - -def read_csv(): - global df - currDir = os.path.realpath(__file__)[:-8] - df = pd.read_csv(currDir+'data/'+arch.lower()+'_data.csv') - -def create_horiz_sep(): - global horizontalSeparator - horizontalSeparator = '-'*(longestInstr+8) - -def create_output(tp_list=False,pr_sched=True): - global longestInstr + param_list = [params[:i],self.separate_params(params[i+1:])] + elif('#' in params): + i = params.index('#') + param_list = [params[:i]] + return param_list -#Check the output alignment depending on the longest instruction - if(longestInstr > 70): - longestInstr = 70 - create_horiz_sep() - ws = ' '*(len(horizontalSeparator)-23) + def flatten(self,l): + """ + Flatten a nested list of strings. 
def create_output(self, tp_list=False, pr_sched=True):
    """
    Creates output of analysed file including a time stamp.

    Note that ``self.longestInstr`` is capped at 70 as a side effect, so the
    separator line and the instruction column never grow beyond that width.

    Parameters
    ----------
    tp_list : bool
        Boolean for indicating the need for the throughput list as output
        (default False)
    pr_sched : bool
        Boolean for indicating the need for predicting a scheduling
        (default True)

    Returns
    -------
    str
        OSACA output
    """
    # Check the output alignment depending on the longest instruction
    self.longestInstr = min(self.longestInstr, 70)
    horizLine = self.create_horiz_sep()
    # Write general information about the benchmark
    output = ('--'+horizLine+'\n'
              '| Analyzing of file:\t'+os.getcwd()+'/'+self.filepath+'\n'
              '| Architecture:\t\t'+self.arch+'\n'
              '| Timestamp:\t\t'+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'\n')

    if tp_list:
        output += self.create_TP_list(horizLine)
    if pr_sched:
        output += '\n\n'
        sched = Scheduler(self.arch, self.instrForms)
        schedOutput, totalTP = sched.schedule_FCFS()
        output += sched.get_head()+schedOutput
        output += 'Total number of estimated throughput: '+str(totalTP)
    return output


def create_horiz_sep(self):
    """
    Calculate and return horizontal separator line.

    Returns
    -------
    str
        Horizontal separator line, 8 characters longer than ``self.longestInstr``
    """
    return '-'*(self.longestInstr+8)


def create_TP_list(self, horizLine):
    """
    Create list of instruction forms with the proper throughput value.

    For every instruction form the exact operand combination is looked up in
    ``self.df``. If it is missing, non-register operands are replaced by a
    register operand of the same instruction form and the lookup is repeated.
    Forms without any database entry are listed with throughput 0 and marked
    with a trailing ``*``.

    Parameters
    ----------
    horizLine : str
        Calculated horizontal line for nice alignment

    Returns
    -------
    str
        Throughput list output for printing
    """
    # Function-scope import keeps this block independent of the file header.
    import warnings

    # The lookup key is passed to str.contains(), which interprets it as a
    # pattern; silence the warning about match groups once, not per instruction.
    warnings.filterwarnings("ignore", 'This pattern has match groups')

    def fetch_tp(key):
        """
        Return (True, throughput) if a database entry contains `key`, else (False, 0).

        Raises IndexError if some entry contains `key` but none equals it.
        """
        series = self.df['instr'].str.contains(key)
        if True not in series.values:
            return False, 0
        return True, self.df[self.df.instr == key].TP.values[0]

    warning = False
    ws = ' '*(len(horizLine)-23)

    output = ('\n| INSTRUCTION'+ws+'CLOCK CYCLES\n'
              '| '+horizLine+'\n|\n')
    # Check for the throughput data in CSV
    for elem in self.instrForms:
        # elem is [mnemonic, operand, ..., operand, printable form];
        # build the operand suffix used as database key
        opExt = []
        for operand in elem[1:-1]:
            if isinstance(operand, Register) and operand.reg_type == 'GPR':
                opExt.append('r'+str(operand.size))
            elif isinstance(operand, MemAddr):
                opExt.append('mem')
            else:
                opExt.append(operand.print().lower())
        operands = '_'.join(opExt)
        # Now look up the value in the dataframe
        try:
            found, tp = fetch_tp(elem[0]+'-'+operands)
        except IndexError:
            # Something went wrong
            print('Error while fetching data from database')
            continue
        if not found:
            # Did not find the exact instruction form.
            # Try to find the instruction form for register operands only
            opExtRegs = []
            for operand in opExt:
                try:
                    Register(operand)
                except KeyError:
                    opExtRegs.append(False)
                else:
                    opExtRegs.append(True)
            if not any(opExtRegs):
                # No register in whole instruction form, so there is no
                # register size to fall back to.
                print('Feature not included yet: ', end='')
                print(elem[0]+' for '+operands)
                tp = 0
                warning = True

                ws = ' '*(self.longestInstr-len(elem[-1]))+'| '
                n_f = ' '*(5-len(str(tp)))+'*'
                output += '| '+elem[-1]+ws+str(tp)+n_f+'\n'
                continue
            if not opExtRegs[0]:
                # Instruction stores result in memory. Check for storing in
                # register instead.
                if len(opExt) > 1:
                    if opExtRegs[1]:
                        opExt[0] = opExt[1]
                    # Fixed: was `len(optExt > 2)`, which raised NameError
                    elif len(opExt) > 2:
                        if opExtRegs[2]:
                            opExt[0] = opExt[2]
            if len(opExtRegs) == 2 and not opExtRegs[1]:
                # Instruction loads value from memory and has only two
                # operands. Check for loading from register instead
                if opExtRegs[0]:
                    opExt[1] = opExt[0]
            if len(opExtRegs) == 3 and not opExtRegs[2]:
                # Instruction loads value from memory and has three operands.
                # Check for loading from register instead
                opExt[2] = opExt[0]
            operands = '_'.join(opExt)
            # Check for register equivalent instruction
            try:
                found, tp = fetch_tp(elem[0]+'-'+operands)
            except IndexError:
                # Something went wrong
                print('Error while fetching data from database')
                continue
            if not found:
                # Did not find the register instruction form either. Set
                # warning and go on with throughput 0
                warning = True
        # Check the alignment again
        ws = ' '*(self.longestInstr-len(elem[-1]))+'| '
        n_f = '' if found else (' '*(5-len(str(tp)))+'*')
        output += '| '+elem[-1]+ws+'{:3.2f}'.format(tp)+n_f+'\n'
    # Finally end the list of throughput values
    output += '| '+horizLine+'\n'
    if warning:
        output += ('\n\n* There was no throughput value found '
                   'for the specific instruction form.'
                   '\n  Please create a testcase via the create_testcase-method '
                   'or add a value manually.')
    return output
Get instruction and the number of clock cycles -# and remove the '-TP' suffix - clmn = 'TP' - instr = instr[:-3] -# Otherwise stay with Latency - clkC = line.split()[1] - clkC_tmp = clkC - clkC = validate_val(clkC, instr, True if (clmn == 'TP') else False) - txtOutput = True if (clkC_tmp == clkC) else False - val = -2 - new = False - try: - entry = df.loc[lambda df: df.instr == instr,clmn] - val = entry.values[0] - except IndexError: -# Instruction not in database yet --> add it - new = True -# First check if LT or TP value has already been added before - for i,item in enumerate(newData): - if(instr in item): - if(clmn == 'TP'): - newData[i][1] = clkC - elif(clmn == 'LT'): - newData[i][2] = clkC - new = False - break - if(new and clmn == 'TP'): - newData.append([instr,clkC,'-1']) - elif(new and clmn == 'LT'): - newData.append([instr,'-1',clkC]) - new = True - addedValues += 1 - pass -# If val is -1 (= not filled with a valid value) add it immediately - if(val == -1): - df.set_value(entry.index[0], clmn, clkC) - addedValues += 1 - continue - if(not new and abs((val/np.float64(clkC))-1) > 0.05): - print('Different measurement for {} ({}): {}(old) vs. 
{}(new)\nPlease check for correctness (no changes were made).'.format(instr, clmn, val, clkC)) - txtOutput = True - if(txtOutput): - print() - txtOutput = False -# Now merge the DataFrames and write new csv file - df = df.append(pd.DataFrame(newData, columns=['instr','TP','LT']), ignore_index=True) - csv = df.to_csv(index=False) - write_csv(csv) - print('ibench output {} successfully in database included.'.format(filepath.split('/')[-1])) - print('{} values were added.'.format(addedValues)) - - -# main function of the tool -def inspect_binary(): -# Check args and exit program if something's wrong - if(not check_arch()): - print('Invalid microarchitecture.') - sys.exit() - if(not check_elffile()): - print('Invalid file path or file format.') - sys.exit() -# Finally check for database for the chosen architecture - read_csv() - - print('Everything seems fine! Let\'s start checking!') - for line in srcCode: - check_line(line) - output = create_output() - print(output) - - -# main function of the tool with IACA markers instead of OSACA marker -def inspect_with_iaca(): -# Check args and exit program if something's wrong - if(not check_arch()): - print('Invalid microarchitecture.') - sys.exit() -# Check if input file is a binary or assembly file - try: - binaryFile = True - if(not check_elffile()): - print('Invalid file path or file format.') - sys.exit() - except (TypeError,IndexError): - binaryFile = False - if(not check_file(True)): - print('Invalid file path or file format.') - sys.exit() -# Finally check for database for the chosen architecture - read_csv() - - print('Everything seems fine! 
Let\'s start checking!') - if(binaryFile): - iaca_bin() - else: - iaca_asm() - output = create_output() - print(output) - - ##------------------------------------------------------------------------------ ##------------Main method-------------- def main(): - global inp - global arch - global filepath # Parse args parser = argparse.ArgumentParser(description='Analyzes a marked innermost loop snippet for a given architecture type and prints out the estimated average throughput') parser.add_argument('-V', '--version', action='version', version='%(prog)s 0.1') @@ -575,21 +788,25 @@ def main(): inclIbench = inp.incl iacaFlag = inp.iaca insert_m = inp.insert_marker + +# Create Osaca object + if(inp.arch is not None): + osaca = Osaca(arch, filepath) if(inclIbench): - include_ibench() + osaca.include_ibench() elif(iacaFlag): - inspect_with_iaca() + osaca.inspect_with_iaca() elif(insert_m): try: - from kerncrafts import iaca + from kerncraft import iaca except ImportError: print('ImportError: Module kerncraft not installed. Use \'pip install --user kerncraft\' for installation.\nFor more information see https://github.com/RRZE-HPC/kerncraft') sys.exit() iaca.iaca_instrumentation(input_file=filepath, output_file=filepath, block_selection='manual', pointer_increment=1) else: - inspect_binary() + osaca.inspect_binary() ##------------Main method--------------