diff --git a/osaca/eu_sched.py b/osaca/eu_sched.py index ed66d32..6c31c02 100755 --- a/osaca/eu_sched.py +++ b/osaca/eu_sched.py @@ -11,19 +11,17 @@ from osaca.param import Register, MemAddr class Scheduler(object): - arch_dict = {'SNB': 6, 'IVB': 6, 'HSW': 8, 'BDW': 8, 'SKL': 8, 'ZEN': 10} dv_port_dict = {'SKL': 0, 'ZEN': 3} - ports = None # type: int - instrList = None # type: list>, + ports = None # type: int + instrList = None # type: list>, # content of most inner list in instrList: instr, operand(s), instr form - df = None # type: DataFrame + df = None # type: DataFrame # for parallel ld/st in archs with 1 st/cy and >1 ld/cy, able to do 1 st and 1 ld in 1cy - ld_ports = None # type: list + ld_ports = None # type: list # enable flag for parallel ld/st - en_par_ldst = False # type: boolean - dv_port = -1 # type: int - + en_par_ldst = False # type: boolean + dv_port = -1 # type: int def __init__(self, arch, instruction_list): arch = arch.upper() @@ -33,7 +31,7 @@ class Scheduler(object): print('Architecture not supportet for EU scheduling.', file=sys.stderr) sys.exit(1) # check for parallel ld/st in a cycle - if(arch == 'ZEN'): + if arch == 'ZEN': self.en_par_ldst = True self.ld_ports = [9, 10] # check for DV port @@ -44,13 +42,14 @@ class Scheduler(object): # do nothing pass self.instrList = instruction_list - #curr_dir = os.path.realpath(__file__)[:-11] + # curr_dir = os.path.realpath(__file__)[:-11] osaca_dir = os.path.expanduser('~/.osaca/') self.df = pd.read_csv(osaca_dir + 'data/' + arch.lower() + '_data.csv', quotechar='"', converters={'ports': ast.literal_eval}) + def new_schedule(self): """ - Schedules Instruction Form list and calculates port bindings. + Schedule Instruction Form list and calculate port bindings. Returns ------- @@ -62,18 +61,18 @@ class Scheduler(object): # Initialize ports # Add DV port, if it is existing tmp_port = 0 - if(self.dv_port != -1): + if self.dv_port != -1: tmp_port = 1 occ_ports = [[0] * (self.ports + tmp_port) for x in range(len(self.instrList))] port_bndgs = [0] * (self.ports + tmp_port) # Store instruction counter for parallel ld/st par_ldst = 0 # Count the number of store instr if we schedule for an architecture with par ld/st - if(self.en_par_ldst): + if self.en_par_ldst: for i, instrForm in enumerate(self.instrList): - if(isinstance(instrForm[1], MemAddr) and len(instrForm) > 3 - and not instrForm[0].startswith('cmp')): - #print('({}, {}) is st --> par_ldst = {}'.format(i, instrForm[0], par_ldst + 1)) + if (isinstance(instrForm[1], MemAddr) and len(instrForm) > 3 + and not instrForm[0].startswith('cmp')): + # print('({}, {}) is st --> par_ldst = {}'.format(i, instrForm[0], par_ldst + 1)) par_ldst += 1 # Check if there's a port occupation stored in the CSV, otherwise leave the # occ_port list item empty @@ -82,11 +81,11 @@ class Scheduler(object): search_string = instrForm[0] + '-' + self.get_operand_suffix(instrForm) entry = self.df.loc[lambda df, sStr=search_string: df.instr == sStr] tup = entry.ports.values[0] - if(len(tup) == 1 and tup[0] == -1): + if len(tup) == 1 and tup[0] == -1: raise IndexError() except IndexError: # Instruction form not in CSV - if(instrForm[0][:3] == 'nop'): + if instrForm[0][:3] == 'nop': sched += self.get_line(occ_ports[i], '* ' + instrForm[-1]) else: sched += self.get_line(occ_ports[i], 'X ' + instrForm[-1]) @@ -94,18 +93,17 @@ class Scheduler(object): occ_ports[i] = list(tup) # Check if it's a ld including instr p_flg = '' - if(self.en_par_ldst): + if self.en_par_ldst: # Check for ld - if(isinstance(instrForm[-2], MemAddr) or - (len(instrForm) > 4 and isinstance(instrForm[2], MemAddr))): - if(par_ldst > 0): + if (isinstance(instrForm[-2], MemAddr) or + (len(instrForm) > 4 and isinstance(instrForm[2], MemAddr))): + if par_ldst > 0: par_ldst -= 1 p_flg = 'P ' for port in self.ld_ports: - tmp_port_add = 1 if(self.dv_port != -1 and self.dv_port < port) else 0 occ_ports[i][port] = '(' + str(occ_ports[i][port]) + ')' # Write schedule line - if(len(p_flg) > 0): + if len(p_flg) > 0: sched += self.get_line(occ_ports[i], p_flg + instrForm[-1]) for port in self.ld_ports: occ_ports[i][port] = 0 @@ -113,11 +111,11 @@ class Scheduler(object): sched += self.get_line(occ_ports[i], instrForm[-1]) # Add throughput to total port binding port_bndgs = list(map(add, port_bndgs, occ_ports[i])) - return (sched, port_bndgs) + return sched, port_bndgs def schedule(self): """ - Schedules Instruction Form list and calculates port bindings. + Schedule Instruction Form list and calculate port bindings. Returns ------- @@ -128,7 +126,6 @@ class Scheduler(object): wTP = False sched = self.get_head() # Initialize ports - occ_ports = [[0] * self.ports for x in range(len(self.instrList))] port_bndgs = [0] * self.ports # Check if there's a port occupation stored in the CSV, otherwise leave the # occ_port list item empty @@ -137,16 +134,16 @@ class Scheduler(object): search_string = instrForm[0] + '-' + self.get_operand_suffix(instrForm) entry = self.df.loc[lambda df, sStr=search_string: df.instr == sStr] tup = entry.ports.values[0] - if(len(tup) == 1 and tup[0][0] == -1): + if len(tup) == 1 and tup[0][0] == -1: raise IndexError() except IndexError: # Instruction form not in CSV - if(instrForm[0][:3] == 'nop'): + if instrForm[0][:3] == 'nop': sched += self.get_line(occ_ports[i], '* ' + instrForm[-1]) else: sched += self.get_line(occ_ports[i], 'X ' + instrForm[-1]) continue - if(wTP): + if wTP: # Get the occurance of each port from the occupation list port_occurances = self.get_port_occurances(tup) # Get 'occurance groups' @@ -155,34 +152,34 @@ class Scheduler(object): tp_ges = entry.TP.values[0] * len(occurance_groups[0]) for occGroup in occurance_groups: for port in occGroup: - occ_ports[i][port] = tp_ges/len(occGroup) + occ_ports[i][port] = tp_ges / len(occGroup) else: variations = len(tup) t_all = self.flatten(tup) - if(entry.TP.values[0] == 0): + if entry.TP.values[0] == 0: t_all = () - if(variations == 1): + if variations == 1: for j in tup[0]: occ_ports[i][j] = entry.TP.values[0] else: for j in range(0, self.ports): - occ_ports[i][j] = t_all.count(j) / variations + occ_ports[i][j] = t_all.count(j) / variations # Write schedule line sched += self.get_line(occ_ports[i], instrForm[-1]) # Add throughput to total port binding port_bndgs = list(map(add, port_bndgs, occ_ports[i])) - return (sched, port_bndgs) - + return sched, port_bndgs + def flatten(self, l): - if(len(l) == 0): + if len(l) == 0: return l - if(isinstance(l[0], type(l))): + if isinstance(l[0], type(l)): return self.flatten(l[0]) + self.flatten(l[1:]) - return l[:1] + self.flatten(l[1:]) + return l[:1] + self.flatten(l[1:]) def schedule_fcfs(self): """ - Schedules Instruction Form list for a single run with latencies. + Schedule Instruction Form list for a single run with latencies. Returns ------- @@ -198,18 +195,18 @@ class Scheduler(object): search_string = instrForm[0] + '-' + self.get_operand_suffix(instrForm) entry = self.df.loc[lambda df, sStr=search_string: df.instr == sStr] tup = entry.ports.values[0] - if(len(tup) == 1 and tup[0][0] == -1): + if len(tup) == 1 and tup[0][0] == -1: raise IndexError() except IndexError: # Instruction form not in CSV sched += self.get_line([0] * self.ports, '* ' + instrForm[-1]) continue found = False - while(not found): + while not found: for portOcc in tup: # Test if chosen instruction form port occupation suits the current CPU port # occupation - if(self.test_ports_fcfs(occ_ports, portOcc)): + if self.test_ports_fcfs(occ_ports, portOcc): # Current port occupation fits for chosen port occupation of instruction! found = True good = [entry.LT.values[0] if (j in portOcc) else 0 for j in @@ -219,16 +216,15 @@ class Scheduler(object): occ_ports = [occ_ports[j] + good[j] for j in range(0, self.ports)] break # Step - occ_ports = [j-1 if (j > 0) else 0 for j in occ_ports] - if(entry.LT.values[0] != 0): + occ_ports = [j - 1 if (j > 0) else 0 for j in occ_ports] + if entry.LT.values[0] != 0: total += 1 total += max(occ_ports) - return (sched, total) + return sched, total def get_occurance_groups(self, port_occurances): """ - Groups ports in groups by the number of their occurance and sorts - groups by cardinality + Group ports in groups by the number of their occurrence and sorts groups by cardinality. Parameters ---------- @@ -254,8 +250,7 @@ class Scheduler(object): def get_port_occurances(self, tups): """ - Returns the number of each port occurance for the possible port - occupations + Return the number of each port occurrence for the possible port occupations. Parameters ---------- @@ -276,7 +271,7 @@ class Scheduler(object): def test_ports_fcfs(self, occ_ports, needed_ports): """ - Test if current configuration of ports is possible and returns boolean + Test if current configuration of ports is possible and returns boolean. Parameters ---------- @@ -292,13 +287,13 @@ class Scheduler(object): False if not """ for port in needed_ports: - if(occ_ports[port] != 0): + if occ_ports[port] != 0: return False return True def get_report_info(self): """ - Creates Report information including all needed annotations. + Create Report information including all needed annotations. Returns ------- @@ -306,15 +301,15 @@ class Scheduler(object): String containing the report information """ analysis = 'Throughput Analysis Report\n' + ('-' * 26) + '\n' - annotations = ('P - Load operation can be hidden behind a past or future store instruction\n' - 'X - No information for this instruction in data file\n' - '* - Instruction micro-ops not bound to a port\n' - '\n') + annotations = ( + 'P - Load operation can be hidden behind a past or future store instruction\n' + 'X - No information for this instruction in data file\n' + '* - Instruction micro-ops not bound to a port\n\n') return analysis + annotations def get_head(self): """ - Creates right heading for CPU architecture. + Create right heading for CPU architecture. Returns ------- @@ -322,15 +317,15 @@ class Scheduler(object): String containing the header """ horiz_line = '-' * 7 * self.ports - if(self.dv_port != -1): + if self.dv_port != -1: horiz_line += '-' * 6 horiz_line += '-\n' - port_anno = (' ' * (math.floor((len(horiz_line) - 24) / 2)) + 'Ports Pressure in cycles' - + ' ' * (math.ceil((len(horiz_line) - 24) / 2)) + '\n') + port_anno = (' ' * int(math.floor((len(horiz_line) - 24) / 2)) + 'Ports Pressure in cycles' + + ' ' * int(math.ceil((len(horiz_line) - 24) / 2)) + '\n') port_line = '' for i in range(0, self.ports): port_line += '| {} '.format(i) - if(i == self.dv_port): + if i == self.dv_port: port_line = port_line + '- DV ' port_line += '|\n' head = port_anno + port_line + horiz_line @@ -353,19 +348,18 @@ class Scheduler(object): String for output containing port scheduling for instr_name """ line = '' - r_space = ' ' for p_num, i in enumerate(occ_ports): pipe = '|' - if(isinstance(i, str)): + if isinstance(i, str): cycles = i i = float(i[1:-1]) r_space = '' else: cycles = ' ' if (i == 0) else '%.2f' % float(i) r_space = ' ' - if(p_num == self.dv_port + 1 and p_num != 0): + if p_num == self.dv_port + 1 and p_num != 0: pipe = ' ' - if(i >= 10): + if i >= 10: line += pipe + cycles + r_space else: line += pipe + ' ' + cycles + r_space @@ -374,7 +368,7 @@ class Scheduler(object): def get_port_binding(self, port_bndg): """ - Creates port binding out of scheduling result. + Create port binding out of scheduling result. Parameters ---------- @@ -392,12 +386,13 @@ class Scheduler(object): port_line = '| Port |' after_dv = 0 for i in range(0, self.ports): - if(i == self.dv_port): - port_line += ' ' * sp_left[i] + str(i) + ' ' * sp_right[i] + '-' - port_line += ' ' * (sp_left[i+1] - 1) + 'DV' + ' ' * sp_right[i+1] + '|' + if i == self.dv_port: + port_line += ' ' * int(sp_left[i]) + str(i) + ' ' * int(sp_right[i]) + '-' + port_line += ' ' * int(sp_left[i + 1] - 1) + 'DV' + ' ' * int(sp_right[i + 1]) + '|' after_dv = 1 else: - port_line += ' ' * sp_left[i + after_dv] + str(i) + ' ' * sp_right[i + after_dv] + port_line += (' ' * int(sp_left[i + after_dv]) + str(i) + + ' ' * int(sp_right[i + after_dv])) port_line += '|' port_line += '\n' cyc_line = '| Cycles |' @@ -410,15 +405,15 @@ class Scheduler(object): return binding def get_spaces(self, port_bndg): - len_list = [len(str(round(x, 2)))+1 for x in port_bndg] - total = sum([x+2 for x in len_list]) - sp_left = [math.ceil(x/2) for x in len_list] - sp_right = [math.floor(x/2) for x in len_list] + len_list = [len(str(round(x, 2))) + 1 for x in port_bndg] + total = sum([x + 2 for x in len_list]) + sp_left = [math.ceil(x / 2) for x in len_list] + sp_right = [math.floor(x / 2) for x in len_list] return sp_left, sp_right, total def get_operand_suffix(self, instr_form): """ - Creates operand suffix out of list of Parameters. + Create operand suffix out of list of Parameters. Parameters ---------- @@ -431,11 +426,10 @@ class Scheduler(object): Operand suffix for searching in data file """ op_ext = [] - for i in range(1, len(instr_form)-1): - optmp = '' - if(isinstance(instr_form[i], Register) and instr_form[i].reg_type == 'GPR'): + for i in range(1, len(instr_form) - 1): + if isinstance(instr_form[i], Register) and instr_form[i].reg_type == 'GPR': optmp = 'r' + str(instr_form[i].size) - elif(isinstance(instr_form[i], MemAddr)): + elif isinstance(instr_form[i], MemAddr): optmp = 'mem' else: optmp = str(instr_form[i]).lower() diff --git a/osaca/get_instr.py b/osaca/get_instr.py index b083902..0334e21 100755 --- a/osaca/get_instr.py +++ b/osaca/get_instr.py @@ -29,23 +29,20 @@ class InstrExtractor(object): self.extract_instr(self.filepaths[i]) def is_elffile(self, filepath): - if(os.path.isfile(filepath)): + if os.path.isfile(filepath): with open(filepath) as f: src = f.read() - if('format elf64' in src): + if 'format elf64' in src: return True return False def extract_instr(self, asm_file): # Check if parameter is in the correct file format - if(not self.is_elffile(asm_file)): + if not self.is_elffile(asm_file): print('Invalid argument') return # Open file - try: - f = open(asm_file, 'r') - except IOError: - print('IOError: File not found') + f = open(asm_file, 'r') # Analyse code line by line and check the instructions self.lncnt = 1 for line in f: @@ -55,34 +52,34 @@ class InstrExtractor(object): def check_line(self, line): # Check if MARKER is in line and count the number of whitespaces if so - if(self.MARKER in line): + if self.MARKER in line: # But first, check if high level code ist indented with whitespaces or tabs - if(self.first): + if self.first: self.set_counter_char(line) self.first = False self.numSeps = (re.split(self.MARKER, line)[0]).count(self.cntChar) self.sem = 2 - elif(self.sem > 0): + elif self.sem > 0: # We're in the marked code snipped # Check if the line is ASM code and - if not - check if we're still in the loop match = re.search(self.ASM_LINE, line) - if(match): + if match: # Further analysis of instructions # Check if there are commetns in line - if(r'//' in line): + if r'//' in line: return self.check_instr(''.join(re.split(r'\t', line)[-1:])) - elif((re.split(r'\S', line)[0]).count(self.cntChar) <= self.numSeps): + elif (re.split(r'\S', line)[0]).count(self.cntChar) <= self.numSeps: # Not in the loop anymore - or yet - so we decrement the semaphore - self.sem = self.sem-1 + self.sem = self.sem - 1 # Check if seperator is either tabulator or whitespace def set_counter_char(self, line): num_spaces = (re.split(self.MARKER, line)[0]).count(' ') num_tabs = (re.split(self.MARKER, line)[0]).count('\t') - if(num_spaces != 0 and num_tabs == 0): + if num_spaces != 0 and num_tabs == 0: self.cntChar = ' ' - elif(num_spaces == 0 and num_tabs != 0): + elif num_spaces == 0 and num_tabs != 0: self.cntChar = '\t' else: err_msg = 'Indentation of code is only supported for whitespaces and tabs.' @@ -90,14 +87,14 @@ class InstrExtractor(object): def check_instr(self, instr): # Check for strange clang padding bytes - while(instr.startswith('data32')): + while instr.startswith('data32'): instr = instr[7:] # Seperate mnemonic and operands mnemonic = instr.split()[0] params = ''.join(instr.split()[1:]) # Check if line is not only a byte empty_byte = re.compile(r'[0-9a-f]{2}') - if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2): + if re.match(empty_byte, mnemonic) and len(mnemonic) == 2: return # Check if there's one or more operand and store all in a list param_list = self.flatten(self.separate_params(params)) @@ -106,59 +103,59 @@ class InstrExtractor(object): # LABEL (LBL) for i in range(len(param_list)): op = param_list[i] - if(len(op) <= 0): + if len(op) <= 0: op = Parameter('NONE') - elif(op[0] == '$'): + elif op[0] == '$': op = Parameter('IMD') - elif(op[0] == '%' and '(' not in op): + elif op[0] == '%' and '(' not in op: j = len(op) opmask = False - if('{' in op): + if '{' in op: j = op.index('{') opmask = True op = Register(op[1:j], opmask) - elif('<' in op): + elif '<' in op: op = Parameter('LBL') else: op = MemAddr(op) - param_list[i] = str(op) if (type(op) is not Register) else str(op)+str(op.size) + param_list[i] = str(op) if (type(op) is not Register) else str(op) + str(op.size) op_list[i] = op # Join mnemonic and operand(s) to an instruction form - if(len(mnemonic) > 7): + if len(mnemonic) > 7: tabs = '\t' else: tabs = '\t\t' - instr_form = mnemonic+tabs+(' '.join(param_list)) + instr_form = mnemonic + tabs + (' '.join(param_list)) # Check in data file for instruction form and increment the counter - if(instr_form in self.db): - self.db[instr_form] = self.db[instr_form]+1 + if instr_form in self.db: + self.db[instr_form] = self.db[instr_form] + 1 else: - self. db[instr_form] = 1 + self.db[instr_form] = 1 # Create testcase for instruction form, since it is the first appearance of it # Only create benchmark if no label (LBL) is part of the operands do_bench = True for par in op_list: - if(str(par) == 'LBL' or str(par) == ''): + if str(par) == 'LBL' or str(par) == '': do_bench = False - if(do_bench): + if do_bench: # Create testcase with reversed param list, due to the fact its intel syntax! tc = Testcase(mnemonic, list(reversed(op_list)), '64') tc.write_testcase() def separate_params(self, params): param_list = [params] - if(',' in params): - if(')' in params): - if(params.index(')') < len(params)-1 and params[params.index(')')+1] == ','): - i = params.index(')')+1 - elif(params.index('(') < params.index(',')): + if ',' in params: + if ')' in params: + if params.index(')') < len(params) - 1 and params[params.index(')') + 1] == ',': + i = params.index(')') + 1 + elif params.index('(') < params.index(','): return param_list else: i = params.index(',') else: i = params.index(',') - param_list = [params[:i], self.separate_params(params[i+1:])] - elif('#' in params): + param_list = [params[:i], self.separate_params(params[i + 1:])] + elif '#' in params: i = params.index('#') param_list = [params[:i]] return param_list @@ -172,14 +169,14 @@ class InstrExtractor(object): print('Number of\tmnemonic') print('calls\n') for i in range(len(self.sorted_db)): - print(str(self.sorted_db[i][1])+'\t\t'+self.sorted_db[i][0]) + print(str(self.sorted_db[i][1]) + '\t\t' + self.sorted_db[i][0]) total += self.sorted_db[i][1] - print('\nCumulated number of instructions: '+str(total)) + print('\nCumulated number of instructions: ' + str(total)) def save_db(self): file = open('.cnt_asm_ops.db', 'w') for i in self.db.items(): - file.write(i[0]+'\t'+str(i[1])+'\n') + file.write(i[0] + '\t' + str(i[1]) + '\n') file.close() def load_db(self): @@ -191,7 +188,7 @@ class InstrExtractor(object): for line in file: mnemonic = line.split('\t')[0] # Join mnemonic and operand(s) to an instruction form - if(len(mnemonic) > 7): + if len(mnemonic) > 7: tabs = '\t' params = line.split('\t')[1] num_calls = line.split('\t')[2][:-1] @@ -199,41 +196,43 @@ class InstrExtractor(object): tabs = '\t\t' params = line.split('\t')[2] num_calls = line.split('\t')[3][:-1] - instr_form = mnemonic+tabs+params + instr_form = mnemonic + tabs + params self.db[instr_form] = int(num_calls) file.close() def flatten(self, l): - if l == []: + if not l: return l - if(isinstance(l[0], list)): + if isinstance(l[0], list): return self.flatten(l[0]) + self.flatten(l[1:]) return l[:1] + self.flatten(l[1:]) def main(): # Parse args - parser = argparse.ArgumentParser(description='Returns a list of all instruction forms in the' - + 'given files sorted by their number of occurences.') + parser = argparse.ArgumentParser(description='Returns a list of all instruction forms in the ' + 'given files sorted by their number of ' + 'occurrences.') parser.add_argument('-V', '--version', action='version', version='%(prog)s 0.2') parser.add_argument('filepath', nargs='+', help='path to objdump(s)') - parser.add_argument('-l', '--load', dest='load', action='store_true', help='load data file' - + ' before checking new files') - parser.add_argument('-s', '--store', dest='store', action='store_true', help='store data file ' - + 'before checking new files') + parser.add_argument('-l', '--load', dest='load', action='store_true', + help='load data file before checking new files') + parser.add_argument('-s', '--store', dest='store', action='store_true', + help='store data file before checking new files') # Create object and store arguments as attribute inp = parser.parse_args() ie = InstrExtractor(inp.filepath) # Do work - if(inp.load): + if inp.load: ie.load_db() ie.check_all() ie.print_sorted_db() - if(inp.store): + if inp.store: ie.save_db() + # ---------main method---------- if __name__ == '__main__': main() diff --git a/osaca/osaca.py b/osaca/osaca.py index 857c34d..2e38eda 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -16,9 +16,8 @@ from osaca.eu_sched import Scheduler from osaca.testcase import Testcase -class Osaca(object): +class OSACA(object): arch = None - filepath = None srcCode = None df = None instr_forms = None @@ -37,23 +36,23 @@ class Osaca(object): ASM_LINE = re.compile(r'\s[0-9a-f]+[:]') # Matches every variation of the IACA start marker IACA_SM = re.compile(r'\s*movl[ \t]+\$111[ \t]*,[ \t]*%ebx.*\n\s*\.byte[ \t]+100.*' - + r'((,[ \t]*103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte' - + r'[ \t]+103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') + r'((,[ \t]*103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte' + r'[ \t]+103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') # Matches every variation of the IACA end marker IACA_EM = re.compile(r'\s*movl[ \t]+\$222[ \t]*,[ \t]*%ebx.*\n\s*\.byte[ \t]+100.*' - + r'((,[ \t]*103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte' - + r'[ \t]+103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') + r'((,[ \t]*103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte' + r'[ \t]+103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') - def __init__(self, _arch, _filepath, output=sys.stdout): + def __init__(self, _arch, file_path, output=sys.stdout): self.arch = _arch - self.filepath = _filepath + self.file_path = file_path self.instr_forms = [] self.file_output = output # Check if data files are already in usr dir, otherwise create them - if(not os.path.isdir(self.osaca_dir + 'data')): + if not os.path.isdir(self.osaca_dir + 'data'): print('Copying files in user directory...', file=self.file_output, end='') subprocess.call(['mkdir', '-p', self.osaca_dir]) - subprocess.call(['cp', '-r', + subprocess.call(['cp', '-r', '/'.join(os.path.realpath(__file__).split('/')[:-1]) + '/data', self.osaca_dir]) print('Done!', file=self.file_output) @@ -61,82 +60,80 @@ class Osaca(object): # -----------------main functions depending on arguments-------------------- def include_ibench(self): """ - Reads ibench output and includes it in the architecture specific csv - file. + Read ibench output and include it in the architecture specific csv file. """ # Check args and exit program if something's wrong - if(not self.check_arch()): + if not self.check_arch(): print('Invalid microarchitecture.', file=sys.stderr) sys.exit(1) - if(not self.check_file()): + if not self.check_file(): print('Invalid file path or file format.', file=sys.stderr) sys.exit(1) # Check for database for the chosen architecture self.df = self.read_csv() - # Create sequence of numbers and their reciprokals for validate the measurements + # Create sequence of numbers and their reciprocals for validate the measurements cyc_list, reci_list = self.create_sequences() print('Everything seems fine! Let\'s start!', file=self.file_output) new_data = [] added_vals = 0 for line in self.srcCode: - if('Using frequency' in line or len(line) == 0): + if 'Using frequency' in line or len(line) == 0: continue - clmn = 'LT' + column = 'LT' instr = line.split()[0][:-1] - if('TP' in line): + if 'TP' in line: # We found a command with a throughput value. Get instruction and the number of # clock cycles and remove the '-TP' suffix. - clmn = 'TP' + column = 'TP' instr = instr[:-3] # Otherwise it is a latency value. Nothing to do. - clk_cyc = line.split()[1] + clk_cyc = float(line.split()[1]) clk_cyc_tmp = clk_cyc - clk_cyc = self.validate_val(clk_cyc, instr, True if (clmn == 'TP') else False, + clk_cyc = self.validate_val(clk_cyc, instr, True if (column == 'TP') else False, cyc_list, reci_list) - txt_output = True if (clk_cyc_tmp == clk_cyc) else False + txt_output = (clk_cyc_tmp == clk_cyc) val = -2 new = False try: - entry = self.df.loc[lambda df, inst=instr: df.instr == inst, clmn] + entry = self.df.loc[lambda df, inst=instr: df.instr == inst, column] val = entry.values[0] + # If val is -1 (= not filled with a valid value) add it immediately + if val == -1: + self.df.set_value(entry.index[0], column, clk_cyc) + added_vals += 1 + continue except IndexError: # Instruction not in database yet --> add it new = True # First check if LT or TP value has already been added before for i, item in enumerate(new_data): - if(instr in item): - if(clmn == 'TP'): + if instr in item: + if column == 'TP': new_data[i][1] = clk_cyc - elif(clmn == 'LT'): + elif column == 'LT': new_data[i][2] = clk_cyc new = False break - if(new and clmn == 'TP'): + if new and column == 'TP': new_data.append([instr, clk_cyc, '-1', (-1,)]) - elif(new and clmn == 'LT'): + elif new and column == 'LT': new_data.append([instr, '-1', clk_cyc, (-1,)]) new = True added_vals += 1 - # If val is -1 (= not filled with a valid value) add it immediately - if(val == -1): - self.df.set_value(entry.index[0], clmn, clk_cyc) - added_vals += 1 - continue - if(not new and abs((val/np.float64(clk_cyc))-1) > 0.05): - print('Different measurement for {} ({}): {}(old) vs. '.format(instr, clmn, val) + if not new and abs((val / np.float64(clk_cyc)) - 1) > 0.05: + print('Different measurement for {} ({}): {}(old) vs. '.format(instr, column, val) + '{}(new)\nPlease check for correctness '.format(clk_cyc) + '(no changes were made).', file=self.file_output) txt_output = True - if(txt_output): + if txt_output: print('', file=self.file_output) - txt_output = False # Now merge the DataFrames and write new csv file self.df = self.df.append(pd.DataFrame(new_data, columns=['instr', 'TP', 'LT', 'ports']), ignore_index=True) csv = self.df.to_csv(index=False) self.write_csv(csv) - print('ibench output {} '.format(self.filepath.split('/')[-1]) - + 'successfully in data file included.', file=self.file_output) + print('ibench output {}'.format(self.file_path.split('/')[-1]), + 'successfully in data file included.', file=self.file_output) print('{} values were added.'.format(added_vals), file=self.file_output) def inspect_binary(self): @@ -144,10 +141,10 @@ class Osaca(object): Main function of OSACA. Inspect binary file and create analysis. """ # Check args and exit program if something's wrong - if(not self.check_arch()): + if not self.check_arch(): print('Invalid microarchitecture.', file=sys.stderr) sys.exit(1) - if(not self.check_elffile()): + if not self.check_elffile(): print('Invalid file path or file format. Not an ELF file.', file=sys.stderr) sys.exit(1) # Finally check for database for the chosen architecture @@ -155,7 +152,7 @@ class Osaca(object): print('Everything seems fine! Let\'s start checking!', file=self.file_output) for i, line in enumerate(self.srcCode): - if(i == 0): + if i == 0: self.check_line(line, True) else: self.check_line(line) @@ -168,21 +165,21 @@ class Osaca(object): Inspect binary file and create analysis. """ # Check args and exit program if something's wrong - if(not self.check_arch()): + if not self.check_arch(): print('Invalid microarchitecture.', file=sys.stderr) sys.exit() # Check if input file is a binary or assembly file binary_file = True - if(not self.check_elffile()): + if not self.check_elffile(): binary_file = False - if(not self.check_file(True)): + if not self.check_file(True): print('Invalid file path or file format.', file=sys.stderr) sys.exit(1) # Finally check for database for the chosen architecture self.df = self.read_csv() print('Everything seems fine! Let\'s start checking!', file=self.file_output) - if(binary_file): + if binary_file: self.iaca_bin() else: self.iaca_asm() @@ -203,7 +200,7 @@ class Osaca(object): """ arch_list = ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'ZEN'] - if(self.arch in arch_list): + if self.arch in arch_list: return True else: return False @@ -220,12 +217,12 @@ class Osaca(object): False if file does not exist or is not an elf64 file """ - if(os.path.isfile(self.filepath)): + if os.path.isfile(self.file_path): self.store_src_code_elf() try: - if('file format elf64' in self.srcCode[1].lower()): + if 'file format elf64' in self.srcCode[1].lower(): return True - except(IndexError): + except IndexError: return False return False @@ -247,7 +244,7 @@ class Osaca(object): False if file does not exist """ - if(os.path.isfile(self.filepath)): + if os.path.isfile(self.file_path): self.store_src_code(iaca_flag) return True return False @@ -257,8 +254,8 @@ class Osaca(object): Load binary file compiled with '-g' in class attribute srcCode and separate by line. """ - self.srcCode = (subprocess.run(['objdump', '--source', self.filepath], - stdout=subprocess.PIPE, + self.srcCode = (subprocess.run(['objdump', '--source', self.file_path], + stdout=subprocess.PIPE, stderr=subprocess.PIPE).stdout.decode('utf-8').split('\n')) def store_src_code(self, iaca_flag=False): @@ -271,52 +268,45 @@ class Osaca(object): store file data as a string in attribute srcCode if True, store it as a list of strings (lines) if False (default False) """ - try: - f = open(self.filepath, 'r') - except IOError: - print('IOError: file \'{}\' not found'.format(self.filepath), file=self.file_output) + f = open(self.file_path, 'r') self.srcCode = '' for line in f: self.srcCode += line f.close() - if(iaca_flag): + if iaca_flag: return self.srcCode = self.srcCode.split('\n') def read_csv(self): """ - Reads architecture dependent CSV from data directory. + Read architecture dependent CSV from data directory. Returns ------- DataFrame CSV as DataFrame object """ - #curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) - df = pd.read_csv(self.osaca_dir+'data/'+self.arch.lower()+'_data.csv') + # curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) + df = pd.read_csv(self.osaca_dir + 'data/' + self.arch.lower() + '_data.csv') return df def write_csv(self, csv): """ - Writes architecture dependent CSV into data directory. + Write architecture dependent CSV into data directory. Parameters ---------- csv : str CSV data as string """ - #curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) - try: - f = open(self.osaca_dir+'data/'+self.arch.lower()+'_data.csv', 'w') - except IOError: - print('IOError: file \'{}\' not found in $HOME/.osaca/data'.format(self.arch.lower() - + '_data.csv'), file=self.file_output) + # curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) + f = open(self.osaca_dir + 'data/' + self.arch.lower() + '_data.csv', 'w') f.write(csv) f.close() def create_sequences(self, end=101): """ - Creates list of integers from 1 to end and list of their reciprocals. + Create list of integers from 1 to end and list of their reciprocals. Parameters ---------- @@ -334,7 +324,7 @@ class Osaca(object): reci_list = [] for i in range(1, end): cyc_list.append(i) - reci_list.append(1/i) + reci_list.append(1 / i) return cyc_list, reci_list def validate_val(self, clk_cyc, instr, is_tp, cyc_list, reci_list): @@ -365,21 +355,20 @@ class Osaca(object): Clock cycle, either rounded to an integer or its reciprocal or the given clk_cyc parameter """ - clmn = 'LT' - if(is_tp): - clmn = 'TP' + column = 'LT' + if is_tp: + column = 'TP' for i in range(0, len(cyc_list)): - if(cyc_list[i]*1.05 > float(clk_cyc) and cyc_list[i]*0.95 < float(clk_cyc)): + if cyc_list[i] * 1.05 > float(clk_cyc) > cyc_list[i] * 0.95: # Value is probably correct, so round it to the estimated value return cyc_list[i] # Check reciprocal only if it is a throughput value - elif(is_tp and reci_list[i]*1.05 > float(clk_cyc) - and reci_list[i]*0.95 < float(clk_cyc)): + elif is_tp and reci_list[i] * 1.05 > float(clk_cyc) > reci_list[i] * 0.95: # Value is probably correct, so round it to the estimated value return reci_list[i] # No value close to an integer or its reciprocal found, we assume the # measurement is incorrect - print('Your measurement for {} ({}) is probably wrong. '.format(instr, clmn) + print('Your measurement for {} ({}) is probably wrong. '.format(instr, column) + 'Please inspect your benchmark!', file=self.file_output) print('The program will continue with the given value', file=self.file_output) return clk_cyc @@ -396,26 +385,26 @@ class Osaca(object): Necessary for setting indenting character (default False) """ # Check if marker is in line - if(self.marker in line): + if self.marker in line: # First, check if high level code in indented with whitespaces or tabs - if(first_appearance): + if first_appearance: self.indentChar = self.get_indent_chars(line) # Now count the number of whitespaces self.numSeps = (re.split(self.marker, line)[0]).count(self.indentChar) self.sem = 3 - elif(self.sem > 0): + elif self.sem > 0: # We're in the marked code snippet # Check if the line is ASM code and - if not - check if we're still in the loop match = re.search(self.ASM_LINE, line) - if(match): + if match: # Further analysis of instructions # Check if there are comments in line - if(r'//' in line): + if r'//' in line: return self.check_instr(''.join(re.split(r'\t', line)[-1:])) - elif((re.split(r'\S', line)[0]).count(self.indentChar) <= self.numSeps): + elif (re.split(r'\S', line)[0]).count(self.indentChar) <= self.numSeps: # Not in the loop anymore - or yet. We decrement the semaphore - self.sem = self.sem-1 + self.sem = self.sem - 1 def get_indent_chars(self, line): """ @@ -433,9 +422,9 @@ class Osaca(object): """ num_spaces = (re.split(self.marker, line)[0]).count(' ') num_tabs = (re.split(self.marker, line)[0]).count('\t') - if(num_spaces != 0 and num_tabs == 0): + if num_spaces != 0 and num_tabs == 0: return ' ' - elif(num_spaces == 0 and num_tabs != 0): + elif num_spaces == 0 and num_tabs != 0: return '\t' else: err_msg = 'Indentation of code is only supported for whitespaces and tabs.' @@ -448,31 +437,29 @@ class Osaca(object): self.marker = r'fs addr32 nop' part1 = re.compile(r'64\s+fs') part2 = re.compile(r'67 90\s+addr32 nop') - is_2_lines = False for line in self.srcCode: # Check if marker is in line - if(self.marker in line): + if self.marker in line: self.sem += 1 - elif(re.search(part1, line) or re.search(part2, line)): + elif re.search(part1, line) or re.search(part2, line): self.sem += 0.5 - is_2_lines = True - elif(self.sem == 1): + elif self.sem == 1: # We're in the marked code snippet # Check if the line is ASM code match = re.search(self.ASM_LINE, line) - if(match): + if match: # Further analysis of instructions # Check if there are comments in line - if(r'//' in line): + if r'//' in line: continue # Do the same instruction check as for the OSACA marker line check self.check_instr(''.join(re.split(r'\t', line)[-1:])) - elif(self.sem == 2): + elif self.sem == 2: # Not in the loop anymore. Due to the fact it's the IACA marker we can stop here # After removing the last line which belongs to the IACA marker del self.instr_forms[-1:] - #if(is_2_lines): - # The marker is splitted into two lines, therefore delete another line + # if(is_2_lines): + # The marker is splitted into two lines, therefore delete another line # del self.instr_forms[-1:] return @@ -484,15 +471,15 @@ class Osaca(object): code = self.srcCode # Search for the start marker match = re.match(self.IACA_SM, code) - while(not match): + while not match: code = code.split('\n', 1)[1] match = re.match(self.IACA_SM, code) # Search for the end marker code = (code.split('144', 1)[1]).split('\n', 1)[1] res = '' match = re.match(self.IACA_EM, code) - while(not match): - res += code.split('\n', 1)[0]+'\n' + while not match: + res += code.split('\n', 1)[0] + '\n' code = code.split('\n', 1)[1] match = re.match(self.IACA_EM, code) # Split the result by line go on like with OSACA markers @@ -500,7 +487,7 @@ class Osaca(object): for line in res: line = line.split('#')[0] line = line.lstrip() - if(len(line) == 0 or '//' in line or line.startswith('..')): + if len(line) == 0 or '//' in line or line.startswith('..'): continue self.check_instr(line) @@ -515,14 +502,14 @@ class Osaca(object): Instruction as string """ # Check for strange clang padding bytes - while(instr.startswith('data32')): + while instr.startswith('data32'): instr = instr[7:] # Separate mnemonic and operands mnemonic = instr.split()[0] params = ''.join(instr.split()[1:]) # Check if line is not only a byte empty_byte = re.compile(r'[0-9a-f]{2}') - if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2): + if re.match(empty_byte, mnemonic) and len(mnemonic) == 2: return # Check if there's one or more operands and store all in a list param_list = self.flatten(self.separate_params(params)) @@ -531,39 +518,39 @@ class Osaca(object): # MEMORY (MEM) or LABEL(LBL) for i in range(len(param_list)): op = param_list[i] - if(len(op) <= 0): + if len(op) <= 0: op = Parameter('NONE') - elif(op[0] == '$'): + elif op[0] == '$': op = Parameter('IMD') - elif(op[0] == '%' and '(' not in op): + elif op[0] == '%' and '(' not in op: j = len(op) opmask = False - if('{' in op): + if '{' in op: j = op.index('{') opmask = True op = Register(op[1:j], opmask) - elif('<' in op or op.startswith('.')): + elif '<' in op or op.startswith('.'): op = Parameter('LBL') else: - op = MemAddr(op) + op = MemAddr(op, ) param_list[i] = str(op) param_list_types[i] = op # Add to list instr = instr.rstrip() - if(len(instr) > self.longestInstr): + if len(instr) > self.longestInstr: self.longestInstr = len(instr) - instr_form = [mnemonic]+list(reversed(param_list_types))+[instr] + instr_form = [mnemonic] + list(reversed(param_list_types)) + [instr] self.instr_forms.append(instr_form) # If flag is set, create testcase for instruction form # Do this in reversed param list order, du to the fact it's intel syntax # Only create benchmark if no label (LBL) is part of the operands - if('LBL' in param_list or '' in param_list): + if 'LBL' in param_list or '' in param_list: return tc = Testcase(mnemonic, list(reversed(param_list_types)), '32') # Only write a testcase if it not already exists or already in data file writeTP, writeLT = tc.is_in_dir() inDB = len(self.df.loc[lambda df: df.instr == tc.get_entryname()]) - if(inDB == 0): + if inDB == 0: tc.write_testcase(not writeTP, not writeLT) def separate_params(self, params): @@ -582,18 +569,18 @@ class Osaca(object): number of parametes given. """ param_list = [params] - if(',' in params): - if(')' in params): - if(params.index(')') < len(params)-1 and params[params.index(')')+1] == ','): - i = params.index(')')+1 - elif(params.index('(') < params.index(',')): + if ',' in params: + if ')' in params: + if params.index(')') < len(params) - 1 and params[params.index(')') + 1] == ',': + i = params.index(')') + 1 + elif params.index('(') < params.index(','): return param_list else: i = params.index(',') else: i = params.index(',') - param_list = [params[:i], self.separate_params(params[i+1:])] - elif('#' in params): + param_list = [params[:i], self.separate_params(params[i + 1:])] + elif '#' in params: i = params.index('#') param_list = [params[:i]] return param_list @@ -612,9 +599,9 @@ class Osaca(object): [str] List of strings """ - if l == []: + if not l: return l - if(isinstance(l[0], list)): + if isinstance(l[0], list): return self.flatten(l[0]) + self.flatten(l[1:]) return l[:1] + self.flatten(l[1:]) @@ -637,22 +624,22 @@ class Osaca(object): OSACA output """ # Check the output alignment depending on the longest instruction - if(self.longestInstr > 70): + if self.longestInstr > 70: self.longestInstr = 70 horiz_line = self.create_horiz_sep() # Write general information about the benchmark - output = ('--' + horiz_line + '\n' - + '| Analyzing of file:\t' + os.path.abspath(self.filepath) + '\n' - + '| Architecture:\t\t' + self.arch + '\n' - + '| Timestamp:\t\t' + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '\n') - if(tp_list): + output = '--{}\n| Analyzing of file:\t{}| Architecture:\t\t{}\n| Timestamp:\t\t{}\n'.format( + horiz_line, os.path.abspath(self.file_path), self.arch, + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + ) + if tp_list: output += self.create_tp_list(horiz_line) - if(pr_sched): + if pr_sched: output += '\n\n' - sched = Scheduler(self.arch, self.instr_forms) - sched_output, port_binding = sched.new_schedule() - binding = sched.get_port_binding(port_binding) - output += sched.get_report_info() + '\n' + binding + '\n\n' + sched_output + schedule = Scheduler(self.arch, self.instr_forms) + schedule_output, port_binding = schedule.new_schedule() + binding = schedule.get_port_binding(port_binding) + output += schedule.get_report_info() + '\n' + binding + '\n\n' + schedule_output block_tp = round(max(port_binding), 2) output += 'Total number of estimated throughput: ' + str(block_tp) return output @@ -666,7 +653,7 @@ class Osaca(object): str Horizontal separator line """ - return '-'*(self.longestInstr+8) + return '-' * (self.longestInstr + 8) def create_tp_list(self, horiz_line): """ @@ -683,18 +670,16 @@ class Osaca(object): Throughput list output for printing """ warning = False - ws = ' '*(len(horiz_line)-23) + ws = ' ' * (len(horiz_line) - 23) - output = ('\n| INSTRUCTION' + ws + 'CLOCK CYCLES\n' - + '| ' + horiz_line + '\n|\n') + output = '\n| INSTRUCTION{}CLOCK CYCLES\n| {}\n|\n'.format(ws, horiz_line) # Check for the throughput data in CSV for elem in self.instr_forms: op_ext = [] - for i in range(1, len(elem)-1): - optmp = '' - if(isinstance(elem[i], Register) and elem[i].reg_type == 'GPR'): - optmp = 'r'+str(elem[i].size) - elif(isinstance(elem[i], MemAddr)): + for i in range(1, len(elem) - 1): + if isinstance(elem[i], Register) and elem[i].reg_type == 'GPR': + optmp = 'r' + str(elem[i].size) + elif isinstance(elem[i], MemAddr): optmp = 'mem' else: optmp = str(elem[i]).lower() @@ -705,7 +690,7 @@ class Osaca(object): import warnings warnings.filterwarnings("ignore", 'This pattern has match groups') series = self.df['instr'].str.contains(elem[0] + '-' + operands) - if(True in series.values): + if True in series.values: # It's a match! not_found = False try: @@ -726,44 +711,43 @@ class Osaca(object): op_ext_regs.append(True) except KeyError: op_ext_regs.append(False) - if(True not in op_ext_regs): + if True not in op_ext_regs: # No register in whole instr form. How can I find out what regsize we need? print('Feature not included yet: ', end='', file=self.file_output) - print(elem[0]+' for '+operands, file=self.file_output) + print(elem[0] + ' for ' + operands, file=self.file_output) tp = 0 - not_found = True warning = True - num_whitespaces = self.longestInstr-len(elem[-1]) + num_whitespaces = self.longestInstr - len(elem[-1]) ws = ' ' * num_whitespaces + '| ' n_f = ' ' * (5 - len(str(tp))) + '*' data = '| ' + elem[-1] + ws + str(tp) + n_f + '\n' output += data continue - if(op_ext_regs[0] is False): + if op_ext_regs[0] is False: # Instruction stores result in memory. Check for storing in register instead. - if(len(op_ext) > 1): - if(op_ext_regs[1] is True): + if len(op_ext) > 1: + if op_ext_regs[1] is True: op_ext[0] = op_ext[1] - elif(len(op_ext > 2)): - if(op_ext_regs[2] is True): + elif len(op_ext) > 2: + if op_ext_regs[2] is True: op_ext[0] = op_ext[2] - if(len(op_ext_regs) == 2 and op_ext_regs[1] is False): + if len(op_ext_regs) == 2 and op_ext_regs[1] is False: # Instruction loads value from memory and has only two operands. Check for # loading from register instead - if(op_ext_regs[0] is True): + if op_ext_regs[0] is True: op_ext[1] = op_ext[0] - if(len(op_ext_regs) == 3 and op_ext_regs[2] is False): + if len(op_ext_regs) == 3 and op_ext_regs[2] is False: # Instruction loads value from memory and has three operands. Check for loading # from register instead op_ext[2] = op_ext[0] operands = '_'.join(op_ext) # Check for register equivalent instruction - series = self.df['instr'].str.contains(elem[0]+'-'+operands) - if(True in series.values): + series = self.df['instr'].str.contains(elem[0] + '-' + operands) + if True in series.values: # It's a match! not_found = False try: - tp = self.df[self.df.instr == elem[0]+'-'+operands].TP.values[0] + tp = self.df[self.df.instr == elem[0] + '-' + operands].TP.values[0] except IndexError: # Something went wrong print('Error while fetching data from data file', file=self.file_output) @@ -778,19 +762,16 @@ class Osaca(object): num_whitespaces = self.longestInstr - len(elem[-1]) ws = ' ' * num_whitespaces + '| ' n_f = '' - if(not_found): + if not_found: n_f = ' ' * (5 - len(str(tp))) + '*' data = '| ' + elem[-1] + ws + '{:3.2f}'.format(tp) + n_f + '\n' output += data # Finally end the list of throughput values - num_whitespaces = self.longestInstr - 27 - ws = ' ' + ' ' * num_whitespaces output += '| ' + horiz_line + '\n' - if(warning): - output += ('\n\n* There was no throughput value found ' - 'for the specific instruction form.' - '\n Please create a testcase via the create_testcase-method ' - 'or add a value manually.') + if warning: + output += ('\n\n* There was no throughput value found for the specific instruction ' + 'form.\n Please create a testcase via the create_testcase-method or add a ' + 'value manually.') return output @@ -798,8 +779,8 @@ class Osaca(object): # Stolen from pip def __read(*names, **kwargs): with io.open( - os.path.join(os.path.dirname(__file__), *names), - encoding=kwargs.get("encoding", "utf8") + os.path.join(os.path.dirname(__file__), *names), + encoding=kwargs.get("encoding", "utf8") ) as fp: return fp.read() @@ -817,10 +798,10 @@ def __find_version(*file_paths): def main(): # Parse args parser = argparse.ArgumentParser(description='Analyzes a marked innermost loop snippet' - + 'for a given architecture type and prints out the estimated' - + 'average throughput.') - parser.add_argument('-V', '--version', action='version', version='%(prog)s ' - + __find_version('__init__.py')) + 'for a given architecture type and prints out the ' + 'estimated average throughput.') + parser.add_argument('-V', '--version', action='version', + version='%(prog)s ' + __find_version('__init__.py')) parser.add_argument('--arch', dest='arch', type=str, help='define architecture ' + '(SNB, IVB, HSW, BDW, SKL, ZEN)') parser.add_argument('--tp-list', dest='tp_list', action='store_true', @@ -833,49 +814,47 @@ def main(): help='search for IACA markers instead the OSACA marker') group.add_argument('-m', '--insert-marker', dest='insert_marker', action='store_true', help='try to find blocks probably corresponding to loops in assembly and' - + 'insert IACA marker') + + 'insert IACA marker') parser.add_argument('filepath', type=str, help='path to object (Binary, ASM, CSV)') # Store args in global variables inp = parser.parse_args() - if(inp.arch is None and inp.insert_marker is None): + if inp.arch is None and inp.insert_marker is None: raise ValueError('Please specify an architecture.', file=sys.stderr) - if(inp.arch is not None): - arch = inp.arch.upper() + arch = inp.arch.upper() filepath = inp.filepath incl_ibench = inp.incl iaca_flag = inp.iaca insert_m = inp.insert_marker # Create Osaca object - if(inp.arch is not None): - osaca = Osaca(arch, filepath) - if(inp.tp_list): + osaca = OSACA(arch, filepath) + if inp.tp_list: osaca.tp_list = True - if(incl_ibench): + if incl_ibench: try: osaca.include_ibench() except UnboundLocalError: print('Please specify an architecture.', file=sys.stderr) - elif(iaca_flag): + elif iaca_flag: try: osaca.inspect_with_iaca() except UnboundLocalError: print('Please specify an architecture.', file=sys.stderr) - elif(insert_m): + elif insert_m: try: from kerncraft import iaca except ImportError: - print('ImportError: Module kerncraft not installed. Use ' - + '\'pip install --user kerncraft\' for installation.\nFor more information see ' - + 'https://github.com/RRZE-HPC/kerncraft', file=sys.stderr) + print("ImportError: Module kerncraft not installed. Use 'pip install --user " + "kerncraft' for installation.\nFor more information see " + "https://github.com/RRZE-HPC/kerncraft", file=sys.stderr) sys.exit(1) # Change due to newer kerncraft version (hopefully temporary) - #iaca.iaca_instrumentation(input_file=filepath, output_file=filepath, + # iaca.iaca_instrumentation(input_file=filepath, output_file=filepath, # block_selection='manual', pointer_increment=1) with open(filepath, 'r') as f_in, open(filepath[:-2] + '-iaca.s', 'w') as f_out: - iaca.iaca_instrumentation(input_file=f_in, output_file=f_out, + iaca.iaca_instrumentation(input_file=f_in, output_file=f_out, block_selection='manual', pointer_increment=1) else: osaca.inspect_binary() diff --git a/osaca/param.py b/osaca/param.py index d9f15a6..61cd9c0 100755 --- a/osaca/param.py +++ b/osaca/param.py @@ -4,12 +4,12 @@ class Parameter(object): def __init__(self, ptype): self.ptype = ptype.upper() - if(self.ptype not in self.type_list): + if self.ptype not in self.type_list: raise NameError('Type not supported: '+ptype) def __str__(self): - """returns string representation""" - if(self.ptype == 'NONE'): + """Return string representation.""" + if self.ptype == 'NONE': return '' else: return self.ptype @@ -20,27 +20,28 @@ class MemAddr(Parameter): scales = [1, 2, 4, 8] def __init__(self, name): + super().__init__("MEM") self.sreg = False self.offset = False self.base = False self.index = False self.scale = False - if(':' in name): - if(name[1:name.index(':')].upper() not in self.segment_regs): + if ':' in name: + if name[1:name.index(':')].upper() not in self.segment_regs: raise NameError('Type not supported: '+name) self.sreg = True self.offset = True - if('(' not in name or ('(' in name and name.index('(') != 0)): + if '(' not in name or ('(' in name and name.index('(') != 0): self.offset = True - if('(' in name): + if '(' in name: self.parentheses = name[name.index('(')+1:-1] self.commacnt = self.parentheses.count(',') - if(self.commacnt == 0): + if self.commacnt == 0: self.base = True - elif(self.commacnt == 1 or self.commacnt == 2 and int(self.parentheses[-1:]) == 1): + elif self.commacnt == 1 or self.commacnt == 2 and int(self.parentheses[-1:]) == 1: self.base = True self.index = True - elif(self.commacnt == 2 and int(self.parentheses[-1:]) in self.scales): + elif self.commacnt == 2 and int(self.parentheses[-1:]) in self.scales: self.base = True self.index = True self.scale = True @@ -50,13 +51,13 @@ class MemAddr(Parameter): def __str__(self): """returns string representation""" mem_format = 'MEM(' - if(self.sreg): + if self.sreg: mem_format += 'sreg:' - if(self.offset): + if self.offset: mem_format += 'offset' - if(self.base and not self.index): + if self.base and not self.index: mem_format += '(base)' - elif(self.base and self.index and self.scale): + elif self.base and self.index and self.scale: mem_format += '(base, index, scale)' mem_format += ')' return mem_format @@ -131,17 +132,18 @@ class Register(Parameter): } def __init__(self, name, mask=False): + super().__init__("REG") self.name = name.upper() self.mask = mask - if[name in self.sizes]: + if self.name in self.sizes: self.size = self.sizes[self.name][0] self.reg_type = self.sizes[self.name][1] else: - raise NameError('Register name not in dictionary: '+self.name) + raise NameError('Register name not in dictionary: {}'.format(self.name)) def __str__(self): - """returns string representation""" + """Return string representation.""" opmask = '' - if(self.mask): + if self.mask: opmask = '{opmask}' - return(self.reg_type+opmask) + return self.reg_type + opmask diff --git a/osaca/testcase.py b/osaca/testcase.py index d7ae7b2..2ad6b62 100755 --- a/osaca/testcase.py +++ b/osaca/testcase.py @@ -67,7 +67,8 @@ class Testcase(object): # num_instr must be an even number self.num_instr = str(ceil(int(_num_instr)/2)*2) # Check for the number of operands and initialise the GPRs if necessary - self.op_a, self.op_b, self.op_c, self.gprPush, self.gprPop, self.zeroGPR, self.copy = self.__define_operands() + self.op_a, self.op_b, self.op_c, self.gprPush, self.gprPop, self.zeroGPR, self.copy = \ + self.__define_operands() self.num_operands = len(self.param_list) # Create asm header @@ -97,7 +98,7 @@ class Testcase(object): (default True) """ osaca_dir = os.path.expanduser('~') + '/.osaca/' - if(lt): + if lt: # Write latency file call(['mkdir', '-p', osaca_dir + 'benchmarks']) f = open(osaca_dir + 'benchmarks/'+self.instr+self.extension+'.S', 'w') @@ -105,7 +106,7 @@ class Testcase(object): + self.zeroGPR + self.copy + self.loop_lat + self.gprPop + self.done) f.write(data) f.close() - if(tp): + if tp: # Write throughput file call(['mkdir', '-p', osaca_dir + 'benchmarks']) f = open(osaca_dir + 'benchmarks/' + self.instr + self.extension @@ -126,53 +127,53 @@ class Testcase(object): String tuple containing types of operands and if needed push/pop operations, the initialisation of general purpose regs and the copy if registers. """ - oprnds = self.param_list + operands = self.param_list op_a, op_b, op_c = ('', '', '') gpr_push, gpr_pop, zero_gpr = ('', '', '') - if(isinstance(oprnds[0], Register)): - op_a = oprnds[0].reg_type.lower() - elif(isinstance(oprnds[0], MemAddr)): + if isinstance(operands[0], Register): + op_a = operands[0].reg_type.lower() + elif isinstance(operands[0], MemAddr): op_a = 'mem' - elif(isinstance(oprnds[0], Parameter) and str(oprnds[0]) == 'IMD'): + elif isinstance(operands[0], Parameter) and str(operands[0]) == 'IMD': op_a = 'imd' - if(op_a == 'gpr'): + if op_a == 'gpr': gpr_push, gpr_pop, zero_gpr = self.__initialise_gprs() - op_a += str(oprnds[0].size) - if(len(oprnds) > 1): - if(isinstance(oprnds[1], Register)): - op_b = oprnds[1].reg_type.lower() - elif(isinstance(oprnds[1], MemAddr)): + op_a += str(operands[0].size) + if len(operands) > 1: + if isinstance(operands[1], Register): + op_b = operands[1].reg_type.lower() + elif isinstance(operands[1], MemAddr): op_b = 'mem' - elif(isinstance(oprnds[1], Parameter) and str(oprnds[1]) == 'IMD'): + elif isinstance(operands[1], Parameter) and str(operands[1]) == 'IMD': op_b = 'imd' - if(op_b == 'gpr'): - op_b += str(oprnds[1].size) - if('gpr' not in op_a): + if op_b == 'gpr': + op_b += str(operands[1].size) + if 'gpr' not in op_a: gpr_push, gpr_pop, zero_gpr = self.__initialise_gprs() - if(len(oprnds) == 3): - if(isinstance(oprnds[2], Register)): - op_c = oprnds[2].reg_type.lower() - elif(isinstance(oprnds[2], MemAddr)): + if len(operands) == 3: + if isinstance(operands[2], Register): + op_c = operands[2].reg_type.lower() + elif isinstance(operands[2], MemAddr): op_c = 'mem' - elif(isinstance(oprnds[2], Parameter) and str(oprnds[2]) == 'IMD'): + elif isinstance(operands[2], Parameter) and str(operands[2]) == 'IMD': op_c = 'imd' - if(op_c == 'gpr'): - op_c += str(oprnds[2].size) - if(('gpr' not in op_a) and ('gpr'not in op_b)): + if op_c == 'gpr': + op_c += str(operands[2].size) + if ('gpr' not in op_a) and ('gpr' not in op_b): gpr_push, gpr_pop, zero_gpr = self.__initialise_gprs() - if(len(oprnds) == 1 and isinstance(oprnds[0], Register)): - copy = self.__copy_regs(oprnds[0]) - elif(len(oprnds) > 1 and isinstance(oprnds[1], Register)): - copy = self.__copy_regs(oprnds[1]) - elif(len(oprnds) > 2 and isinstance(oprnds[2], Register)): - copy = self.__copy_regs(oprnds[1]) + if len(operands) == 1 and isinstance(operands[0], Register): + copy = self.__copy_regs(operands[0]) + elif len(operands) > 1 and isinstance(operands[1], Register): + copy = self.__copy_regs(operands[1]) + elif len(operands) > 2 and isinstance(operands[2], Register): + copy = self.__copy_regs(operands[1]) else: copy = '' - return (op_a, op_b, op_c, gpr_push, gpr_pop, zero_gpr, copy) + return op_a, op_b, op_c, gpr_push, gpr_pop, zero_gpr, copy def __initialise_gprs(self): """ - Initialise eleven general purpose registers and set them to zero. + Initialize eleven general purpose registers and set them to zero. Returns ------- @@ -189,10 +190,10 @@ class Testcase(object): gpr_pop += '\t\tpop {}\n'.format(reg) for reg in self.gprs64: zero_gpr += '\t\txor {}, {}\n'.format(reg, reg) - return (gpr_push, gpr_pop, zero_gpr) + return gpr_push, gpr_pop, zero_gpr -# Copy created values in specific register + # Copy created values in specific register def __copy_regs(self, reg): """ Copy created values in specific register. @@ -208,8 +209,8 @@ class Testcase(object): String containing the copy instructions """ copy = '\t\t# copy DP 1.0\n' -# Different handling for GPR, MMX and SSE/AVX registers - if(reg.reg_type == 'GPR'): + # Different handling for GPR, MMX and SSE/AVX registers + if reg.reg_type == 'GPR': copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][1]) copy += '\t\t# Create DP 2.0\n' @@ -218,7 +219,7 @@ class Testcase(object): copy += '\t\tdiv {}\n'.format(self.ops['gpr64'][0]) copy += '\t\tmovq {}, {}\n'.format(self.ops['gpr64'][2], self.ops['gpr64'][0]) copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) - elif(reg.reg_type == 'MMX'): + elif reg.reg_type == 'MMX': copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][0]) copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][1]) copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0]) @@ -227,7 +228,7 @@ class Testcase(object): copy += '\t\t# Create DP 0.5\n' copy += '\t\tdiv {}\n'.format(self.ops['gpr64'][0]) copy += '\t\tmovq {}, {}\n'.format(self.ops['mmx'][2], self.ops['gpr64'][0]) - elif(reg.reg_type == 'XMM' or reg.reg_type == 'YMM' or reg.reg_type == 'ZMM'): + elif reg.reg_type == 'XMM' or reg.reg_type == 'YMM' or reg.reg_type == 'ZMM': key = reg.reg_type.lower() copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][0], self.ops[key][0]) copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][1], self.ops[key][0]) @@ -278,15 +279,15 @@ class Testcase(object): '\t\tjle done\n') # Expand to AVX(512) if necessary expand = '' - if(self.op_a == 'ymm' or self.op_b == 'ymm' or self.op_c == 'ymm'): + if self.op_a == 'ymm' or self.op_b == 'ymm' or self.op_c == 'ymm': expand = ('\t\t# expand from SSE to AVX\n' '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n') - if(self.op_a == 'zmm' or self.op_b == 'zmm' or self.op_c == 'zmm'): + if self.op_a == 'zmm' or self.op_b == 'zmm' or self.op_c == 'zmm': expand = ('\t\t# expand from SSE to AVX\n' '\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n' '\t\t# expand from AVX to AVX512\n' '\t\tvinsert64x4 zmm0, zmm0, ymm0, 0x1\n') - return (def_instr, ninstr, init, expand) + return def_instr, ninstr, init, expand def __define_loop_lat(self): """ @@ -299,22 +300,22 @@ class Testcase(object): """ loop_lat = ('loop:\n' '\t\tinc i\n') - if(self.num_operands == 1): + if self.num_operands == 1: for i in range(0, int(self.num_instr)): loop_lat += '\t\tINSTR {}\n'.format(self.ops[self.op_a][0]) - elif(self.num_operands == 2 and self.op_a == self.op_b): + elif self.num_operands == 2 and self.op_a == self.op_b: for i in range(0, int(self.num_instr), 2): loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][1]) loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_b][1], self.ops[self.op_b][0]) - elif(self.num_operands == 2 and self.op_a != self.op_b): + elif self.num_operands == 2 and self.op_a != self.op_b: for i in range(0, int(self.num_instr), 2): loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0]) loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0]) - elif(self.num_operands == 3 and self.op_a == self.op_b): + elif self.num_operands == 3 and self.op_a == self.op_b: for i in range(0, int(self.num_instr), 2): loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][1], @@ -322,7 +323,7 @@ class Testcase(object): loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1], self.ops[self.op_b][0], self.ops[self.op_c][0]) - elif(self.num_operands == 3 and self.op_a == self.op_c): + elif self.num_operands == 3 and self.op_a == self.op_c: for i in range(0, int(self.num_instr), 2): loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0], self.ops[self.op_b][0], @@ -348,15 +349,15 @@ class Testcase(object): ext = '' ext1 = False ext2 = False - if(self.num_operands == 2): + if self.num_operands == 2: ext1 = True - if(self.num_operands == 3): + if self.num_operands == 3: ext1 = True ext2 = True for i in range(0, int(self.num_instr)): - if(ext1): + if ext1: ext = ', {}'.format(self.ops[self.op_b][i % 3]) - if(ext2): + if ext2: ext += ', {}'.format(self.ops[self.op_c][i % 3]) reg_num = (i % (len(self.ops[self.op_a]) - 3)) + 3 loop_thrpt += '\t\tINSTR {}{}\n'.format(self.ops[self.op_a][reg_num], ext) @@ -381,15 +382,15 @@ class Testcase(object): lt = False name = self.instr+self.extension for root, dirs, files in os.walk(os.path.dirname(__file__)+'/benchmarks'): - if((name+'-tp.S') in files): + if (name + '-tp.S') in files: tp = True if name+'.S' in files: lt = True - return (tp, lt) + return tp, lt def get_entryname(self): """ - Returns the name of the entry the instruction form would be the data file + Return the name of the entry the instruction form would be the data file Returns ------- diff --git a/setup.py b/setup.py index cb85627..ad796aa 100755 --- a/setup.py +++ b/setup.py @@ -10,12 +10,14 @@ import re here = os.path.abspath(os.path.dirname(__file__)) + # Stolen from pip def read(*names, **kwargs): with io.open(os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")) as fp: return fp.read() + # Stolen from pip def find_version(*file_paths): version_file = read(*file_paths) @@ -24,6 +26,7 @@ def find_version(*file_paths): return version_match.group(1) raise RuntimeError("Unable to find version string.") + # Get the long description from the README file with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: long_description = f.read() @@ -43,11 +46,11 @@ setup( # The project's main homepage url='https://github.com/RRZE-HPC/OSACA', - #Author details + # Author details author='Jan Laukemann', author_email='jan.laukemann@fau.de', - #Choose your license + # Choose your license license='AGPLv3', # See https://pypi.python.org/pypi?%3Aaction=list_classifiers @@ -77,7 +80,6 @@ setup( # What doesd your project relate to? keywords='hpc performance benchmark analysis architecture', - # You can just specify the packages manually here if your project is # simple. Or you can use find_packages(). packages=find_packages(exclude=['contrib', 'docs', 'tests']), @@ -97,10 +99,10 @@ setup( # dependencies). You can install these using the following syntax, # for example: # $ pip install -e .[dev,test] - #extras_require={ + # extras_require={ # 'dev': ['check-manifest'], # 'test': ['coverage'], - #}, + # }, # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these @@ -111,7 +113,7 @@ setup( # need to place data files outside of your packages. See: # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa # In this case, 'data_file' will be installed into '/my_data' - #data_files=[('my_data', ['data/data_file'])], + # data_files=[('my_data', ['data/data_file'])], # To provide executable scripts, use entry points in preference to the # "scripts" keyword. Entry points provide cross-platform support and allow @@ -122,7 +124,3 @@ setup( ], }, ) - - - - diff --git a/tests/test_osaca.py b/tests/test_osaca.py index bfd9229..2f4f91d 100755 --- a/tests/test_osaca.py +++ b/tests/test_osaca.py @@ -7,29 +7,30 @@ import os import unittest sys.path.insert(0, '..') -from osaca.osaca import Osaca +from osaca.osaca import OSACA + class TestOsaca(unittest.TestCase): def testIACABinary(self): out = StringIO() curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) - osa = Osaca('IVB', curr_dir+'/testfiles/taxCalc-ivb-iaca', out) + osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca', out) osa.inspect_with_iaca() result = out.getvalue() result = '\n'.join(result.split('\n')[-27:]) - with open(curr_dir+'/test_osaca_iaca.out', encoding='utf-8') as f: + with open(curr_dir + '/test_osaca_iaca.out', encoding='utf-8') as f: assertion = f.read() self.assertEqual(assertion, result) - + # Test ASM file with IACA marker in two lines def testIACAasm1(self): out = StringIO() curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) - osa = Osaca('IVB', curr_dir+'/testfiles/taxCalc-ivb-iaca.S', out) + osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca.S', out) osa.inspect_with_iaca() result = out.getvalue() result = '\n'.join(result.split('\n')[-27:]) - with open(curr_dir+'/test_osaca_iaca_asm.out', encoding='utf-8') as f: + with open(curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f: assertion = f.read() self.assertEqual(assertion, result) @@ -37,10 +38,10 @@ class TestOsaca(unittest.TestCase): def testIACAasm2(self): out = StringIO() curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) - osa = Osaca('IVB', curr_dir+'/testfiles/taxCalc-ivb-iaca2.S', out) + osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca2.S', out) osa.inspect_with_iaca() result = out.getvalue() result = '\n'.join(result.split('\n')[-27:]) - with open(curr_dir+'/test_osaca_iaca_asm.out', encoding='utf-8') as f: + with open(curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f: assertion = f.read() self.assertEqual(assertion, result)