From 71377ee420721a4ab3a248fe9eb02bef14c8561d Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Mon, 17 Dec 2018 17:24:20 +0100 Subject: [PATCH 01/17] moved constants in OSACA class --- osaca/osaca.py | 63 +++++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index 3e9854d..4e03061 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -16,19 +16,17 @@ from osaca.eu_sched import Scheduler from osaca.testcase import Testcase +DATA_DIR = os.path.expanduser('~') + '/.osaca/' + + class OSACA(object): - arch = None srcCode = None - df = None - instr_forms = None tp_list = False - file_output = '' - osaca_dir = os.path.expanduser('~') + '/.osaca/' # Variables for checking lines numSeps = 0 indentChar = '' sem = 0 - marker = r'//STARTLOOP' + CODE_MARKER = r'//STARTLOOP' # Variables for creating output longestInstr = 30 @@ -43,29 +41,33 @@ class OSACA(object): r'((,[ \t]*103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte' r'[ \t]+103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))') - def __init__(self, _arch, file_path, output=sys.stdout): - self.arch = _arch + VALID_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'ZEN'] + + def __init__(self, arch, file_path, output=sys.stdout): + # Check architecture + if arch not in self.VALID_ARCHS: + raise ValueError("Invalid architecture ({!r}), must be one of {}.".format( + arch, self.VALID_ARCHS)) + self.arch = arch + self.file_path = file_path self.instr_forms = [] self.file_output = output # Check if data files are already in usr dir, otherwise create them - if not os.path.isdir(self.osaca_dir + 'data'): + if not os.path.isdir(os.path.join(DATA_DIR, 'data')): print('Copying files in user directory...', file=self.file_output, end='') - subprocess.call(['mkdir', '-p', self.osaca_dir]) + os.makedirs(os.path.join(DATA_DIR, 'data')) subprocess.call(['cp', '-r', '/'.join(os.path.realpath(__file__).split('/')[:-1]) + '/data', - self.osaca_dir]) - print('Done!', file=self.file_output) + DATA_DIR]) + print(' Done!', file=self.file_output) + # -----------------main functions depending on arguments-------------------- def include_ibench(self): """ Read ibench output and include it in the architecture specific csv file. """ - # Check args and exit program if something's wrong - if not self.check_arch(): - print('Invalid microarchitecture.', file=sys.stderr) - sys.exit(1) if not self.check_file(): print('Invalid file path or file format.', file=sys.stderr) sys.exit(1) @@ -188,23 +190,6 @@ class OSACA(object): # -------------------------------------------------------------------------- - def check_arch(self): - """ - Check if the architecture is valid. - - Returns - ------- - bool - True if arch is supported - False if arch is not supported - - """ - arch_list = ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'ZEN'] - if self.arch in arch_list: - return True - else: - return False - def check_elffile(self): """ Check if the given filepath exists, if the format is the needed elf64 @@ -385,12 +370,12 @@ class OSACA(object): Necessary for setting indenting character (default False) """ # Check if marker is in line - if self.marker in line: + if self.CODE_MARKER in line: # First, check if high level code in indented with whitespaces or tabs if first_appearance: self.indentChar = self.get_indent_chars(line) # Now count the number of whitespaces - self.numSeps = (re.split(self.marker, line)[0]).count(self.indentChar) + self.numSeps = (re.split(self.CODE_MARKER, line)[0]).count(self.indentChar) self.sem = 3 elif self.sem > 0: # We're in the marked code snippet @@ -420,8 +405,8 @@ class OSACA(object): str Indentation character as string """ - num_spaces = (re.split(self.marker, line)[0]).count(' ') - num_tabs = (re.split(self.marker, line)[0]).count('\t') + num_spaces = (re.split(self.CODE_MARKER, line)[0]).count(' ') + num_tabs = (re.split(self.CODE_MARKER, line)[0]).count('\t') if num_spaces != 0 and num_tabs == 0: return ' ' elif num_spaces == 0 and num_tabs != 0: @@ -434,12 +419,12 @@ class OSACA(object): """ Extract instruction forms out of binary file using IACA markers. """ - self.marker = r'fs addr32 nop' + self.CODE_MARKER = r'fs addr32 nop' part1 = re.compile(r'64\s+fs') part2 = re.compile(r'67 90\s+addr32 nop') for line in self.srcCode: # Check if marker is in line - if self.marker in line: + if self.CODE_MARKER in line: self.sem += 1 elif re.search(part1, line) or re.search(part2, line): self.sem += 0.5 From eb512a55274cd393b7ff690b0d46a8161e396f41 Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Mon, 17 Dec 2018 17:26:27 +0100 Subject: [PATCH 02/17] automatically reading csv in __init__ write_csv uses class state --- osaca/osaca.py | 53 ++++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index 4e03061..233dfaa 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -62,6 +62,9 @@ class OSACA(object): DATA_DIR]) print(' Done!', file=self.file_output) + # Check for database for the chosen architecture + self.df = self.read_csv() + # -----------------main functions depending on arguments-------------------- def include_ibench(self): @@ -71,8 +74,6 @@ class OSACA(object): if not self.check_file(): print('Invalid file path or file format.', file=sys.stderr) sys.exit(1) - # Check for database for the chosen architecture - self.df = self.read_csv() # Create sequence of numbers and their reciprocals for validate the measurements cyc_list, reci_list = self.create_sequences() print('Everything seems fine! Let\'s start!', file=self.file_output) @@ -132,8 +133,7 @@ class OSACA(object): # Now merge the DataFrames and write new csv file self.df = self.df.append(pd.DataFrame(new_data, columns=['instr', 'TP', 'LT', 'ports']), ignore_index=True) - csv = self.df.to_csv(index=False) - self.write_csv(csv) + self.write_csv() print('ibench output {}'.format(self.file_path.split('/')[-1]), 'successfully in data file included.', file=self.file_output) print('{} values were added.'.format(added_vals), file=self.file_output) @@ -143,16 +143,11 @@ class OSACA(object): Main function of OSACA. Inspect binary file and create analysis. """ # Check args and exit program if something's wrong - if not self.check_arch(): - print('Invalid microarchitecture.', file=sys.stderr) - sys.exit(1) if not self.check_elffile(): print('Invalid file path or file format. Not an ELF file.', file=sys.stderr) sys.exit(1) - # Finally check for database for the chosen architecture - self.df = self.read_csv() - print('Everything seems fine! Let\'s start checking!', file=self.file_output) + print("Everything seems fine! Let's start checking!", file=self.file_output) for i, line in enumerate(self.srcCode): if i == 0: self.check_line(line, True) @@ -166,10 +161,6 @@ class OSACA(object): Main function of OSACA with IACA markers instead of OSACA marker. Inspect binary file and create analysis. """ - # Check args and exit program if something's wrong - if not self.check_arch(): - print('Invalid microarchitecture.', file=sys.stderr) - sys.exit() # Check if input file is a binary or assembly file binary_file = True if not self.check_elffile(): @@ -177,8 +168,6 @@ class OSACA(object): if not self.check_file(True): print('Invalid file path or file format.', file=sys.stderr) sys.exit(1) - # Finally check for database for the chosen architecture - self.df = self.read_csv() print('Everything seems fine! Let\'s start checking!', file=self.file_output) if binary_file: @@ -253,11 +242,9 @@ class OSACA(object): store file data as a string in attribute srcCode if True, store it as a list of strings (lines) if False (default False) """ - f = open(self.file_path, 'r') - self.srcCode = '' - for line in f: - self.srcCode += line - f.close() + with open(self.file_path, 'r') as f: + self.srcCode = f.read() + if iaca_flag: return self.srcCode = self.srcCode.split('\n') @@ -272,22 +259,16 @@ class OSACA(object): CSV as DataFrame object """ # curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) - df = pd.read_csv(self.osaca_dir + 'data/' + self.arch.lower() + '_data.csv') - return df + return pd.read_csv(DATA_DIR + 'data/' + self.arch.lower() + '_data.csv') - def write_csv(self, csv): + def write_csv(self): """ - Write architecture dependent CSV into data directory. - - Parameters - ---------- - csv : str - CSV data as string + Write architecture DataFrame as CSV into data directory. """ # curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1]) - f = open(self.osaca_dir + 'data/' + self.arch.lower() + '_data.csv', 'w') - f.write(csv) - f.close() + csv = self.df.to_csv(index=False) + with open(DATA_DIR + 'data/' + self.arch.lower() + '_data.csv', 'w') as f: + f.write(csv) def create_sequences(self, end=101): """ @@ -457,7 +438,11 @@ class OSACA(object): # Search for the start marker match = re.match(self.IACA_SM, code) while not match: - code = code.split('\n', 1)[1] + code = code.split('\n', 1) + if len(code) > 1: + code = code[1] + else: + raise ValueError("No IACA-style markers found in assembly code.") match = re.match(self.IACA_SM, code) # Search for the end marker code = (code.split('144', 1)[1]).split('\n', 1)[1] From 822e1ef3007d3dca1d14ab725b89e903ede0be1b Mon Sep 17 00:00:00 2001 From: Jan Laukemann Date: Mon, 17 Dec 2018 18:08:20 +0100 Subject: [PATCH 03/17] added machine-readable output feature --- osaca/eu_sched.py | 23 +++++++++++++++++++---- osaca/get_instr.py | 2 ++ osaca/osaca.py | 41 ++++++++++++++++++++++++++++++----------- osaca/testcase.py | 1 + 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/osaca/eu_sched.py b/osaca/eu_sched.py index ed66d32..31b8b25 100755 --- a/osaca/eu_sched.py +++ b/osaca/eu_sched.py @@ -8,6 +8,7 @@ from operator import add import pandas as pd from osaca.param import Register, MemAddr +#from param import Register, MemAddr class Scheduler(object): @@ -48,15 +49,22 @@ class Scheduler(object): osaca_dir = os.path.expanduser('~/.osaca/') self.df = pd.read_csv(osaca_dir + 'data/' + arch.lower() + '_data.csv', quotechar='"', converters={'ports': ast.literal_eval}) - def new_schedule(self): + def new_schedule(self, machine_readable=False): """ Schedules Instruction Form list and calculates port bindings. + Parameters + ---------- + machine_readable : bool + Boolean for indicating if the return value should be human readable (if False) or + machine readable (if True) + Returns ------- - (str, [int, ...]) - A tuple containing the graphic output of the schedule as string and - the port bindings as list of ints. + (str, [float, ...]) or ([[float, ...], ...], [float, ...]) + A tuple containing the output of the schedule as string (if machine_readable is not + given or False) or as list of lists (if machine_readable is True) and the port bindings + as list of float. """ sched = self.get_head() # Initialize ports @@ -113,6 +121,9 @@ class Scheduler(object): sched += self.get_line(occ_ports[i], instrForm[-1]) # Add throughput to total port binding port_bndgs = list(map(add, port_bndgs, occ_ports[i])) + if(machine_readable): + list(map(self.append, occ_ports, self.instrList)) + return (occ_ports, port_bndgs) return (sched, port_bndgs) def schedule(self): @@ -180,6 +191,10 @@ class Scheduler(object): return self.flatten(l[0]) + self.flatten(l[1:]) return l[:1] + self.flatten(l[1:]) + def append(self, l, e): + if(isinstance(l, list)): + l.append(e) + def schedule_fcfs(self): """ Schedules Instruction Form list for a single run with latencies. diff --git a/osaca/get_instr.py b/osaca/get_instr.py index b083902..b09f776 100755 --- a/osaca/get_instr.py +++ b/osaca/get_instr.py @@ -5,6 +5,8 @@ import argparse from osaca.testcase import Testcase from osaca.param import Register, MemAddr, Parameter +#from testcase import Testcase +#from param import Register, MemAddr, Parameter class InstrExtractor(object): diff --git a/osaca/osaca.py b/osaca/osaca.py index 857c34d..b06c5b5 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -14,6 +14,9 @@ import numpy as np from osaca.param import Register, MemAddr, Parameter from osaca.eu_sched import Scheduler from osaca.testcase import Testcase +#from param import Register, MemAddr, Parameter +#from eu_sched import Scheduler +#from testcase import Testcase class Osaca(object): @@ -33,6 +36,7 @@ class Osaca(object): # Variables for creating output longestInstr = 30 + machine_readable = False # Constants ASM_LINE = re.compile(r'\s[0-9a-f]+[:]') # Matches every variation of the IACA start marker @@ -75,7 +79,7 @@ class Osaca(object): self.df = self.read_csv() # Create sequence of numbers and their reciprokals for validate the measurements cyc_list, reci_list = self.create_sequences() - print('Everything seems fine! Let\'s start!', file=self.file_output) + #print('Everything seems fine! Let\'s start!', file=self.file_output) new_data = [] added_vals = 0 for line in self.srcCode: @@ -153,14 +157,17 @@ class Osaca(object): # Finally check for database for the chosen architecture self.df = self.read_csv() - print('Everything seems fine! Let\'s start checking!', file=self.file_output) + #print('Everything seems fine! Let\'s start checking!', file=self.file_output) for i, line in enumerate(self.srcCode): if(i == 0): self.check_line(line, True) else: self.check_line(line) - output = self.create_output(self.tp_list) - print(output, file=self.file_output) + output = self.create_output(self.tp_list, True, self.machine_readable) + if(self.machine_readable): + return output + else: + print(output, file=self.file_output) def inspect_with_iaca(self): """ @@ -181,13 +188,16 @@ class Osaca(object): # Finally check for database for the chosen architecture self.df = self.read_csv() - print('Everything seems fine! Let\'s start checking!', file=self.file_output) + #print('Everything seems fine! Let\'s start checking!', file=self.file_output) if(binary_file): self.iaca_bin() else: self.iaca_asm() - output = self.create_output(self.tp_list) - print(output, file=self.file_output) + output = self.create_output(self.tp_list, True, self.machine_readable) + if(self.machine_readable): + return output + else: + print(output, file=self.file_output) # -------------------------------------------------------------------------- @@ -618,7 +628,7 @@ class Osaca(object): return self.flatten(l[0]) + self.flatten(l[1:]) return l[:1] + self.flatten(l[1:]) - def create_output(self, tp_list=False, pr_sched=True): + def create_output(self, tp_list=False, pr_sched=True, machine_readable=False): """ Creates output of analysed file including a time stamp. @@ -650,7 +660,10 @@ class Osaca(object): if(pr_sched): output += '\n\n' sched = Scheduler(self.arch, self.instr_forms) - sched_output, port_binding = sched.new_schedule() + sched_output, port_binding = sched.new_schedule(machine_readable) + # if machine_readable, we're already done here + if(machine_readable): + return sched_output binding = sched.get_port_binding(port_binding) output += sched.get_report_info() + '\n' + binding + '\n\n' + sched_output block_tp = round(max(port_binding), 2) @@ -834,6 +847,8 @@ def main(): group.add_argument('-m', '--insert-marker', dest='insert_marker', action='store_true', help='try to find blocks probably corresponding to loops in assembly and' + 'insert IACA marker') + parser.add_argument('-l', '--list-output', dest='machine_readable', action='store_true', + help='returns output as machine readable list of lists') parser.add_argument('filepath', type=str, help='path to object (Binary, ASM, CSV)') # Store args in global variables @@ -852,6 +867,10 @@ def main(): osaca = Osaca(arch, filepath) if(inp.tp_list): osaca.tp_list = True + if(inp.machine_readable): + osaca.machine_readable = True + osaca.output = None + if(incl_ibench): try: @@ -860,7 +879,7 @@ def main(): print('Please specify an architecture.', file=sys.stderr) elif(iaca_flag): try: - osaca.inspect_with_iaca() + return osaca.inspect_with_iaca() except UnboundLocalError: print('Please specify an architecture.', file=sys.stderr) elif(insert_m): @@ -878,7 +897,7 @@ def main(): iaca.iaca_instrumentation(input_file=f_in, output_file=f_out, block_selection='manual', pointer_increment=1) else: - osaca.inspect_binary() + return osaca.inspect_binary() # ------------Main method-------------- diff --git a/osaca/testcase.py b/osaca/testcase.py index d7ae7b2..973bfcf 100755 --- a/osaca/testcase.py +++ b/osaca/testcase.py @@ -5,6 +5,7 @@ from subprocess import call from math import ceil from osaca.param import Register, MemAddr, Parameter +#from param import Register, MemAddr, Parameter class Testcase(object): From ea801f81a6a9ce7e2ad9756aa28b3e1832d262d1 Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Wed, 19 Dec 2018 18:26:22 +0100 Subject: [PATCH 04/17] code and argparser cleanup --- osaca/osaca.py | 57 ++++++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index c5358fb..cf74cd8 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -15,7 +15,6 @@ from osaca.param import Register, MemAddr, Parameter from osaca.eu_sched import Scheduler from osaca.testcase import Testcase - DATA_DIR = os.path.expanduser('~') + '/.osaca/' @@ -66,7 +65,6 @@ class OSACA(object): # Check for database for the chosen architecture self.df = self.read_csv() - # -----------------main functions depending on arguments-------------------- def include_ibench(self): """ @@ -77,7 +75,7 @@ class OSACA(object): sys.exit(1) # Create sequence of numbers and their reciprocals for validate the measurements cyc_list, reci_list = self.create_sequences() - #print('Everything seems fine! Let\'s start!', file=self.file_output) + # print('Everything seems fine! Let\'s start!', file=self.file_output) new_data = [] added_vals = 0 for line in self.srcCode: @@ -135,8 +133,7 @@ class OSACA(object): self.df = self.df.append(pd.DataFrame(new_data, columns=['instr', 'TP', 'LT', 'ports']), ignore_index=True) self.write_csv() - print('ibench output {}'.format(self.file_path.split('/')[-1]), - 'successfully in data file included.', file=self.file_output) + print('ibench output included successfully in data file .', file=self.file_output) print('{} values were added.'.format(added_vals), file=self.file_output) def inspect_binary(self): @@ -148,7 +145,7 @@ class OSACA(object): print('Invalid file path or file format. Not an ELF file.', file=sys.stderr) sys.exit(1) - #print('Everything seems fine! Let\'s start checking!', file=self.file_output) + # print('Everything seems fine! Let\'s start checking!', file=self.file_output) for i, line in enumerate(self.srcCode): if i == 0: @@ -156,7 +153,7 @@ class OSACA(object): else: self.check_line(line) output = self.create_output(self.tp_list, True, self.machine_readable) - if(self.machine_readable): + if self.machine_readable: return output else: print(output, file=self.file_output) @@ -174,13 +171,13 @@ class OSACA(object): print('Invalid file path or file format.', file=sys.stderr) sys.exit(1) - #print('Everything seems fine! Let\'s start checking!', file=self.file_output) - if(binary_file): + # print('Everything seems fine! Let\'s start checking!', file=self.file_output) + if binary_file: self.iaca_bin() else: self.iaca_asm() output = self.create_output(self.tp_list, True, self.machine_readable) - if(self.machine_readable): + if self.machine_readable: return output else: print(output, file=self.file_output) @@ -617,7 +614,7 @@ class OSACA(object): sched = Scheduler(self.arch, self.instr_forms) sched_output, port_binding = sched.new_schedule(machine_readable) # if machine_readable, we're already done here - if(machine_readable): + if machine_readable: return sched_output binding = sched.get_port_binding(port_binding) output += sched.get_report_info() + '\n' + binding + '\n\n' + sched_output @@ -783,53 +780,45 @@ def main(): 'estimated average throughput.') parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __find_version('__init__.py')) - parser.add_argument('--arch', dest='arch', type=str, + parser.add_argument('--arch', type=str, required=True, help='define architecture (SNB, IVB, HSW, BDW, SKL, ZEN)') - parser.add_argument('--tp-list', dest='tp_list', action='store_true', + parser.add_argument('--tp-list', action='store_true', help='print an additional list of all throughput values for the kernel') group = parser.add_mutually_exclusive_group(required=False) - group.add_argument('-i', '--include-ibench', dest='incl', action='store_true', + group.add_argument('-i', '--include-ibench', action='store_true', help='includes the given values in form of the output of ibench in the' 'data file') - group.add_argument('--iaca', dest='iaca', action='store_true', + group.add_argument('--iaca', action='store_true', help='search for IACA markers instead the OSACA marker') - group.add_argument('-m', '--insert-marker', dest='insert_marker', action='store_true', + group.add_argument('--insert-marker', '-m', action='store_true', help='try to find blocks probably corresponding to loops in assembly and' 'insert IACA marker') parser.add_argument('-l', '--list-output', dest='machine_readable', action='store_true', - help='returns output as machine readable list of lists') + help='returns output as machine readable list of lists') parser.add_argument('filepath', type=str, help='path to object (Binary, ASM, CSV)') # Store args in global variables - inp = parser.parse_args() - if inp.arch is None and inp.insert_marker is None: - raise ValueError('Please specify an architecture.', file=sys.stderr) - arch = inp.arch.upper() - filepath = inp.filepath - incl_ibench = inp.incl - iaca_flag = inp.iaca - insert_m = inp.insert_marker + args = parser.parse_args() - # Create Osaca object - osaca = OSACA(arch, filepath) - if inp.tp_list: + # Create OSACA object + osaca = OSACA(args.arch.upper(), args.filepath) + if args.tp_list: osaca.tp_list = True - if(inp.machine_readable): + if args.machine_readable: osaca.machine_readable = True osaca.output = None - - if incl_ibench: + if args.include_ibench: try: osaca.include_ibench() except UnboundLocalError: print('Please specify an architecture.', file=sys.stderr) - elif iaca_flag: + elif args.iaca: try: return osaca.inspect_with_iaca() except UnboundLocalError: print('Please specify an architecture.', file=sys.stderr) - elif insert_m: + elif args.insert_marker: try: from kerncraft import iaca except ImportError: @@ -840,7 +829,7 @@ def main(): # Change due to newer kerncraft version (hopefully temporary) # iaca.iaca_instrumentation(input_file=filepath, output_file=filepath, # block_selection='manual', pointer_increment=1) - with open(filepath, 'r') as f_in, open(filepath[:-2] + '-iaca.s', 'w') as f_out: + with open(args.filepath, 'r') as f_in, open(args.filepath[:-2] + '-iaca.s', 'w') as f_out: iaca.iaca_instrumentation(input_file=f_in, output_file=f_out, block_selection='manual', pointer_increment=1) else: From 243621e4317a1d55f309c8357531b2552281faa4 Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Wed, 19 Dec 2018 18:26:31 +0100 Subject: [PATCH 05/17] removed high-level code marker functionality --- osaca/osaca.py | 96 ++------------------------------------------------ 1 file changed, 3 insertions(+), 93 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index cf74cd8..45f245b 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -25,7 +25,6 @@ class OSACA(object): numSeps = 0 indentChar = '' sem = 0 - CODE_MARKER = r'//STARTLOOP' # Variables for creating output longestInstr = 30 @@ -184,27 +183,6 @@ class OSACA(object): # -------------------------------------------------------------------------- - def check_elffile(self): - """ - Check if the given filepath exists, if the format is the needed elf64 - and store file data in attribute srcCode. - - Returns - ------- - bool - True if file is expected elf64 file - False if file does not exist or is not an elf64 file - - """ - if os.path.isfile(self.file_path): - self.store_src_code_binary() - try: - if 'file format elf64' in self.srcCode[1].lower(): - return True - except IndexError: - return False - return False - def check_file(self, iaca_flag=False): """ Check if the given filepath exists and store file data in attribute @@ -228,15 +206,6 @@ class OSACA(object): return True return False - def store_src_code_binary(self): - """ - Load binary file compiled with '-g' in class attribute srcCode and - separate by line. - """ - self.srcCode = (subprocess.run(['objdump', '--source', self.file_path], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE).stdout.decode('utf-8').split('\n')) - def store_src_code(self, iaca_flag=False): """ Load arbitrary file in class attribute srcCode. @@ -344,63 +313,6 @@ class OSACA(object): print('The program will continue with the given value', file=self.file_output) return clk_cyc - def check_line(self, line, first_appearance=False): - """ - Inspect line of source code and process it if inside the marked snippet. - - Parameter - --------- - line : str - Line of source code - first_appearance : bool - Necessary for setting indenting character (default False) - """ - # Check if marker is in line - if self.CODE_MARKER in line: - # First, check if high level code in indented with whitespaces or tabs - if first_appearance: - self.indentChar = self.get_indent_chars(line) - # Now count the number of whitespaces - self.numSeps = (re.split(self.CODE_MARKER, line)[0]).count(self.indentChar) - self.sem = 3 - elif self.sem > 0: - # We're in the marked code snippet - # Check if the line is ASM code and - if not - check if we're still in the loop - match = re.search(self.ASM_LINE, line) - if match: - # Further analysis of instructions - # Check if there are comments in line - if r'//' in line: - return - self.check_instr(''.join(re.split(r'\t', line)[-1:])) - elif (re.split(r'\S', line)[0]).count(self.indentChar) <= self.numSeps: - # Not in the loop anymore - or yet. We decrement the semaphore - self.sem = self.sem - 1 - - def get_indent_chars(self, line): - """ - Check if indentation characters are either tabulators or whitespaces - - Parameters - ---------- - line : str - Line with start marker in it - - Returns - ------- - str - Indentation character as string - """ - num_spaces = (re.split(self.CODE_MARKER, line)[0]).count(' ') - num_tabs = (re.split(self.CODE_MARKER, line)[0]).count('\t') - if num_spaces != 0 and num_tabs == 0: - return ' ' - elif num_spaces == 0 and num_tabs != 0: - return '\t' - else: - err_msg = 'Indentation of code is only supported for whitespaces and tabs.' - raise NotImplementedError(err_msg) - def iaca_bin(self): """ Extract instruction forms out of binary file using IACA markers. @@ -603,10 +515,8 @@ class OSACA(object): self.longestInstr = 70 horiz_line = self.create_horiz_sep() # Write general information about the benchmark - output = '--{}\n| Analyzing of file:\t{}| Architecture:\t\t{}\n| Timestamp:\t\t{}\n'.format( - horiz_line, os.path.abspath(self.file_path), self.arch, - datetime.now().strftime('%Y-%m-%d %H:%M:%S') - ) + output = '--{}\n| Architecture:\t\t{}\n|\n'.format( + horiz_line, self.arch) if tp_list: output += self.create_tp_list(horiz_line) if pr_sched: @@ -833,7 +743,7 @@ def main(): iaca.iaca_instrumentation(input_file=f_in, output_file=f_out, block_selection='manual', pointer_increment=1) else: - return osaca.inspect_binary() + raise Exception("Not clear what to do.") # ------------Main method-------------- From b6a108ba0eb213632022a83109d01613026e6597 Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Wed, 19 Dec 2018 18:30:45 +0100 Subject: [PATCH 06/17] moved flatten out of class --- osaca/osaca.py | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index 45f245b..a90ebd5 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -18,6 +18,27 @@ from osaca.testcase import Testcase DATA_DIR = os.path.expanduser('~') + '/.osaca/' +def flatten(l): + """ + Flatten a nested list of strings. + + Parameters + ---------- + l : [[...[str]]] + Nested list of strings + + Returns + ------- + [str] + List of strings + """ + if not l: + return l + if isinstance(l[0], list): + return flatten(l[0]) + flatten(l[1:]) + return l[:1] + flatten(l[1:]) + + class OSACA(object): srcCode = None tp_list = False @@ -399,7 +420,7 @@ class OSACA(object): if re.match(empty_byte, mnemonic) and len(mnemonic) == 2: return # Check if there's one or more operands and store all in a list - param_list = self.flatten(self.separate_params(params)) + param_list = flatten(self.separate_params(params)) param_list_types = list(param_list) # Check operands and separate them by IMMEDIATE (IMD), REGISTER (REG), # MEMORY (MEM) or LABEL(LBL) @@ -472,26 +493,6 @@ class OSACA(object): param_list = [params[:i]] return param_list - def flatten(self, l): - """ - Flatten a nested list of strings. - - Parameters - ---------- - l : [[...[str]]] - Nested list of strings - - Returns - ------- - [str] - List of strings - """ - if not l: - return l - if isinstance(l[0], list): - return self.flatten(l[0]) + self.flatten(l[1:]) - return l[:1] + self.flatten(l[1:]) - def create_output(self, tp_list=False, pr_sched=True, machine_readable=False): """ Creates output of analysed file including a time stamp. From eb3679f3e974837a78f8f398bfa5fa2f4f2f3e22 Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Wed, 19 Dec 2018 18:39:40 +0100 Subject: [PATCH 07/17] added temporary workaround --- osaca/osaca.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index a90ebd5..c2b2959 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -39,6 +39,16 @@ def flatten(l): return l[:1] + flatten(l[1:]) +def get_assembly_from_binary(file_path): + """ + Load binary file compiled with '-g' in class attribute srcCode and + separate by line. + """ + return subprocess.run(['objdump', '--source', file_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE).stdout.decode('utf-8').split('\n') + + class OSACA(object): srcCode = None tp_list = False @@ -156,6 +166,19 @@ class OSACA(object): print('ibench output included successfully in data file .', file=self.file_output) print('{} values were added.'.format(added_vals), file=self.file_output) + def check_elffile(self): + """ + Check if the format is elf64 and then load into srcCode + + :return: true if file could be loaded and is an elf64 file + """ + # FIXME remove this workaround when restructuring is complete + srcCode = get_assembly_from_binary(self.file_path) + if len(srcCode) > 2 and 'file format elf64' in srcCode[1].lower(): + self.srcCode = srcCode + return True + return False + def inspect_binary(self): """ Main function of OSACA. Inspect binary file and create analysis. @@ -420,7 +443,7 @@ class OSACA(object): if re.match(empty_byte, mnemonic) and len(mnemonic) == 2: return # Check if there's one or more operands and store all in a list - param_list = flatten(self.separate_params(params)) + param_list = flatten(self._separate_params(params)) param_list_types = list(param_list) # Check operands and separate them by IMMEDIATE (IMD), REGISTER (REG), # MEMORY (MEM) or LABEL(LBL) @@ -461,7 +484,7 @@ class OSACA(object): if inDB == 0: tc.write_testcase(not writeTP, not writeLT) - def separate_params(self, params): + def _separate_params(self, params): """ Delete comments, separates parameters and return them as a list. @@ -487,7 +510,7 @@ class OSACA(object): i = params.index(',') else: i = params.index(',') - param_list = [params[:i], self.separate_params(params[i + 1:])] + param_list = [params[:i], self._separate_params(params[i + 1:])] elif '#' in params: i = params.index('#') param_list = [params[:i]] From e4bfd7af739081308ea8c1e8e15b30df549ba7c5 Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Wed, 19 Dec 2018 18:54:09 +0100 Subject: [PATCH 08/17] better handling of argument combinations --- osaca/osaca.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index c2b2959..e030e6c 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -722,8 +722,6 @@ def main(): group.add_argument('-i', '--include-ibench', action='store_true', help='includes the given values in form of the output of ibench in the' 'data file') - group.add_argument('--iaca', action='store_true', - help='search for IACA markers instead the OSACA marker') group.add_argument('--insert-marker', '-m', action='store_true', help='try to find blocks probably corresponding to loops in assembly and' 'insert IACA marker') @@ -743,15 +741,7 @@ def main(): osaca.output = None if args.include_ibench: - try: - osaca.include_ibench() - except UnboundLocalError: - print('Please specify an architecture.', file=sys.stderr) - elif args.iaca: - try: - return osaca.inspect_with_iaca() - except UnboundLocalError: - print('Please specify an architecture.', file=sys.stderr) + osaca.include_ibench() elif args.insert_marker: try: from kerncraft import iaca @@ -767,7 +757,7 @@ def main(): iaca.iaca_instrumentation(input_file=f_in, output_file=f_out, block_selection='manual', pointer_increment=1) else: - raise Exception("Not clear what to do.") + return osaca.inspect_with_iaca() # ------------Main method-------------- From e92106b2661bde78677eeaff446ff14aaeaa01fd Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Fri, 21 Dec 2018 16:38:13 +0100 Subject: [PATCH 09/17] seperated disassembling (error prone), marker detection and kernel extraction --- osaca/osaca.py | 724 ++++++++++++++++++-------------------- tests/test_osaca.py | 36 +- tests/test_osaca_iaca.out | 28 +- 3 files changed, 383 insertions(+), 405 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index e030e6c..07b56ab 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -7,6 +7,7 @@ import io import re import subprocess from datetime import datetime +from pprint import pprint import pandas as pd import numpy as np @@ -17,6 +18,19 @@ from osaca.testcase import Testcase DATA_DIR = os.path.expanduser('~') + '/.osaca/' +# Matches every variation of the IACA start marker +IACA_START_MARKER = re.compile(r'\s*movl?[ \t]+\$(?:111|0x6f)[ \t]*,[ \t]*%ebx.*\n\s*' + r'(?:\.byte[ \t]+100.*((,[ \t]*103.*((,[ \t]*144)|' + r'(\n\s*\.byte[ \t]+144)))|' + r'(\n\s*\.byte[ \t]+103.*((,[ \t]*144)|' + r'(\n\s*\.byte[ \t]+144))))|(?:fs addr32 )?nop)') +# Matches every variation of the IACA end marker +IACA_END_MARKER = re.compile(r'\s*movl?[ \t]+\$(?:222|0x1f3)[ \t]*,[ \t]*%ebx.*\n\s*' + r'(?:\.byte[ \t]+100.*((,[ \t]*103.*((,[ \t]*144)|' + r'(\n\s*\.byte[ \t]+144)))|' + r'(\n\s*\.byte[ \t]+103.*((,[ \t]*144)|' + r'(\n\s*\.byte[ \t]+144))))|(?:fs addr32 )?nop)') + def flatten(l): """ @@ -39,14 +53,264 @@ def flatten(l): return l[:1] + flatten(l[1:]) -def get_assembly_from_binary(file_path): +def get_assembly_from_binary(bin_path): """ - Load binary file compiled with '-g' in class attribute srcCode and - separate by line. + Disassemble binary with llvm-objdump and transform into a canonical from. + + Replace jump and call target offsets with labels. + + :param bin_path: path to binary file to disassemble + + :return assembly string """ - return subprocess.run(['objdump', '--source', file_path], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE).stdout.decode('utf-8').split('\n') + asm_lines = subprocess.run( + ['objdump', '-d', '--no-show-raw-insn', bin_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE).stdout.decode('utf-8').split('\n') + + asm = [] + + # Separate label, offsets and instructions + # Store offset with each label (thus iterate in reverse) + label_offsets = {} + for l in reversed(asm_lines): + m = re.match(r'^(?:(?P