diff --git a/osaca/db_interface.py b/osaca/db_interface.py index 3acd17d..12e8327 100755 --- a/osaca/db_interface.py +++ b/osaca/db_interface.py @@ -2,6 +2,7 @@ import math import os +import sys import warnings import ruamel.yaml @@ -52,23 +53,73 @@ def import_benchmark_output(arch, bench_type, filepath): with open(filepath, 'r') as f: input_data = f.readlines() db_entries = None - if bench_type == 'ibench': - db_entries = _get_ibench_output(input_data) - elif bench_type == 'asmbench': - raise NotImplementedError - # write entries to DB mm = MachineModel(arch) + if bench_type == 'ibench': + db_entries = _get_ibench_output(input_data, mm.get_ISA()) + elif bench_type == 'asmbench': + db_entries = _get_asmbench_output(input_data, mm.get_ISA()) + # write entries to DB for entry in db_entries: - mm.set_instruction_entry(entry) - with open(filepath, 'w') as f: - mm.dump(f) + mm.set_instruction_entry(db_entries[entry]) + sys.stdout.write(mm.dump()) + ################## # HELPERS IBENCH # ################## -def _get_ibench_output(input_data): +def _get_asmbench_output(input_data, isa): + """ + Parse asmbench output in the format + + 1 MNEMONIC[-OP1[_OP2][...]] + 2 Latency: X cycles + 3 Throughput: Y cycles + 4 + + and creates per 4 lines in the input_data one entry in the database. + + :param str input_data: content of asmbench output file + :param str isa: ISA of target architecture (x86, AArch64, ...) + : return: dictionary with all new db_entries + """ + db_entries = {} + for i in range(0, len(input_data), 4): + if input_data[i + 3].strip() != '': + print('asmbench output not in the correct format! Format must be: ', file=sys.stderr) + print( + '-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n' + 'Throughput: Y cycles\n\n-------------', + file=sys.stderr, + ) + print( + 'Entry {} and all further entries won\'t be added.'.format((i / 4) + 1), + file=sys.stderr, + ) + break + else: + i_form = input_data[i].strip() + mnemonic = i_form.split('-')[0] + operands = i_form.split('-')[1].split('_') + operands = [_create_db_operand(op, isa) for op in operands] + entry = { + 'name': mnemonic, + 'operands': operands, + 'throughput': _validate_measurement(float(input_data[i + 2].split()[1]), 'tp'), + 'latency': _validate_measurement(float(input_data[i + 1].split()[1]), 'lt'), + 'port_pressure': None, + } + if not entry['throughput'] or not entry['latency']: + warnings.warn( + 'Your measurement for {} looks suspicious'.format(i_form) + + ' and was not added. Please inspect your benchmark.' + ) + db_entries[i_form] = entry + return db_entries + + +def _get_ibench_output(input_data, isa): db_entries = {} for line in input_data: if 'Using frequency' in line or len(line) == 0: @@ -81,7 +132,7 @@ def _get_ibench_output(input_data): else: mnemonic = instruction.split('-')[0] operands = instruction.split('-')[1].split('_') - operands = [_create_db_operand(op) for op in operands] + operands = [_create_db_operand(op, isa) for op in operands] entry = { 'name': mnemonic, 'operands': operands, @@ -90,14 +141,14 @@ def _get_ibench_output(input_data): 'port_pressure': None, } if 'TP' in instruction: - entry['throughput'] = _validate_measurement(float(line.split()[1]), True) + entry['throughput'] = _validate_measurement(float(line.split()[1]), 'tp') if not entry['throughput']: warnings.warn( 'Your THROUGHPUT measurement for {} looks suspicious'.format(key) + ' and was not added. Please inspect your benchmark.' ) elif 'LT' in instruction: - entry['latency'] = _validate_measurement(float(line.split()[1]), False) + entry['latency'] = _validate_measurement(float(line.split()[1]), 'lt') if not entry['latency']: warnings.warn( 'Your LATENCY measurement for {} looks suspicious'.format(key) @@ -107,8 +158,8 @@ def _get_ibench_output(input_data): return db_entries -def _validate_measurement(self, measurement, is_tp): - if not is_tp: +def _validate_measurement(measurement, mode): + if mode == 'lt': if ( math.floor(measurement) * 1.05 >= measurement or math.ceil(measurement) * 0.95 <= measurement @@ -116,7 +167,7 @@ def _validate_measurement(self, measurement, is_tp): # Value is probably correct, so round it to the estimated value return float(round(measurement)) # Check reciprocal only if it is a throughput value - else: + elif mode == 'tp': reciprocals = [1 / x for x in range(1, 11)] for reci in reciprocals: if reci * 0.95 <= measurement <= reci * 1.05: @@ -127,14 +178,14 @@ def _validate_measurement(self, measurement, is_tp): return None -def _create_db_operand(self, operand): - if self.isa == 'aarch64': - return self._create_db_operand_aarch64(operand) - elif self.isa == 'x86': - return self._create_db_operand_x86(operand) +def _create_db_operand(operand, isa): + if isa == 'aarch64': + return _create_db_operand_aarch64(operand) + elif isa == 'x86': + return _create_db_operand_x86(operand) -def _create_db_operand_aarch64(self, operand): +def _create_db_operand_aarch64(operand): if operand == 'i': return {'class': 'immediate', 'imd': 'int'} elif operand in 'wxbhsdq': @@ -155,7 +206,7 @@ def _create_db_operand_aarch64(self, operand): raise ValueError('Parameter {} is not a valid operand code'.format(operand)) -def _create_db_operand_x86(self, operand): +def _create_db_operand_x86(operand): if operand == 'r': return {'class': 'register', 'name': 'gpr'} elif operand in 'xyz': diff --git a/osaca/frontend.py b/osaca/frontend.py index 6d37757..845c266 100755 --- a/osaca/frontend.py +++ b/osaca/frontend.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -import os import re from datetime import datetime as dt @@ -60,56 +59,6 @@ class Frontend(object): tp_sum = SemanticsAppender.get_throughput_sum(kernel) print(lineno_filler + self._get_port_pressure(tp_sum, port_len, ' ')) - def _get_separator_list(self, separator, separator_2=' '): - separator_list = [] - for i in range(len(self._data['ports']) - 1): - match_1 = re.search(r'\d+', self._data['ports'][i]) - match_2 = re.search(r'\d+', self._data['ports'][i + 1]) - if match_1 is not None and match_2 is not None and match_1.group() == match_2.group(): - separator_list.append(separator_2) - else: - separator_list.append(separator) - separator_list.append(separator) - return separator_list - - def _get_flag_symbols(self, flag_obj): - string_result = '' - string_result += '*' if INSTR_FLAGS.NOT_BOUND in flag_obj else '' - string_result += 'X' if INSTR_FLAGS.TP_UNKWN in flag_obj else '' - string_result += 'P' if INSTR_FLAGS.HIDDEN_LD in flag_obj else '' - # TODO add other flags - string_result += ' ' if len(string_result) == 0 else '' - return string_result - - def _get_port_pressure(self, ports, port_len, separator='|'): - if not isinstance(separator, list): - separator = [separator for x in ports] - string_result = '{} '.format(separator[-1]) - for i in range(len(ports)): - if float(ports[i]) == 0.0: - string_result += port_len[i] * ' ' + ' {} '.format(separator[i]) - continue - left_len = len(str(float(ports[i])).split('.')[0]) - substr = '{:' + str(left_len) + '.' + str(max(port_len[i] - left_len - 1, 0)) + 'f}' - string_result += substr.format(ports[i]) + ' {} '.format(separator[i]) - return string_result[:-1] - - def _get_max_port_len(self, kernel): - port_len = [4 for x in self._data['ports']] - for instruction_form in kernel: - for i, port in enumerate(instruction_form['port_pressure']): - if len('{:.2f}'.format(port)) > port_len[i]: - port_len[i] = len('{:.2f}'.format(port)) - return port_len - - def _get_port_number_line(self, port_len, separator='|'): - string_result = separator - separator_list = self._get_separator_list(separator, '-') - for i, length in enumerate(port_len): - substr = '{:^' + str(length + 2) + 's}' - string_result += substr.format(self._data['ports'][i]) + separator_list[i] - return string_result - def print_latency_analysis(self, cp_kernel, separator='|'): print('\n\nLatency Analysis Report\n' + '-----------------------') for instruction_form in cp_kernel: @@ -144,10 +93,7 @@ class Frontend(object): dep, separator, sum( - [ - instr_form['latency_lcd'] - for instr_form in dep_dict[dep]['dependencies'] - ] + [instr_form['latency_lcd'] for instr_form in dep_dict[dep]['dependencies']] ), separator, dep_dict[dep]['root']['line'], @@ -156,6 +102,145 @@ class Frontend(object): ) ) + def print_full_analysis(self, kernel, kernel_dg: KernelDG, verbose=False): + self._print_header_report() + self._print_symbol_map() + self.print_combined_view( + kernel, kernel_dg.get_critical_path(), kernel_dg.get_loopcarried_dependencies() + ) + self.print_loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies()) + + def print_combined_view(self, kernel, cp_kernel: KernelDG, dep_dict, show_cmnts=True): + self._print_header_report() + self._print_symbol_map() + print('\n\nCombined Analysis Report\n' + '-----------------------') + lineno_filler = ' ' + port_len = self._get_max_port_len(kernel) + # Separator for ports + separator = '-' * sum([x + 3 for x in port_len]) + '-' + # ... for line numbers + separator += '--' + len(str(kernel[-1]['line_number'])) * '-' + col_sep = '|' + # for LCD/CP column + separator += '-' * (2 * 6 + len(col_sep)) + '-' * len(col_sep) + sep_list = self._get_separator_list(col_sep) + headline = 'Port pressure in cycles' + headline_str = '{{:^{}}}'.format(len(separator)) + # Prepare CP/LCD variable + cp_lines = [x['line_number'] for x in cp_kernel] + sums = {} + for dep in dep_dict: + sums[dep] = sum( + [instr_form['latency_lcd'] for instr_form in dep_dict[dep]['dependencies']] + ) + lcd_sum = max(sums.values()) + lcd_lines = [] + longest_lcd = [line_no for line_no in sums if sums[line_no] == lcd_sum][0] + lcd_lines = [d['line_number'] for d in dep_dict[longest_lcd]['dependencies']] + + print(headline_str.format(headline)) + print( + lineno_filler + + self._get_port_number_line(port_len, separator=col_sep) + + '{}{:^6}{}{:^6}{}'.format(col_sep, 'CP', col_sep, 'LCD', col_sep) + ) + print(separator) + for instruction_form in kernel: + if show_cmnts is False and self._is_comment(instruction_form): + continue + line_number = instruction_form['line_number'] + line = '{:4d} {}{} {} {}'.format( + line_number, + self._get_port_pressure(instruction_form['port_pressure'], port_len, sep_list), + self._get_lcd_cp_ports( + instruction_form['line_number'], + cp_kernel if line_number in cp_lines else None, + dep_dict[longest_lcd] if line_number in lcd_lines else None, + ), + self._get_flag_symbols(instruction_form['flags']) + if instruction_form['instruction'] is not None + else ' ', + instruction_form['line'].strip(), + ) + print(line) + print() + # lcd_sum already calculated before + tp_sum = SemanticsAppender.get_throughput_sum(kernel) + cp_sum = sum([x['latency_cp'] for x in cp_kernel]) + print( + lineno_filler + + self._get_port_pressure(tp_sum, port_len, ' ') + + ' {:^6} {:^6}'.format(cp_sum, lcd_sum) + ) + + #################### + # HELPER FUNCTIONS + #################### + + def _get_separator_list(self, separator, separator_2=' '): + separator_list = [] + for i in range(len(self._data['ports']) - 1): + match_1 = re.search(r'\d+', self._data['ports'][i]) + match_2 = re.search(r'\d+', self._data['ports'][i + 1]) + if match_1 is not None and match_2 is not None and match_1.group() == match_2.group(): + separator_list.append(separator_2) + else: + separator_list.append(separator) + separator_list.append(separator) + return separator_list + + def _get_flag_symbols(self, flag_obj): + string_result = '' + string_result += '*' if INSTR_FLAGS.NOT_BOUND in flag_obj else '' + string_result += 'X' if INSTR_FLAGS.TP_UNKWN in flag_obj else '' + string_result += 'P' if INSTR_FLAGS.HIDDEN_LD in flag_obj else '' + # TODO add other flags + string_result += ' ' if len(string_result) == 0 else '' + return string_result + + def _get_port_pressure(self, ports, port_len, separator='|'): + if not isinstance(separator, list): + separator = [separator for x in ports] + string_result = '{} '.format(separator[-1]) + for i in range(len(ports)): + if float(ports[i]) == 0.0: + string_result += port_len[i] * ' ' + ' {} '.format(separator[i]) + continue + left_len = len(str(float(ports[i])).split('.')[0]) + substr = '{:' + str(left_len) + '.' + str(max(port_len[i] - left_len - 1, 0)) + 'f}' + string_result += substr.format(ports[i]) + ' {} '.format(separator[i]) + return string_result[:-1] + + def _get_node_by_lineno(self, lineno, kernel): + nodes = [instr for instr in kernel if instr['line_number'] == lineno] + return nodes[0] if len(nodes) > 0 else None + + def _get_lcd_cp_ports(self, line_number, cp_dg, dependency, separator='|'): + lat_cp = lat_lcd = '' + if cp_dg: + lat_cp = self._get_node_by_lineno(line_number, cp_dg)['latency_cp'] + if dependency: + lat_lcd = self._get_node_by_lineno(line_number, dependency['dependencies'])[ + 'latency_lcd' + ] + return '{} {:>4} {} {:>4} {}'.format(separator, lat_cp, separator, lat_lcd, separator) + + def _get_max_port_len(self, kernel): + port_len = [4 for x in self._data['ports']] + for instruction_form in kernel: + for i, port in enumerate(instruction_form['port_pressure']): + if len('{:.2f}'.format(port)) > port_len[i]: + port_len[i] = len('{:.2f}'.format(port)) + return port_len + + def _get_port_number_line(self, port_len, separator='|'): + string_result = separator + separator_list = self._get_separator_list(separator, '-') + for i, length in enumerate(port_len): + substr = '{:^' + str(length + 2) + 's}' + string_result += substr.format(self._data['ports'][i]) + separator_list[i] + return string_result + def _print_header_report(self): version = 'v0.3' adjust = 20 @@ -184,10 +269,3 @@ class Frontend(object): def _print_port_binding_summary(self): raise NotImplementedError - - def print_full_analysis(self, kernel, kernel_dg: KernelDG, verbose=False): - self._print_header_report() - self._print_symbol_map() - self.print_throughput_analysis(kernel, show_lineno=True) - self.print_latency_analysis(kernel_dg.get_critical_path()) - self.print_loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies()) diff --git a/osaca/osaca.py b/osaca/osaca.py index 22e2a3f..71c832e 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -130,6 +130,8 @@ def check_user_dir(): def import_data(benchmark_type, arch, filepath): if benchmark_type.lower() == 'ibench': import_benchmark_output(arch, 'ibench', filepath) + elif benchmark_type.lower() == 'asmbench': + import_benchmark_output(arch, 'asmbench', filepath) else: raise NotImplementedError('This benchmark input variant is not implemented yet.') @@ -196,10 +198,10 @@ def run(args, output_file=sys.stdout): # Sanity check on DB verbose = True if args.verbose > 0 else False sanity_check(args.arch, verbose=verbose) - if 'import_data' in args: + elif 'import_data' in args: # Import microbench output file into DB - import_data(args.import_data, args.arch, args.file) - if args.insert_marker: + import_data(args.import_data, args.arch, args.file.name) + elif args.insert_marker: # Try to add IACA marker insert_byte_marker(args) else: diff --git a/osaca/parser/parser_AArch64v81.py b/osaca/parser/parser_AArch64v81.py index 8aa4585..56abb98 100755 --- a/osaca/parser/parser_AArch64v81.py +++ b/osaca/parser/parser_AArch64v81.py @@ -9,6 +9,7 @@ from osaca.parser import AttrDict, BaseParser class ParserAArch64v81(BaseParser): def __init__(self): super().__init__() + self.isa = 'aarch64' def construct_parser(self): # Comment diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 15c0da1..7063721 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -8,6 +8,7 @@ from osaca.parser import AttrDict, BaseParser class ParserX86ATT(BaseParser): def __init__(self): super().__init__() + self.isa = 'x86' def construct_parser(self): decimal_number = pp.Combine( @@ -67,8 +68,13 @@ class ParserX86ATT(BaseParser): directive_option = pp.Combine( pp.Word('#@.', exact=1) + pp.Word(pp.printables, excludeChars=',') ) - directive_parameter = (pp.quotedString | directive_option | identifier | hex_number | - decimal_number | self.register + directive_parameter = ( + pp.quotedString + | directive_option + | identifier + | hex_number + | decimal_number + | self.register ) commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=',') self.directive = pp.Group( @@ -172,9 +178,10 @@ class ParserX86ATT(BaseParser): if result is None: try: result = self.parse_instruction(line) - except pp.ParseException as e: - raise ValueError('Could not parse instruction on line {}: {!r}'.format( - line_number, line)) + except pp.ParseException: + raise ValueError( + 'Could not parse instruction on line {}: {!r}'.format(line_number, line) + ) instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID] instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID] instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID] @@ -202,8 +209,9 @@ class ParserX86ATT(BaseParser): { self.INSTRUCTION_ID: result['mnemonic'], self.OPERANDS_ID: operands, - self.COMMENT_ID: - ' '.join(result[self.COMMENT_ID]) if self.COMMENT_ID in result else None, + self.COMMENT_ID: ' '.join(result[self.COMMENT_ID]) + if self.COMMENT_ID in result + else None, } ) return return_dict diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index ac53f2c..08bc989 100755 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -189,9 +189,10 @@ class MachineModel(object): # Replace instruction form's port_pressure with styled version for RoundtripDumper formatted_instruction_forms = deepcopy(self._data['instruction_forms']) for instruction_form in formatted_instruction_forms: - cs = ruamel.yaml.comments.CommentedSeq(instruction_form['port_pressure']) - cs.fa.set_flow_style() - instruction_form['port_pressure'] = cs + if instruction_form['port_pressure'] is not None: + cs = ruamel.yaml.comments.CommentedSeq(instruction_form['port_pressure']) + cs.fa.set_flow_style() + instruction_form['port_pressure'] = cs # Replace load_throughput with styled version for RoundtripDumper formatted_load_throughput = [] diff --git a/osaca/semantics/semantics_appender.py b/osaca/semantics/semantics_appender.py index 0c92272..2116ad5 100755 --- a/osaca/semantics/semantics_appender.py +++ b/osaca/semantics/semantics_appender.py @@ -319,14 +319,14 @@ class SemanticsAppender(object): def _get_regular_source_x86ATT(self, instruction_form): # return all but last operand sources = [ - op for op in instruction_form['operands'][0 : len(instruction_form['operands']) - 1] + op for op in instruction_form['operands'][0:len(instruction_form['operands']) - 1] ] return sources def _get_regular_source_AArch64(self, instruction_form): # return all but first operand sources = [ - op for op in instruction_form['operands'][1 : len(instruction_form['operands'])] + op for op in instruction_form['operands'][1:len(instruction_form['operands'])] ] return sources