From 530ad8484e49a2ba3fb6d7b321a1bafb4be976cd Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Tue, 28 Jan 2020 17:24:00 +0100 Subject: [PATCH] frontend returns strings; added helper function to calc. unmatched ratio --- osaca/api/kerncraft_interface.py | 4 +- osaca/frontend.py | 129 +++++++++++++++---------------- osaca/osaca.py | 16 ++-- osaca/semantics/marker_utils.py | 4 +- tests/test_frontend.py | 8 +- tests/test_marker_utils.py | 34 ++++---- 6 files changed, 100 insertions(+), 95 deletions(-) diff --git a/osaca/api/kerncraft_interface.py b/osaca/api/kerncraft_interface.py index 47a8d69..aceb23c 100755 --- a/osaca/api/kerncraft_interface.py +++ b/osaca/api/kerncraft_interface.py @@ -40,9 +40,7 @@ class KerncraftAPI(object): def create_output(self, verbose=False): kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model) frontend = Frontend(arch=self.machine_model.get_arch()) - with Capturing() as output: - frontend.print_full_analysis(self.kernel, kernel_graph, verbose=verbose) - return '\n'.join(output) + return frontend.full_analysis(self.kernel, kernel_graph, verbose=verbose) def get_unmatched_instruction_ratio(self): unmatched_counter = 0 diff --git a/osaca/frontend.py b/osaca/frontend.py index cc69c56..61aae90 100755 --- a/osaca/frontend.py +++ b/osaca/frontend.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Frontend interface for OSACA. Does everything necessary for printing analysis to the terminal. +Frontend interface for OSACA. Does everything necessary for analysis report generation. """ import re from datetime import datetime as dt @@ -42,11 +42,11 @@ class Frontend(object): """ return instruction_form['comment'] is not None and instruction_form['instruction'] is None - def print_throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True): + def throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True): """ - Print throughput analysis only. + Build throughput analysis only. - :param kernel: Kernel to print throughput analysis for. + :param kernel: Kernel to build throughput analysis for. :type kernel: list :param show_lineno: flag for showing the line number of instructions, defaults to `False` :type show_lineno: bool, optional @@ -62,10 +62,10 @@ class Frontend(object): headline = 'Port pressure in cycles' headline_str = '{{:^{}}}'.format(len(separator)) - print('\n\nThroughput Analysis Report\n' + '--------------------------') - print(headline_str.format(headline)) - print(lineno_filler + self._get_port_number_line(port_len)) - print(separator) + s = '\n\nThroughput Analysis Report\n--------------------------\n' + s += headline_str.format(headline) + '\n' + s += lineno_filler + self._get_port_number_line(port_len) + '\n' + s += separator + '\n' for instruction_form in kernel: line = '{:4d} {} {} {}'.format( instruction_form['line_number'], @@ -80,23 +80,24 @@ class Frontend(object): line = line if show_lineno else col_sep + col_sep.join(line.split(col_sep)[1:]) if show_cmnts is False and self._is_comment(instruction_form): continue - print(line) - print() + s += line + '\n' + s += '\n' tp_sum = ArchSemantics.get_throughput_sum(kernel) - print(lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=' ')) + s += lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=' ') + '\n' + return s - def print_latency_analysis(self, cp_kernel, separator='|'): + def latency_analysis(self, cp_kernel, separator='|'): """ - Print a list-based CP analysis to the terminal. + Build a list-based CP analysis report. :param cp_kernel: loop kernel containing the CP information for each instruction form :type cp_kernel: list :separator: separator symbol for the columns, defaults to '|' :type separator: str, optional """ - print('\n\nLatency Analysis Report\n' + '-----------------------') + s = '\n\nLatency Analysis Report\n-----------------------\n' for instruction_form in cp_kernel: - print( + s += ( '{:4d} {} {:4.1f} {}{}{} {}'.format( instruction_form['line_number'], separator, @@ -106,16 +107,17 @@ class Frontend(object): separator, instruction_form['line'], ) - ) - print( + ) + '\n' + s += ( '\n{:4} {} {:4.1f}'.format( ' ' * max([len(str(instr_form['line_number'])) for instr_form in cp_kernel]), ' ' * len(separator), sum([instr_form['latency_cp'] for instr_form in cp_kernel]), ) - ) + ) + '\n' + return s - def print_loopcarried_dependencies(self, dep_dict, separator='|'): + def loopcarried_dependencies(self, dep_dict, separator='|'): """ Print a list-based LCD analysis to the terminal. @@ -124,13 +126,13 @@ class Frontend(object): :separator: separator symbol for the columns, defaults to '|' :type separator: str, optional """ - print( + s = ( '\n\nLoop-Carried Dependencies Analysis Report\n' - + '-----------------------------------------' + + '-----------------------------------------\n' ) # TODO find a way to overcome padding for different tab-lengths for dep in dep_dict: - print( + s += ( '{:4d} {} {:4.1f} {} {:36}{} {}'.format( dep, separator, @@ -143,15 +145,16 @@ class Frontend(object): [node['line_number'] for node in dep_dict[dep]['dependencies']], ) ) + return s - def print_full_analysis( + def full_analysis( self, kernel, kernel_dg: KernelDG, ignore_unknown=False, verbose=False ): """ - Prints the full analysis report including header, the symbol map, the combined TP/CP/LCD + Build the full analysis report including header, the symbol map, the combined TP/CP/LCD view and the list based LCD view. - :param kernel: kernel to print + :param kernel: kernel to report on :type kernel: list :param kernel_dg: directed graph containing CP and LCD :type kernel_dg: :class:`~osaca.semantics.KernelDG` @@ -161,24 +164,24 @@ class Frontend(object): :param verbose: flag for verbosity level, defaults to False :type verbose: boolean, optional """ - self._print_header_report() - self._print_symbol_map() - self.print_combined_view( - kernel, - kernel_dg.get_critical_path(), - kernel_dg.get_loopcarried_dependencies(), - ignore_unknown, - ) - self.print_loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies()) + return ( + self._header_report() + + self._symbol_map() + + self.combined_view( + kernel, + kernel_dg.get_critical_path(), + kernel_dg.get_loopcarried_dependencies(), + ignore_unknown) + + self.loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies())) - def print_combined_view( + def combined_view( self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True ): """ - Prints the combined view of the kernel including the port pressure (TP), a CP column and a + Build combined view of kernel including port pressure (TP), a CP column and a LCD column. - :param kernel: kernel to print + :param kernel: kernel to report on :type kernel: list :param kernel_dg: directed graph containing CP and LCD :type kernel_dg: :class:`~osaca.semantics.KernelDG` @@ -190,7 +193,7 @@ class Frontend(object): :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True` :type show_cmnts: bool, optional """ - print('\n\nCombined Analysis Report\n' + '------------------------') + s = '\n\nCombined Analysis Report\n------------------------\n' lineno_filler = ' ' port_len = self._get_max_port_len(kernel) # Separator for ports @@ -216,20 +219,19 @@ class Frontend(object): longest_lcd = [line_no for line_no in sums if sums[line_no] == lcd_sum][0] lcd_lines = [d['line_number'] for d in dep_dict[longest_lcd]['dependencies']] - print(headline_str.format(headline)) - print( + s += headline_str.format(headline) + '\n' + s += ( lineno_filler + self._get_port_number_line(port_len, separator=col_sep) + '{}{:^6}{}{:^6}{}'.format(col_sep, 'CP', col_sep, 'LCD', col_sep) - ) - print(separator) + ) + '\n' + separator + '\n' for instruction_form in kernel: if show_cmnts is False and self._is_comment(instruction_form): continue line_number = instruction_form['line_number'] used_ports = [list(uops[1]) for uops in instruction_form['port_uops']] used_ports = list(set([p for uops_ports in used_ports for p in uops_ports])) - line = '{:4d} {}{} {} {}'.format( + s += '{:4d} {}{} {} {}\n'.format( line_number, self._get_port_pressure( instruction_form['port_pressure'], port_len, used_ports, sep_list @@ -244,8 +246,7 @@ class Frontend(object): else ' ', instruction_form['line'].strip(), ) - print(line) - print() + s += '\n' # check for unknown instructions and throw warning if called without --ignore-unknown if not ignore_unknown and INSTR_FLAGS.TP_UNKWN in [ flag for instr in kernel for flag in instr['flags'] @@ -253,33 +254,31 @@ class Frontend(object): num_missing = len( [instr['flags'] for instr in kernel if INSTR_FLAGS.TP_UNKWN in instr['flags']] ) - self._print_missing_instruction_error(num_missing) + self._missing_instruction_error(num_missing) else: # lcd_sum already calculated before tp_sum = ArchSemantics.get_throughput_sum(kernel) cp_sum = sum([x['latency_cp'] for x in cp_kernel]) - print( + s += ( lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=' ') - + ' {:^6} {:^6}'.format(cp_sum, lcd_sum) + + ' {:^6} {:^6}\n'.format(cp_sum, lcd_sum) ) + return s #################### # HELPER FUNCTIONS #################### - def _print_missing_instruction_error(self, amount): - print( - ( - '------------------ WARNING: The performance data for {} instructions is missing.' - '------------------\n' - ' No final analysis is given. If you want to ignore this\n' - ' warning and run the analysis anyway, start osaca with\n' - ' --ignore_unknown flag.\n' - '--------------------------------------------------------------------------------' - '----------------{}' - ).format(amount, '-' * len(str(amount))) - ) + def _missing_instruction_error(self, amount): + s = ( + '------------------ WARNING: The performance data for {} instructions is missing.' + '------------------\n' + ' No final analysis is given. If you want to ignore this\n' + ' warning and run the analysis anyway, start osaca with\n' + ' --ignore_unknown flag.\n' + '--------------------------------------------------------------------------------' + '----------------{}\n').format(amount, '-' * len(str(amount))) def _get_separator_list(self, separator, separator_2=' '): """Creates column view for seperators in the TP/combined view.""" @@ -352,7 +351,7 @@ class Frontend(object): string_result += substr.format(self._machine_model.get_ports()[i]) + separator_list[i] return string_result - def _print_header_report(self): + def _header_report(self): """Prints header information""" version = 'v0.3' adjust = 20 @@ -363,9 +362,9 @@ class Frontend(object): header += 'Timestamp:'.ljust(adjust) + '{}\n'.format( dt.utcnow().strftime('%Y-%m-%d %H:%M:%S') ) - print(header) + return header + '\n' - def _print_symbol_map(self): + def _symbol_map(self): """Prints instruction flag map.""" symbol_dict = { INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port', @@ -378,7 +377,7 @@ class Frontend(object): for flag in sorted(symbol_dict.keys()): symbol_map += ' {} - {}\n'.format(self._get_flag_symbols([flag]), symbol_dict[flag]) - print(symbol_map, end='') + return symbol_map - def _print_port_binding_summary(self): + def _port_binding_summary(self): raise NotImplementedError diff --git a/osaca/osaca.py b/osaca/osaca.py index 56f90c8..1a0ecb5 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -10,7 +10,7 @@ from osaca.db_interface import import_benchmark_output, sanity_check from osaca.frontend import Frontend from osaca.parser import BaseParser, ParserAArch64v81, ParserX86ATT from osaca.semantics import (ArchSemantics, KernelDG, MachineModel, - reduce_to_section) + reduce_to_section, INSTR_FLAGS) MODULE_DATA_DIR = os.path.join( os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/' @@ -231,9 +231,9 @@ def inspect(args): kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None) # Print analysis frontend = Frontend(args.file.name, arch=arch) - frontend.print_full_analysis( + print(frontend.full_analysis( kernel, kernel_graph, ignore_unknown=ignore_unknown, verbose=verbose - ) + )) def run(args, output_file=sys.stdout): @@ -258,7 +258,6 @@ def run(args, output_file=sys.stdout): inspect(args) -# --------------------------------------------------- def get_asm_parser(arch) -> BaseParser: """ Helper function to create the right parser for a specific architecture. @@ -274,7 +273,14 @@ def get_asm_parser(arch) -> BaseParser: return ParserAArch64v81() -# --------------------------------------------------- +def get_unmatched_instruction_ratio(kernel): + """Return ratio of unmatched from total instructions in kernel.""" + unmatched_counter = 0 + for instruction in kernel: + if INSTR_FLAGS.TP_UNKWN in instruction['flags'] and \ + INSTR_FLAGS.LT_UNKWN in instruction['flags']: + unmatched_counter += 1 + return unmatched_counter / len(kernel) def main(): diff --git a/osaca/semantics/marker_utils.py b/osaca/semantics/marker_utils.py index 9cd5271..11127ce 100755 --- a/osaca/semantics/marker_utils.py +++ b/osaca/semantics/marker_utils.py @@ -201,7 +201,7 @@ def find_basic_blocks(lines): # an instruction referencing a valid jump label blocks = OrderedDict() for label, label_line_idx in valid_jump_labels.items(): - blocks[label] = [] + blocks[label] = [lines[label_line_idx]] for line in lines[label_line_idx + 1 :]: terminate = False blocks[label].append(line) @@ -230,7 +230,7 @@ def find_basic_loop_bodies(lines): # an instruction referencing a valid jump label loop_bodies = OrderedDict() for label, label_line_idx in valid_jump_labels.items(): - current_block = [] + current_block = [lines[label_line_idx]] for line in lines[label_line_idx + 1 :]: terminate = False current_block.append(line) diff --git a/tests/test_frontend.py b/tests/test_frontend.py index d4863f8..8bfd963 100755 --- a/tests/test_frontend.py +++ b/tests/test_frontend.py @@ -67,13 +67,15 @@ class TestFrontend(unittest.TestCase): def test_frontend_x86(self): dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx) fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml')) - fe.print_throughput_analysis(self.kernel_x86, show_cmnts=False) - fe.print_latency_analysis(dg.get_critical_path()) + fe.throughput_analysis(self.kernel_x86, show_cmnts=False) + fe.latency_analysis(dg.get_critical_path()) + # TODO compare output with checked string def test_frontend_AArch64(self): dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2) fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml')) - fe.print_full_analysis(self.kernel_AArch64, dg, verbose=True) + fe.full_analysis(self.kernel_AArch64, dg, verbose=True) + # TODO compare output with checked string ################## # Helper functions diff --git a/tests/test_marker_utils.py b/tests/test_marker_utils.py index e5bac0f..fc5e7f6 100755 --- a/tests/test_marker_utils.py +++ b/tests/test_marker_utils.py @@ -317,36 +317,36 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( [(k, v[0]['line_number'], v[-1]['line_number']) for k, v in find_basic_blocks(self.parsed_x86).items()], - [('.LFB24', 12, 56), ('.L4', 67, 74), ('.L3', 81, 89), ('.L2', 104, 112), - ('.L13', 113, 121), ('.L12', 122, 125), ('.L6', 134, 135), ('.L10', 147, 154), - ('.L9', 163, 170), ('.L8', 185, 187), ('.L15', 254, 256), ('.L26', 258, 259), - ('.L14', 261, 262), ('.LFB25', 279, 290), ('.L28', 291, 300)]) + [('.LFB24', 11, 56), ('.L4', 66, 74), ('.L3', 80, 89), ('.L2', 103, 112), + ('.L13', 112, 121), ('.L12', 121, 125), ('.L6', 133, 135), ('.L10', 146, 154), + ('.L9', 162, 170), ('.L8', 184, 187), ('.L15', 253, 256), ('.L26', 257, 259), + ('.L14', 260, 262), ('.LFB25', 278, 290), ('.L28', 290, 300)]) self.assertEqual( [(k, v[0]['line_number'], v[-1]['line_number']) for k, v in find_basic_blocks(self.parsed_AArch).items()], - [('triad', 20, 64), ('.LBB0_3', 73, 77), ('.LBB0_4', 78, 83), ('.LBB0_5', 86, 89), - ('.LBB0_7', 94, 95), ('.LBB0_8', 97, 105), ('.LBB0_9', 108, 114), - ('.LBB0_11', 120, 134), ('.LBB0_12', 135, 173), ('.LBB0_14', 179, 191), - ('.LBB0_15', 192, 205), ('.LBB0_16', 207, 208), ('.LBB0_17', 210, 222), - ('.LBB0_18', 223, 228), ('.LBB0_19', 230, 261), ('.LBB0_20', 262, 269), - ('.LBB0_22', 274, 280), ('.LBB0_24', 285, 286), ('.LBB0_26', 292, 293), - ('.LBB0_28', 300, 307), ('.LBB0_29', 308, 444), ('.LBB0_31', 450, 459), - ('.LBB0_32', 460, 480), ('.LBB0_33', 482, 484), ('.LBB0_34', 486, 494), - ('.LBB0_35', 495, 504), ('.LBB0_36', 506, 508), ('.LBB0_37', 510, 518), - ('.LBB0_38', 520, 568), ('main', 576, 590)]) + [('triad', 19, 64), ('.LBB0_3', 72, 77), ('.LBB0_4', 77, 83), ('.LBB0_5', 85, 89), + ('.LBB0_7', 93, 95), ('.LBB0_8', 96, 105), ('.LBB0_9', 107, 114), + ('.LBB0_11', 119, 134), ('.LBB0_12', 134, 173), ('.LBB0_14', 178, 191), + ('.LBB0_15', 191, 205), ('.LBB0_16', 206, 208), ('.LBB0_17', 209, 222), + ('.LBB0_18', 222, 228), ('.LBB0_19', 229, 261), ('.LBB0_20', 261, 269), + ('.LBB0_22', 273, 280), ('.LBB0_24', 284, 286), ('.LBB0_26', 291, 293), + ('.LBB0_28', 299, 307), ('.LBB0_29', 307, 444), ('.LBB0_31', 449, 459), + ('.LBB0_32', 459, 480), ('.LBB0_33', 481, 484), ('.LBB0_34', 485, 494), + ('.LBB0_35', 494, 504), ('.LBB0_36', 505, 508), ('.LBB0_37', 509, 518), + ('.LBB0_38', 519, 568), ('main', 575, 590)]) def test_find_basic_loop_body(self): self.assertEqual( [(k, v[0]['line_number'], v[-1]['line_number']) for k, v in find_basic_loop_bodies(self.parsed_x86).items()], - [('.L4', 67, 74), ('.L10', 147, 154), ('.L28', 291, 300)]) + [('.L4', 66, 74), ('.L10', 146, 154), ('.L28', 290, 300)]) self.assertEqual( [(k, v[0]['line_number'], v[-1]['line_number']) for k, v in find_basic_loop_bodies(self.parsed_AArch).items()], - [('.LBB0_12', 135, 173), ('.LBB0_15', 192, 205), ('.LBB0_18', 223, 228), - ('.LBB0_29', 308, 444), ('.LBB0_32', 460, 480), ('.LBB0_35', 495, 504)]) + [('.LBB0_12', 134, 173), ('.LBB0_15', 191, 205), ('.LBB0_18', 222, 228), + ('.LBB0_29', 307, 444), ('.LBB0_32', 459, 480), ('.LBB0_35', 494, 504)]) ################## # Helper functions