From 530ad8484e49a2ba3fb6d7b321a1bafb4be976cd Mon Sep 17 00:00:00 2001
From: Julian Hammer <julian.hammer@fau.de>
Date: Tue, 28 Jan 2020 17:24:00 +0100
Subject: [PATCH] frontend returns strings; added helper function to calc.
 unmatched ratio

---
 osaca/api/kerncraft_interface.py |   4 +-
 osaca/frontend.py                | 129 +++++++++++++++----------------
 osaca/osaca.py                   |  16 ++--
 osaca/semantics/marker_utils.py  |   4 +-
 tests/test_frontend.py           |   8 +-
 tests/test_marker_utils.py       |  34 ++++----
 6 files changed, 100 insertions(+), 95 deletions(-)

diff --git a/osaca/api/kerncraft_interface.py b/osaca/api/kerncraft_interface.py
index 47a8d69..aceb23c 100755
--- a/osaca/api/kerncraft_interface.py
+++ b/osaca/api/kerncraft_interface.py
@@ -40,9 +40,7 @@ class KerncraftAPI(object):
     def create_output(self, verbose=False):
         kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
         frontend = Frontend(arch=self.machine_model.get_arch())
-        with Capturing() as output:
-            frontend.print_full_analysis(self.kernel, kernel_graph, verbose=verbose)
-        return '\n'.join(output)
+        return frontend.full_analysis(self.kernel, kernel_graph, verbose=verbose)
 
     def get_unmatched_instruction_ratio(self):
         unmatched_counter = 0
diff --git a/osaca/frontend.py b/osaca/frontend.py
index cc69c56..61aae90 100755
--- a/osaca/frontend.py
+++ b/osaca/frontend.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Frontend interface for OSACA. Does everything necessary for printing analysis to the terminal.
+Frontend interface for OSACA. Does everything necessary for analysis report generation.
 """
 import re
 from datetime import datetime as dt
@@ -42,11 +42,11 @@ class Frontend(object):
         """
         return instruction_form['comment'] is not None and instruction_form['instruction'] is None
 
-    def print_throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
+    def throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
         """
-        Print throughput analysis only.
+        Build throughput analysis only.
 
-        :param kernel: Kernel to print throughput analysis for.
+        :param kernel: Kernel to build throughput analysis for.
         :type kernel: list
         :param show_lineno: flag for showing the line number of instructions, defaults to `False`
         :type show_lineno: bool, optional
@@ -62,10 +62,10 @@ class Frontend(object):
         headline = 'Port pressure in cycles'
         headline_str = '{{:^{}}}'.format(len(separator))
 
-        print('\n\nThroughput Analysis Report\n' + '--------------------------')
-        print(headline_str.format(headline))
-        print(lineno_filler + self._get_port_number_line(port_len))
-        print(separator)
+        s = '\n\nThroughput Analysis Report\n--------------------------\n'
+        s += headline_str.format(headline) + '\n'
+        s += lineno_filler + self._get_port_number_line(port_len) + '\n'
+        s += separator + '\n'
         for instruction_form in kernel:
             line = '{:4d} {} {} {}'.format(
                 instruction_form['line_number'],
@@ -80,23 +80,24 @@ class Frontend(object):
             line = line if show_lineno else col_sep + col_sep.join(line.split(col_sep)[1:])
             if show_cmnts is False and self._is_comment(instruction_form):
                 continue
-            print(line)
-        print()
+            s += line + '\n'
+        s += '\n'
         tp_sum = ArchSemantics.get_throughput_sum(kernel)
-        print(lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=' '))
+        s += lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=' ') + '\n'
+        return s
 
-    def print_latency_analysis(self, cp_kernel, separator='|'):
+    def latency_analysis(self, cp_kernel, separator='|'):
         """
-        Print a list-based CP analysis to the terminal.
+        Build a list-based CP analysis report.
 
         :param cp_kernel: loop kernel containing the CP information for each instruction form
         :type cp_kernel: list
         :separator: separator symbol for the columns, defaults to '|'
         :type separator: str, optional
         """
-        print('\n\nLatency Analysis Report\n' + '-----------------------')
+        s = '\n\nLatency Analysis Report\n-----------------------\n'
         for instruction_form in cp_kernel:
-            print(
+            s += (
                 '{:4d} {} {:4.1f} {}{}{} {}'.format(
                     instruction_form['line_number'],
                     separator,
@@ -106,16 +107,17 @@ class Frontend(object):
                     separator,
                     instruction_form['line'],
                 )
-            )
-        print(
+            ) + '\n'
+        s += (
             '\n{:4} {} {:4.1f}'.format(
                 ' ' * max([len(str(instr_form['line_number'])) for instr_form in cp_kernel]),
                 ' ' * len(separator),
                 sum([instr_form['latency_cp'] for instr_form in cp_kernel]),
             )
-        )
+        ) + '\n'
+        return s
 
-    def print_loopcarried_dependencies(self, dep_dict, separator='|'):
+    def loopcarried_dependencies(self, dep_dict, separator='|'):
         """
-        Print a list-based LCD analysis to the terminal.
+        Build a list-based LCD analysis report.
 
@@ -124,13 +126,13 @@ class Frontend(object):
         :separator: separator symbol for the columns, defaults to '|'
         :type separator: str, optional
         """
-        print(
+        s = (
             '\n\nLoop-Carried Dependencies Analysis Report\n'
-            + '-----------------------------------------'
+            + '-----------------------------------------\n'
         )
         # TODO find a way to overcome padding for different tab-lengths
         for dep in dep_dict:
-            print(
+            s += (
                 '{:4d} {} {:4.1f} {} {:36}{} {}'.format(
                     dep,
                     separator,
@@ -143,15 +145,16 @@ class Frontend(object):
                     [node['line_number'] for node in dep_dict[dep]['dependencies']],
                 )
-            )
+            ) + '\n'
+        return s
 
-    def print_full_analysis(
+    def full_analysis(
         self, kernel, kernel_dg: KernelDG, ignore_unknown=False, verbose=False
     ):
         """
-        Prints the full analysis report including header, the symbol map, the combined TP/CP/LCD
+        Build the full analysis report including header, the symbol map, the combined TP/CP/LCD
         view and the list based LCD view.
 
-        :param kernel: kernel to print
+        :param kernel: kernel to report on
         :type kernel: list
         :param kernel_dg: directed graph containing CP and LCD
         :type kernel_dg: :class:`~osaca.semantics.KernelDG`
@@ -161,24 +164,24 @@ class Frontend(object):
         :param verbose: flag for verbosity level, defaults to False
         :type verbose: boolean, optional
         """
-        self._print_header_report()
-        self._print_symbol_map()
-        self.print_combined_view(
-            kernel,
-            kernel_dg.get_critical_path(),
-            kernel_dg.get_loopcarried_dependencies(),
-            ignore_unknown,
-        )
-        self.print_loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies())
+        return (
+            self._header_report() +
+            self._symbol_map() +
+            self.combined_view(
+                kernel,
+                kernel_dg.get_critical_path(),
+                kernel_dg.get_loopcarried_dependencies(),
+                ignore_unknown) +
+            self.loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies()))
 
-    def print_combined_view(
+    def combined_view(
         self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True
     ):
         """
-        Prints the combined view of the kernel including the port pressure (TP), a CP column and a
+        Build combined view of kernel including port pressure (TP), a CP column and a
         LCD column.
 
-        :param kernel: kernel to print
+        :param kernel: kernel to report on
         :type kernel: list
         :param kernel_dg: directed graph containing CP and LCD
         :type kernel_dg: :class:`~osaca.semantics.KernelDG`
@@ -190,7 +193,7 @@ class Frontend(object):
         :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
         :type show_cmnts: bool, optional
         """
-        print('\n\nCombined Analysis Report\n' + '------------------------')
+        s = '\n\nCombined Analysis Report\n------------------------\n'
         lineno_filler = '     '
         port_len = self._get_max_port_len(kernel)
         # Separator for ports
@@ -216,20 +219,19 @@ class Frontend(object):
             longest_lcd = [line_no for line_no in sums if sums[line_no] == lcd_sum][0]
             lcd_lines = [d['line_number'] for d in dep_dict[longest_lcd]['dependencies']]
 
-        print(headline_str.format(headline))
-        print(
+        s += headline_str.format(headline) + '\n'
+        s += (
             lineno_filler
             + self._get_port_number_line(port_len, separator=col_sep)
             + '{}{:^6}{}{:^6}{}'.format(col_sep, 'CP', col_sep, 'LCD', col_sep)
-        )
-        print(separator)
+        ) + '\n' + separator + '\n'
         for instruction_form in kernel:
             if show_cmnts is False and self._is_comment(instruction_form):
                 continue
             line_number = instruction_form['line_number']
             used_ports = [list(uops[1]) for uops in instruction_form['port_uops']]
             used_ports = list(set([p for uops_ports in used_ports for p in uops_ports]))
-            line = '{:4d} {}{} {} {}'.format(
+            s +=  '{:4d} {}{} {} {}\n'.format(
                 line_number,
                 self._get_port_pressure(
                     instruction_form['port_pressure'], port_len, used_ports, sep_list
@@ -244,8 +246,7 @@ class Frontend(object):
                 else ' ',
                 instruction_form['line'].strip(),
             )
-            print(line)
-        print()
+        s += '\n'
         # check for unknown instructions and throw warning if called without --ignore-unknown
         if not ignore_unknown and INSTR_FLAGS.TP_UNKWN in [
             flag for instr in kernel for flag in instr['flags']
@@ -253,33 +254,31 @@ class Frontend(object):
             num_missing = len(
                 [instr['flags'] for instr in kernel if INSTR_FLAGS.TP_UNKWN in instr['flags']]
             )
-            self._print_missing_instruction_error(num_missing)
+            s += self._missing_instruction_error(num_missing)
         else:
             # lcd_sum already calculated before
             tp_sum = ArchSemantics.get_throughput_sum(kernel)
             cp_sum = sum([x['latency_cp'] for x in cp_kernel])
-            print(
+            s += (
                 lineno_filler
                 + self._get_port_pressure(tp_sum, port_len, separator=' ')
-                + ' {:^6} {:^6}'.format(cp_sum, lcd_sum)
+                + ' {:^6} {:^6}\n'.format(cp_sum, lcd_sum)
             )
+        return s
 
     ####################
     # HELPER FUNCTIONS
     ####################
 
-    def _print_missing_instruction_error(self, amount):
-        print(
-            (
-                '------------------ WARNING: The performance data for {} instructions is missing.'
-                '------------------\n'
-                '                     No final analysis is given. If you want to ignore this\n'
-                '                     warning and run the analysis anyway, start osaca with\n'
-                '                                       --ignore_unknown flag.\n'
-                '--------------------------------------------------------------------------------'
-                '----------------{}'
-            ).format(amount, '-' * len(str(amount)))
-        )
+    def _missing_instruction_error(self, amount):
+        return (
+            '------------------ WARNING: The performance data for {} instructions is missing.'
+            '------------------\n'
+            '                     No final analysis is given. If you want to ignore this\n'
+            '                     warning and run the analysis anyway, start osaca with\n'
+            '                                       --ignore_unknown flag.\n'
+            '--------------------------------------------------------------------------------'
+            '----------------{}\n').format(amount, '-' * len(str(amount)))
 
     def _get_separator_list(self, separator, separator_2=' '):
         """Creates column view for seperators in the TP/combined view."""
@@ -352,7 +351,7 @@ class Frontend(object):
             string_result += substr.format(self._machine_model.get_ports()[i]) + separator_list[i]
         return string_result
 
-    def _print_header_report(self):
+    def _header_report(self):
-        """Prints header information"""
+        """Build the header information of the report."""
         version = 'v0.3'
         adjust = 20
@@ -363,9 +362,9 @@ class Frontend(object):
         header += 'Timestamp:'.ljust(adjust) + '{}\n'.format(
             dt.utcnow().strftime('%Y-%m-%d %H:%M:%S')
         )
-        print(header)
+        return header + '\n'
 
-    def _print_symbol_map(self):
+    def _symbol_map(self):
-        """Prints instruction flag map."""
+        """Build the instruction flag map."""
         symbol_dict = {
             INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port',
@@ -378,7 +377,7 @@ class Frontend(object):
         for flag in sorted(symbol_dict.keys()):
             symbol_map += ' {} - {}\n'.format(self._get_flag_symbols([flag]), symbol_dict[flag])
 
-        print(symbol_map, end='')
+        return symbol_map
 
-    def _print_port_binding_summary(self):
+    def _port_binding_summary(self):
         raise NotImplementedError
diff --git a/osaca/osaca.py b/osaca/osaca.py
index 56f90c8..1a0ecb5 100755
--- a/osaca/osaca.py
+++ b/osaca/osaca.py
@@ -10,7 +10,7 @@ from osaca.db_interface import import_benchmark_output, sanity_check
 from osaca.frontend import Frontend
 from osaca.parser import BaseParser, ParserAArch64v81, ParserX86ATT
 from osaca.semantics import (ArchSemantics, KernelDG, MachineModel,
-                             reduce_to_section)
+                             reduce_to_section, INSTR_FLAGS)
 
 MODULE_DATA_DIR = os.path.join(
     os.path.dirname(os.path.split(os.path.abspath(__file__))[0]), 'osaca/data/'
@@ -231,9 +231,9 @@ def inspect(args):
         kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None)
     # Print analysis
     frontend = Frontend(args.file.name, arch=arch)
-    frontend.print_full_analysis(
+    print(frontend.full_analysis(
         kernel, kernel_graph, ignore_unknown=ignore_unknown, verbose=verbose
-    )
+    ))
 
 
 def run(args, output_file=sys.stdout):
@@ -258,7 +258,6 @@ def run(args, output_file=sys.stdout):
         inspect(args)
 
 
-# ---------------------------------------------------
 def get_asm_parser(arch) -> BaseParser:
     """
     Helper function to create the right parser for a specific architecture.
@@ -274,7 +273,14 @@ def get_asm_parser(arch) -> BaseParser:
         return ParserAArch64v81()
 
 
-# ---------------------------------------------------
+def get_unmatched_instruction_ratio(kernel):
+    """Return ratio of unmatched to total instructions in kernel (0.0 if kernel is empty)."""
+    if not kernel:
+        return 0.0
+    unmatched_counter = sum(
+        INSTR_FLAGS.TP_UNKWN in instr['flags'] and INSTR_FLAGS.LT_UNKWN in instr['flags']
+        for instr in kernel)
+    return unmatched_counter / len(kernel)
 
 
 def main():
diff --git a/osaca/semantics/marker_utils.py b/osaca/semantics/marker_utils.py
index 9cd5271..11127ce 100755
--- a/osaca/semantics/marker_utils.py
+++ b/osaca/semantics/marker_utils.py
@@ -201,7 +201,7 @@ def find_basic_blocks(lines):
     # an instruction referencing a valid jump label
     blocks = OrderedDict()
     for label, label_line_idx in valid_jump_labels.items():
-        blocks[label] = []
+        blocks[label] = [lines[label_line_idx]]
         for line in lines[label_line_idx + 1 :]:
             terminate = False
             blocks[label].append(line)
@@ -230,7 +230,7 @@ def find_basic_loop_bodies(lines):
     # an instruction referencing a valid jump label
     loop_bodies = OrderedDict()
     for label, label_line_idx in valid_jump_labels.items():
-        current_block = []
+        current_block = [lines[label_line_idx]]
         for line in lines[label_line_idx + 1 :]:
             terminate = False
             current_block.append(line)
diff --git a/tests/test_frontend.py b/tests/test_frontend.py
index d4863f8..8bfd963 100755
--- a/tests/test_frontend.py
+++ b/tests/test_frontend.py
@@ -67,13 +67,15 @@ class TestFrontend(unittest.TestCase):
     def test_frontend_x86(self):
         dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
         fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'csx.yml'))
-        fe.print_throughput_analysis(self.kernel_x86, show_cmnts=False)
-        fe.print_latency_analysis(dg.get_critical_path())
+        fe.throughput_analysis(self.kernel_x86, show_cmnts=False)
+        fe.latency_analysis(dg.get_critical_path())
+        # TODO compare output with checked string
 
     def test_frontend_AArch64(self):
         dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2)
         fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, 'tx2.yml'))
-        fe.print_full_analysis(self.kernel_AArch64, dg, verbose=True)
+        fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
+        # TODO compare output with checked string
 
     ##################
     # Helper functions
diff --git a/tests/test_marker_utils.py b/tests/test_marker_utils.py
index e5bac0f..fc5e7f6 100755
--- a/tests/test_marker_utils.py
+++ b/tests/test_marker_utils.py
@@ -317,36 +317,36 @@ class TestMarkerUtils(unittest.TestCase):
         self.assertEqual(
             [(k, v[0]['line_number'], v[-1]['line_number'])
              for k, v in find_basic_blocks(self.parsed_x86).items()],
-            [('.LFB24', 12, 56), ('.L4', 67, 74), ('.L3', 81, 89), ('.L2', 104, 112),
-             ('.L13', 113, 121), ('.L12', 122, 125), ('.L6', 134, 135), ('.L10', 147, 154),
-             ('.L9', 163, 170), ('.L8', 185, 187), ('.L15', 254, 256), ('.L26', 258, 259),
-             ('.L14', 261, 262), ('.LFB25', 279, 290), ('.L28', 291, 300)])
+            [('.LFB24', 11, 56), ('.L4', 66, 74), ('.L3', 80, 89), ('.L2', 103, 112),
+             ('.L13', 112, 121), ('.L12', 121, 125), ('.L6', 133, 135), ('.L10', 146, 154),
+             ('.L9', 162, 170), ('.L8', 184, 187), ('.L15', 253, 256), ('.L26', 257, 259),
+             ('.L14', 260, 262), ('.LFB25', 278, 290), ('.L28', 290, 300)])
 
         self.assertEqual(
             [(k, v[0]['line_number'], v[-1]['line_number'])
              for k, v in find_basic_blocks(self.parsed_AArch).items()],
-            [('triad', 20, 64), ('.LBB0_3', 73, 77), ('.LBB0_4', 78, 83), ('.LBB0_5', 86, 89),
-             ('.LBB0_7', 94, 95), ('.LBB0_8', 97, 105), ('.LBB0_9', 108, 114),
-             ('.LBB0_11', 120, 134), ('.LBB0_12', 135, 173), ('.LBB0_14', 179, 191),
-             ('.LBB0_15', 192, 205), ('.LBB0_16', 207, 208), ('.LBB0_17', 210, 222),
-             ('.LBB0_18', 223, 228), ('.LBB0_19', 230, 261), ('.LBB0_20', 262, 269),
-             ('.LBB0_22', 274, 280), ('.LBB0_24', 285, 286), ('.LBB0_26', 292, 293),
-             ('.LBB0_28', 300, 307), ('.LBB0_29', 308, 444), ('.LBB0_31', 450, 459),
-             ('.LBB0_32', 460, 480), ('.LBB0_33', 482, 484), ('.LBB0_34', 486, 494),
-             ('.LBB0_35', 495, 504), ('.LBB0_36', 506, 508), ('.LBB0_37', 510, 518),
-             ('.LBB0_38', 520, 568), ('main', 576, 590)])
+            [('triad', 19, 64), ('.LBB0_3', 72, 77), ('.LBB0_4', 77, 83), ('.LBB0_5', 85, 89),
+             ('.LBB0_7', 93, 95), ('.LBB0_8', 96, 105), ('.LBB0_9', 107, 114),
+             ('.LBB0_11', 119, 134), ('.LBB0_12', 134, 173), ('.LBB0_14', 178, 191),
+             ('.LBB0_15', 191, 205), ('.LBB0_16', 206, 208), ('.LBB0_17', 209, 222),
+             ('.LBB0_18', 222, 228), ('.LBB0_19', 229, 261), ('.LBB0_20', 261, 269),
+             ('.LBB0_22', 273, 280), ('.LBB0_24', 284, 286), ('.LBB0_26', 291, 293),
+             ('.LBB0_28', 299, 307), ('.LBB0_29', 307, 444), ('.LBB0_31', 449, 459),
+             ('.LBB0_32', 459, 480), ('.LBB0_33', 481, 484), ('.LBB0_34', 485, 494),
+             ('.LBB0_35', 494, 504), ('.LBB0_36', 505, 508), ('.LBB0_37', 509, 518),
+             ('.LBB0_38', 519, 568), ('main', 575, 590)])
 
     def test_find_basic_loop_body(self):
         self.assertEqual(
             [(k, v[0]['line_number'], v[-1]['line_number'])
              for k, v in find_basic_loop_bodies(self.parsed_x86).items()],
-            [('.L4', 67, 74), ('.L10', 147, 154), ('.L28', 291, 300)])
+            [('.L4', 66, 74), ('.L10', 146, 154), ('.L28', 290, 300)])
 
         self.assertEqual(
             [(k, v[0]['line_number'], v[-1]['line_number'])
              for k, v in find_basic_loop_bodies(self.parsed_AArch).items()],
-            [('.LBB0_12', 135, 173), ('.LBB0_15', 192, 205), ('.LBB0_18', 223, 228),
-             ('.LBB0_29', 308, 444), ('.LBB0_32', 460, 480), ('.LBB0_35', 495, 504)])
+            [('.LBB0_12', 134, 173), ('.LBB0_15', 191, 205), ('.LBB0_18', 222, 228),
+             ('.LBB0_29', 307, 444), ('.LBB0_32', 459, 480), ('.LBB0_35', 494, 504)])
 
     ##################
     # Helper functions