Merge branch 'master' of github.com:RRZE-HPC/OSACA

2026-01-05 10:40:06 +01:00 · 2019-11-13 12:52:34 +01:00
parent 4dc4323e2e 744e1d83cc
commit e7838cac54
11 changed files with 85824 additions and 57 deletions
--- a/README.rst
+++ b/README.rst
@@ -92,7 +92,7 @@ The usage of OSACA can be listed as:
  Keep in mind you have to provide a (dummy) filename in anyway.
 --import MICROBENCH
  Import a given microbenchmark output file into the corresponding architecture instruction database.
-  Define the type of microbenchmark either as "ibench", "asmbench" or "uopsinfo".
+  Define the type of microbenchmark either as "ibench" or "asmbench".
 --insert-marker
  OSACA calls the Kerncraft module for the interactively insertion of `IACA <https://software.intel.com/en-us/articles/intel-architecture-code-analyzer>`_ marker in suggested assembly blocks.
 --export-graph EXPORT_PATH
--- a/osaca/data/bdw.yml
+++ b/osaca/data/bdw.yml
--- a/osaca/data/csx.yml
+++ b/osaca/data/csx.yml
@@ -27,12 +27,12 @@ port_model_scheme: |
   |  ALU  | |  ALU  | |  LD | |  LD | |  ST | |  ALU  | |  ALU & |  | AGU |
   └-------┘ └-------┘ └-----┘ └-----┘ └-----┘ └-------┘ |  Shift |  └-----┘
   ┌-------┐ ┌-------┐ ┌-----┐ ┌-----┐         ┌-------┐ └--------┘
-   |  2ND  | |  Fast | | AGU | | AGU |         |  Fast |
-   | BRANCH| |  LEA  | └-----┘ └-----┘         |  LEA  |
-   └-------┘ └-------┘                         └-------┘
-   ┌-------┐ ┌-------┐                         ┌-------┐
-   |AVX DIV| |AVX FMA|                         |  AVX  |
-   └-------┘ └-------┘                         |  SHUF |
+   |  2ND  | |  Fast | | AGU | | AGU |         |  Fast | ┌--------┐
+   | BRANCH| |  LEA  | └-----┘ └-----┘         |  LEA  | | BRANCH |
+   └-------┘ └-------┘                         └-------┘ └--------┘
+   ┌-------┐ ┌-------┐                         ┌-------┐ 
+   |AVX DIV| |AVX FMA|                         |  AVX  | 
+   └-------┘ └-------┘                         |  SHUF | 
   ┌-------┐ ┌-------┐                         └-------┘
   |AVX FMA| |AVX MUL|                         ┌-------┐
   └-------┘ └-------┘                         |AVX-512|
@@ -50,9 +50,9 @@ port_model_scheme: |
   | Shift | |  Slow |                         |AVX-512|
   └-------┘ |  LEA  |                         |  ALU  |
   ┌-------┐ └-------┘                         └-------┘
-   |  VNNI | ┌-------┐
-   └-------┘ |  VNNI |
-             └-------┘
+   |  VNNI | ┌-------┐                         
+   └-------┘ |  VNNI |                         
+             └-------┘                         
 instruction_forms:
 - name: addsd
  operands:
--- a/osaca/data/hsw.yml
+++ b/osaca/data/hsw.yml
--- a/osaca/data/ivb.yml
+++ b/osaca/data/ivb.yml
--- a/osaca/data/skx.yml
+++ b/osaca/data/skx.yml
--- a/osaca/data/snb.yml
+++ b/osaca/data/snb.yml
--- a/osaca/db_interface.py
+++ b/osaca/db_interface.py
@@ -11,6 +11,15 @@ from osaca.semantics import MachineModel


 def sanity_check(arch: str, verbose=False):
+    """
+    Checks the database for missing TP/LT values, instructions that might be missing in the ISA
+    DB and duplicate instructions.
+
+    :param arch: micro-arch key to define DB to check
+    :type arch: str
+    :param verbose: verbose output flag, defaults to `False`
+    :type verbose: bool, optional
+    """
    # load arch machine model
    arch_mm = MachineModel(arch=arch)
    data = arch_mm['instruction_forms']
@@ -24,7 +33,6 @@ def sanity_check(arch: str, verbose=False):
        missing_throughput,
        missing_latency,
        missing_port_pressure,
-        wrong_port,
        suspicious_instructions,
        duplicate_instr_arch,
    ) = _check_sanity_arch_db(arch_mm, isa_mm)
@@ -36,7 +44,6 @@ def sanity_check(arch: str, verbose=False):
        missing_throughput,
        missing_latency,
        missing_port_pressure,
-        wrong_port,
        suspicious_instructions,
        duplicate_instr_arch,
        duplicate_instr_isa,
@@ -46,6 +53,16 @@ def sanity_check(arch: str, verbose=False):


 def import_benchmark_output(arch, bench_type, filepath):
+    """
+    Import benchmark results from micro-benchmarks.
+
+    :param arch: target architecture key
+    :type arch: str
+    :param bench_type: key for defining type of benchmark output
+    :type bench_type: str
+    :param filepath: filepath to the output file
+    :type filepath: str
+    """
    supported_bench_outputs = ['ibench', 'asmbench']
    assert os.path.exists(filepath)
    if bench_type not in supported_bench_outputs:
@@ -120,6 +137,7 @@ def _get_asmbench_output(input_data, isa):


 def _get_ibench_output(input_data, isa):
+    """Parse the standard output of ibench and add instructions to DB."""
    db_entries = {}
    for line in input_data:
        if 'Using frequency' in line or len(line) == 0:
@@ -242,7 +260,6 @@ def _check_sanity_arch_db(arch_mm, isa_mm):
    missing_throughput = []
    missing_latency = []
    missing_port_pressure = []
-    wrong_port = []
    suspicious_instructions = []
    duplicate_instr_arch = []

@@ -254,12 +271,9 @@ def _check_sanity_arch_db(arch_mm, isa_mm):
            missing_latency.append(instr_form)
        if instr_form['port_pressure'] is None:
            missing_port_pressure.append(instr_form)
-        else:
-            if _check_for_wrong_port(arch_mm['ports'], instr_form):
-                wrong_port.append(instr_form)
        # check entry against ISA DB
        for prefix in suspicious_prefixes:
-            if instr_form['name'].startswith(prefix):
+            if instr_form['name'].lower().startswith(prefix):
                # check if instruction in ISA DB
                if isa_mm.get_instruction(instr_form['name'], instr_form['operands']) is None:
                    # if not, mark them as suspicious and print it on the screen
@@ -278,20 +292,11 @@ def _check_sanity_arch_db(arch_mm, isa_mm):
        missing_throughput,
        missing_latency,
        missing_port_pressure,
-        wrong_port,
        suspicious_instructions,
        duplicate_instr_arch,
    )


-def _check_for_wrong_port(port_list, instr_form):
-    for cycles, ports in instr_form['port_pressure']:
-        for p in ports:
-            if p not in port_list:
-                return False
-    return True
-
-
 def _check_sanity_isa_db(arch_mm, isa_mm):
    # returned lists
    duplicate_instr_isa = []
@@ -316,7 +321,7 @@ def _check_sanity_isa_db(arch_mm, isa_mm):


 def _print_sanity_report(
-    total, m_tp, m_l, m_pp, wrong_pp, suspic_instr, dup_arch, dup_isa, only_isa, verbose=False
+    total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, verbose=False
 ):
    # non-verbose summary
    print('SUMMARY\n----------------------')
@@ -335,11 +340,6 @@ def _print_sanity_report(
            round(100 * len(m_pp) / total), len(m_pp), total
        )
    )
-    print(
-        '{}% ({}/{}) of instruction forms have an invalid port identifier.'.format(
-            round(100 * len(wrong_pp) / total), len(wrong_pp), total
-        )
-    )
    print(
        '{}% ({}/{}) of instruction forms might miss an ISA DB entry.'.format(
            round(100 * len(suspic_instr) / total), len(suspic_instr), total
@@ -355,12 +355,12 @@ def _print_sanity_report(
    # verbose version
    if verbose:
        _print_sanity_report_verbose(
-            total, m_tp, m_l, m_pp, wrong_pp, suspic_instr, dup_arch, dup_isa, only_isa
+            total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa
        )


 def _print_sanity_report_verbose(
-    total, m_tp, m_l, m_pp, wrong_pp, suspic_instr, dup_arch, dup_isa, only_isa
+    total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa
 ):
    BRIGHT_CYAN = '\033[1;36;1m'
    BRIGHT_BLUE = '\033[1;34;1m'
@@ -382,14 +382,6 @@ def _print_sanity_report_verbose(
    )
    for instr_form in m_pp:
        print('{}{}{}'.format(BRIGHT_MAGENTA, _get_full_instruction_name(instr_form), WHITE))
-    print(
-        'Instruction forms with invalid port identifiers in port pressure:\n'
-        if len(wrong_pp) != 0
-        else '',
-        end='',
-    )
-    for instr_form in wrong_pp:
-        print('{}{}{}'.format(BRIGHT_MAGENTA, _get_full_instruction_name(instr_form), WHITE))
    print(
        'Instruction forms which might miss an ISA DB entry:\n' if len(suspic_instr) != 0 else '',
        end='',
--- a/osaca/frontend.py
+++ b/osaca/frontend.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
-
+"""
+Frontend interface for OSACA. Does everything necessary for printing analysis to the terminal.
+"""
 import re
 from datetime import datetime as dt

@@ -8,6 +10,16 @@ from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel

 class Frontend(object):
    def __init__(self, filename='', arch=None, path_to_yaml=None):
+        """
+        Constructor method.
+
+        :param filename: path to the analyzed kernel file for documentation, defaults to ''
+        :type filename: str, optional
+        :param arch: micro-arch code for getting the machine model, defaults to None
+        :type arch: str, optional
+        :param path_to_yaml: path to the YAML file for getting the machine model, defaults to None
+        :type path_to_yaml: str, optional
+        """
        self._filename = filename
        if not arch and not path_to_yaml:
            raise ValueError('Either arch or path_to_yaml required.')
@@ -22,9 +34,25 @@ class Frontend(object):
            self._arch = self._machine_model.get_arch()

    def _is_comment(self, instruction_form):
+        """
+        Checks if instruction form is a comment-only line.
+
+        :param instruction_form: instruction form as dict
+        :returns: `True` if comment line, `False` otherwise
+        """
        return instruction_form['comment'] is not None and instruction_form['instruction'] is None

    def print_throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
+        """
+        Print throughput analysis only.
+
+        :param kernel: Kernel to print throughput analysis for.
+        :type kernel: list
+        :param show_lineno: flag for showing the line number of instructions, defaults to `False`
+        :type show_lineno: bool, optional
+        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
+        :type show_cmnts: bool, optional
+        """
        lineno_filler = '     ' if show_lineno else ''
        port_len = self._get_max_port_len(kernel)
        separator = '-' * sum([x + 3 for x in port_len]) + '-'
@@ -58,6 +86,14 @@ class Frontend(object):
        print(lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=' '))

    def print_latency_analysis(self, cp_kernel, separator='|'):
+        """
+        Print a list-based CP analysis to the terminal.
+
+        :param cp_kernel: loop kernel containing the CP information for each instruction form
+        :type cp_kernel: list
+        :param separator: separator symbol for the columns, defaults to '|'
+        :type separator: str, optional
+        """
        print('\n\nLatency Analysis Report\n' + '-----------------------')
        for instruction_form in cp_kernel:
            print(
@@ -80,6 +116,14 @@ class Frontend(object):
        )

    def print_loopcarried_dependencies(self, dep_dict, separator='|'):
+        """
+        Print a list-based LCD analysis to the terminal.
+
+        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
+        :type dep_dict: dict
+        :param separator: separator symbol for the columns, defaults to '|'
+        :type separator: str, optional
+        """
        print(
            '\n\nLoop-Carried Dependencies Analysis Report\n'
            + '-----------------------------------------'
@@ -101,6 +145,17 @@ class Frontend(object):
            )

    def print_full_analysis(self, kernel, kernel_dg: KernelDG, verbose=False):
+        """
+        Prints the full analysis report including header, the symbol map, the combined TP/CP/LCD
+        view and the list based LCD view.
+
+        :param kernel: kernel to print
+        :type kernel: list
+        :param kernel_dg: directed graph containing CP and LCD
+        :type kernel_dg: :class:`~osaca.semantics.KernelDG`
+        :param verbose: verbose output flag, defaults to `False`
+        :type verbose: bool, optional
+        """
        self._print_header_report()
        self._print_symbol_map()
        self.print_combined_view(
@@ -109,9 +164,20 @@ class Frontend(object):
        self.print_loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies())

    def print_combined_view(self, kernel, cp_kernel: KernelDG, dep_dict, show_cmnts=True):
-        self._print_header_report()
-        self._print_symbol_map()
-        print('\n\nCombined Analysis Report\n' + '-----------------------')
+        """
+        Prints the combined view of the kernel including the port pressure (TP), a CP column and a
+        LCD column.
+
+        :param kernel: kernel to print
+        :type kernel: list
+        :param cp_kernel: directed graph containing CP and LCD
+        :type cp_kernel: :class:`~osaca.semantics.KernelDG`
+        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
+        :type dep_dict: dict
+        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
+        :type show_cmnts: bool, optional
+        """
+        print('\n\nCombined Analysis Report\n' + '------------------------')
        lineno_filler = '     '
        port_len = self._get_max_port_len(kernel)
        # Separator for ports
@@ -180,6 +246,7 @@ class Frontend(object):
    ####################

    def _get_separator_list(self, separator, separator_2=' '):
+        """Creates column view for separators in the TP/combined view."""
        separator_list = []
        for i in range(len(self._machine_model.get_ports()) - 1):
            match_1 = re.search(r'\d+', self._machine_model.get_ports()[i])
@@ -192,6 +259,7 @@ class Frontend(object):
        return separator_list

    def _get_flag_symbols(self, flag_obj):
+        """Returns flags for a flag object of an instruction"""
        string_result = ''
        string_result += '*' if INSTR_FLAGS.NOT_BOUND in flag_obj else ''
        string_result += 'X' if INSTR_FLAGS.TP_UNKWN in flag_obj else ''
@@ -201,6 +269,7 @@ class Frontend(object):
        return string_result

    def _get_port_pressure(self, ports, port_len, used_ports=[], separator='|'):
+        """Returns line of port pressure for an instruction."""
        if not isinstance(separator, list):
            separator = [separator for x in ports]
        string_result = '{} '.format(separator[-1])
@@ -214,20 +283,23 @@ class Frontend(object):
        return string_result[:-1]

    def _get_node_by_lineno(self, lineno, kernel):
+        """Returns instruction form from kernel by its line number."""
        nodes = [instr for instr in kernel if instr['line_number'] == lineno]
        return nodes[0] if len(nodes) > 0 else None

    def _get_lcd_cp_ports(self, line_number, cp_dg, dependency, separator='|'):
+        """Returns the CP and LCD line for one instruction."""
        lat_cp = lat_lcd = ''
        if cp_dg:
-            lat_cp = self._get_node_by_lineno(line_number, cp_dg)['latency_cp']
+            lat_cp = float(self._get_node_by_lineno(line_number, cp_dg)['latency_cp'])
        if dependency:
-            lat_lcd = self._get_node_by_lineno(line_number, dependency['dependencies'])[
-                'latency_lcd'
-            ]
+            lat_lcd = float(
+                self._get_node_by_lineno(line_number, dependency['dependencies'])['latency_lcd']
+            )
        return '{} {:>4} {} {:>4} {}'.format(separator, lat_cp, separator, lat_lcd, separator)

    def _get_max_port_len(self, kernel):
+        """Returns the maximal length needed to print all throughputs of the kernel."""
        port_len = [4 for x in self._machine_model.get_ports()]
        for instruction_form in kernel:
            for i, port in enumerate(instruction_form['port_pressure']):
@@ -236,6 +308,7 @@ class Frontend(object):
        return port_len

    def _get_port_number_line(self, port_len, separator='|'):
+        """Returns column view of port identifiers of machine_model."""
        string_result = separator
        separator_list = self._get_separator_list(separator, '-')
        for i, length in enumerate(port_len):
@@ -244,6 +317,7 @@ class Frontend(object):
        return string_result

    def _print_header_report(self):
+        """Prints header information"""
        version = 'v0.3'
        adjust = 20
        header = ''
@@ -256,6 +330,7 @@ class Frontend(object):
        print(header)

    def _print_symbol_map(self):
+        """Prints instruction flag map."""
        symbol_dict = {
            INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port',
            INSTR_FLAGS.TP_UNKWN: 'No throughput/latency information for this instruction in '
--- a/osaca/osaca.py
+++ b/osaca/osaca.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-
+"""CLI for OSACA"""
 import argparse
 import io
 import os
@@ -22,6 +22,7 @@ DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/')

 # Stolen from pip
 def __read(*names, **kwargs):
+    """Reads in file"""
    with io.open(
        os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
    ) as fp:
@@ -30,6 +31,7 @@ def __read(*names, **kwargs):

 # Stolen from pip
 def __find_version(*file_paths):
+    """Searches for a version attribute in the given file(s)"""
    version_file = __read(*file_paths)
    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M)
    if version_match:
@@ -38,11 +40,20 @@ def __find_version(*file_paths):


 def get_version():
+    """
+    Gets the current OSACA version stated in the __init__ file
+
+    :returns: str -- the version string.
+    """
    return __find_version('__init__.py')


 def create_parser():
-    """Return argparse parser."""
+    """
+    Return argparse parser.
+
+    :returns: The newly created :class:`~argparse.ArgumentParser` object.
+    """
    # Create parser
    parser = argparse.ArgumentParser(
        description='Analyzes a marked innermost loop snippet for a given architecture type.',
@@ -79,8 +90,8 @@ def create_parser():
        type=str,
        default=argparse.SUPPRESS,
        help='Import a given microbenchmark output file into the corresponding architecture '
-        'instruction database. Define the type of microbenchmark either as "ibench", '
-        '"asmbench" or "uopsinfo".',
+        'instruction database. Define the type of microbenchmark either as "ibench" or '
+        '"asmbench".',
    )
    parser.add_argument(
        '--insert-marker',
@@ -109,9 +120,14 @@ def create_parser():


 def check_arguments(args, parser):
-    """Check arguments passed by user that are not checked by argparse itself."""
+    """
+    Check arguments passed by user that are not checked by argparse itself.
+
+    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
+    :param parser: :class:`~argparse.ArgumentParser` object
+    """
    supported_archs = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'TX2']
-    supported_import_files = ['ibench', 'asmbench', 'uopsinfo']
+    supported_import_files = ['ibench', 'asmbench']

    if 'arch' in args and args.arch.upper() not in supported_archs:
        parser.error(
@@ -125,6 +141,10 @@ def check_arguments(args, parser):


 def check_user_dir():
+    """
+    Creates user directory if it does not exist and copies all not already existing YAML files
+    into it.
+    """
    # Check if data files are already in usr dir, otherwise create them
    if not os.path.isdir(DATA_DIR):
        os.makedirs(DATA_DIR)
@@ -134,15 +154,30 @@ def check_user_dir():


 def import_data(benchmark_type, arch, filepath):
+    """
+    Imports benchmark results from micro-benchmarks.
+
+    :param benchmark_type: key for defining type of benchmark output
+    :type benchmark_type: str
+    :param arch: target architecture to put the data into the right database
+    :type arch: str
+    :param filepath: filepath of the output file
+    :type filepath: str
+    """
    if benchmark_type.lower() == 'ibench':
        import_benchmark_output(arch, 'ibench', filepath)
    elif benchmark_type.lower() == 'asmbench':
        import_benchmark_output(arch, 'asmbench', filepath)
    else:
-        raise NotImplementedError('This benchmark input variant is not implemented yet.')
+        raise NotImplementedError('This benchmark input variant is not supported.')


 def insert_byte_marker(args):
+    """
+    Inserts byte markers into an assembly file using kerncraft.
+
+    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
+    """
    if MachineModel.get_isa_for_arch(args.arch) != 'x86':
        print('Marker insertion for non-x86 is not yet supported by Kerncraft.', file=sys.stderr)
        sys.exit(1)
@@ -174,6 +209,12 @@ def insert_byte_marker(args):


 def inspect(args):
+    """
+    Does the actual throughput and critical path analysis of OSACA and prints it to the
+    terminal.
+
+    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
+    """
    arch = args.arch
    isa = MachineModel.get_isa_for_arch(arch)
    verbose = args.verbose
@@ -203,6 +244,12 @@ def inspect(args):


 def run(args, output_file=sys.stdout):
+    """
+    Main entry point for OSACA's workflow. Decides whether to run an analysis or other things.
+
+    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
+    :param output_file: Define the stream for output, defaults to :class:`sys.stdout`
+    """
    if args.check_db:
        # Sanity check on DB
        verbose = True if args.verbose > 0 else False
@@ -220,6 +267,13 @@ def run(args, output_file=sys.stdout):

 # ---------------------------------------------------
 def get_asm_parser(arch) -> BaseParser:
+    """
+    Helper function to create the right parser for a specific architecture.
+
+    :param arch: architecture code
+    :type arch: str
+    :returns: :class:`~osaca.parser.BaseParser` object
+    """
    isa = MachineModel.get_isa_for_arch(arch)
    if isa == 'x86':
        return ParserX86ATT()
--- a/osaca/semantics/arch_semantics.py
+++ b/osaca/semantics/arch_semantics.py
@@ -125,6 +125,14 @@ class ArchSemantics(ISASemantics):
            instruction_data = self._machine_model.get_instruction(
                instruction_form['instruction'], instruction_form['operands']
            )
+            if (
+                not instruction_data
+                and self._isa == 'x86'
+                and instruction_form['instruction'][-1] in 'bwlq'
+            ):
+                instruction_data = self._machine_model.get_instruction(
+                    instruction_form['instruction'][:-1], instruction_form['operands']
+                )
            if instruction_data:
                # instruction form in DB
                throughput = instruction_data['throughput']