applied flake8 and black rules

This commit is contained in:
JanLJL
2021-08-26 16:58:19 +02:00
parent 34523e1b23
commit d418c16f4a
23 changed files with 781 additions and 471 deletions

View File

@@ -7,7 +7,8 @@ import re
def __read(*names, **kwargs): def __read(*names, **kwargs):
"""Reads in file""" """Reads in file"""
with io.open( with io.open(
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8") os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8"),
) as fp: ) as fp:
return fp.read() return fp.read()

View File

@@ -88,7 +88,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
comment = None comment = None
if load: if load:
if 'ymm' in operand_types: if "ymm" in operand_types:
port2D3D_pressure = 2 port2D3D_pressure = 2
else: else:
port2D3D_pressure = 1 port2D3D_pressure = 1
@@ -96,7 +96,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
latency += 4 latency += 4
comment = "with load" comment = "with load"
if store: if store:
if 'ymm' in operand_types: if "ymm" in operand_types:
port4_pressure = 2 port4_pressure = 2
else: else:
port4_pressure = 1 port4_pressure = 1
@@ -716,14 +716,14 @@ skx_mov_instructions = list(
# ('movapd xmm xmm', ('1*p5', 1)), # ('movapd xmm xmm', ('1*p5', 1)),
# ('vmovapd xmm xmm', ('1*p5', 1)), # ('vmovapd xmm xmm', ('1*p5', 1)),
# ('vmovapd ymm ymm', ('1*p5', 1)), # ('vmovapd ymm ymm', ('1*p5', 1)),
('vmovapd zmm zmm', ('', 0)), ("vmovapd zmm zmm", ("", 0)),
# https://www.felixcloutier.com/x86/movaps # https://www.felixcloutier.com/x86/movaps
# TODO with masking! # TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156: # TODO the following may eliminate or be bound to 1*p0156:
# ('movaps xmm xmm', ('1*p5', 1)), # ('movaps xmm xmm', ('1*p5', 1)),
# ('vmovaps xmm xmm', ('1*p5', 1)), # ('vmovaps xmm xmm', ('1*p5', 1)),
# ('vmovaps ymm ymm', ('1*p5', 1)), # ('vmovaps ymm ymm', ('1*p5', 1)),
('vmovaps zmm zmm', ('', 0)), ("vmovaps zmm zmm", ("", 0)),
# https://www.felixcloutier.com/x86/movbe # https://www.felixcloutier.com/x86/movbe
("movbe gpr mem", ("1*p15", 4)), ("movbe gpr mem", ("1*p15", 4)),
("movbe mem gpr", ("1*p15", 4)), ("movbe mem gpr", ("1*p15", 4)),

View File

@@ -140,9 +140,11 @@ def extract_model(tree, arch, skip_mem=True):
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr) print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
continue continue
# skip if measured TP is smaller than computed # skip if measured TP is smaller than computed
if [float(x.attrib["TP_ports"]) > min(float(x.attrib["TP_loop"]), if [
float(x.attrib["TP_unrolled"])) float(x.attrib["TP_ports"])
for x in arch_tag.findall("measurement")][0]: > min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"]))
for x in arch_tag.findall("measurement")
][0]:
print( print(
"Calculated TP is greater than measured TP.", "Calculated TP is greater than measured TP.",
iform, iform,
@@ -160,13 +162,15 @@ def extract_model(tree, arch, skip_mem=True):
throughput = float(measurement_tag.attrib["TP_ports"]) throughput = float(measurement_tag.attrib["TP_ports"])
else: else:
throughput = min( throughput = min(
measurement_tag.attrib.get("TP_loop", float('inf')), measurement_tag.attrib.get("TP_loop", float("inf")),
measurement_tag.attrib.get("TP_unroll", float('inf')), measurement_tag.attrib.get("TP_unroll", float("inf")),
measurement_tag.attrib.get("TP", float('inf')), measurement_tag.attrib.get("TP", float("inf")),
) )
if throughput == float('inf'): if throughput == float("inf"):
throughput = None throughput = None
uops = int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None uops = (
int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
)
if "ports" in measurement_tag.attrib: if "ports" in measurement_tag.attrib:
port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib)) port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
latencies = [ latencies = [
@@ -202,7 +206,11 @@ def extract_model(tree, arch, skip_mem=True):
# Check if all are equal # Check if all are equal
if port_pressure: if port_pressure:
if port_pressure[1:] != port_pressure[:-1]: if port_pressure[1:] != port_pressure[:-1]:
print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr) print(
"Contradicting port occupancies, using latest IACA:",
iform,
file=sys.stderr,
)
port_pressure = port_pressure[-1] port_pressure = port_pressure[-1]
else: else:
# print("No data available for this architecture:", mnemonic, file=sys.stderr) # print("No data available for this architecture:", mnemonic, file=sys.stderr)
@@ -222,9 +230,11 @@ def extract_model(tree, arch, skip_mem=True):
port_4 = True port_4 = True
# Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4) # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
if port_23 and not port_4: if port_23 and not port_4:
if arch.upper() in ["SNB", "IVB"] and any( if (
[p.get('name', '') == 'ymm' for p in parameters]) and \ arch.upper() in ["SNB", "IVB"]
not '128' in mnemonic: and any([p.get("name", "") == "ymm" for p in parameters])
and not ("128" in mnemonic)
):
# x = 2 if SNB or IVB and ymm regiser in any operand and not '128' in # x = 2 if SNB or IVB and ymm regiser in any operand and not '128' in
# instruction name # instruction name
port2D3D_pressure = 2 port2D3D_pressure = 2

View File

@@ -125,7 +125,10 @@ def _get_asmbench_output(input_data, isa):
db_entries = {} db_entries = {}
for i in range(0, len(input_data), 4): for i in range(0, len(input_data), 4):
if input_data[i + 3].strip() != "": if input_data[i + 3].strip() != "":
print("asmbench output not in the correct format! Format must be: ", file=sys.stderr) print(
"asmbench output not in the correct format! Format must be: ",
file=sys.stderr,
)
print( print(
"-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n" "-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n"
"Throughput: Y cycles\n\n-------------", "Throughput: Y cycles\n\n-------------",
@@ -540,7 +543,16 @@ def _get_sanity_report(
def _get_sanity_report_verbose( def _get_sanity_report_verbose(
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, bad_operands, colors=False total,
m_tp,
m_l,
m_pp,
suspic_instr,
dup_arch,
dup_isa,
only_isa,
bad_operands,
colors=False,
): ):
"""Get the verbose part of the sanity report with all missing instruction forms.""" """Get the verbose part of the sanity report with all missing instruction forms."""
BRIGHT_CYAN = "\033[1;36;1m" if colors else "" BRIGHT_CYAN = "\033[1;36;1m" if colors else ""

View File

@@ -202,7 +202,12 @@ class Frontend(object):
) )
def combined_view( def combined_view(
self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True self,
kernel,
cp_kernel: KernelDG,
dep_dict,
ignore_unknown=False,
show_cmnts=True,
): ):
""" """
Build combined view of kernel including port pressure (TP), a CP column and a Build combined view of kernel including port pressure (TP), a CP column and a
@@ -238,8 +243,8 @@ class Frontend(object):
lcd_sum = 0.0 lcd_sum = 0.0
lcd_lines = {} lcd_lines = {}
if dep_dict: if dep_dict:
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]['latency']) longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"])
lcd_sum = dep_dict[longest_lcd]['latency'] lcd_sum = dep_dict[longest_lcd]["latency"]
lcd_lines = { lcd_lines = {
instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"] instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"]
} }

View File

@@ -10,7 +10,13 @@ from functools import lru_cache
from osaca.db_interface import import_benchmark_output, sanity_check from osaca.db_interface import import_benchmark_output, sanity_check
from osaca.frontend import Frontend from osaca.frontend import Frontend
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section from osaca.semantics import (
INSTR_FLAGS,
ArchSemantics,
KernelDG,
MachineModel,
reduce_to_section,
)
SUPPORTED_ARCHS = [ SUPPORTED_ARCHS = [
@@ -37,7 +43,8 @@ DEFAULT_ARCHS = {
def __read(*names, **kwargs): def __read(*names, **kwargs):
"""Reads in file""" """Reads in file"""
with io.open( with io.open(
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8") os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8"),
) as fp: ) as fp:
return fp.read() return fp.read()
@@ -79,7 +86,10 @@ def create_parser(parser=None):
# Add arguments # Add arguments
parser.add_argument( parser.add_argument(
"-V", "--version", action="version", version="%(prog)s " + __find_version("__init__.py") "-V",
"--version",
action="version",
version="%(prog)s " + __find_version("__init__.py"),
) )
parser.add_argument( parser.add_argument(
"--arch", "--arch",
@@ -167,7 +177,9 @@ def create_parser(parser=None):
help="Write analysis to this file (default to stdout).", help="Write analysis to this file (default to stdout).",
) )
parser.add_argument( parser.add_argument(
"file", type=argparse.FileType("r"), help="Path to object (ASM or instruction file)." "file",
type=argparse.FileType("r"),
help="Path to object (ASM or instruction file).",
) )
return parser return parser
@@ -347,7 +359,10 @@ def run(args, output_file=sys.stdout):
# Sanity check on DB # Sanity check on DB
verbose = True if args.verbose > 0 else False verbose = True if args.verbose > 0 else False
sanity_check( sanity_check(
args.arch, verbose=verbose, internet_check=args.internet_check, output_file=output_file args.arch,
verbose=verbose,
internet_check=args.internet_check,
output_file=output_file,
) )
elif "import_data" in args: elif "import_data" in args:
# Import microbench output file into DB # Import microbench output file into DB

View File

@@ -26,9 +26,9 @@ class ParserAArch64(BaseParser):
pp.ZeroOrMore(pp.Word(pp.printables)) pp.ZeroOrMore(pp.Word(pp.printables))
).setResultsName(self.COMMENT_ID) ).setResultsName(self.COMMENT_ID)
# Define ARM assembly identifier # Define ARM assembly identifier
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName( decimal_number = pp.Combine(
"value" pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
) ).setResultsName("value")
hex_number = pp.Combine(pp.Literal("0x") + pp.Word(pp.hexnums)).setResultsName("value") hex_number = pp.Combine(pp.Literal("0x") + pp.Word(pp.hexnums)).setResultsName("value")
relocation = pp.Combine(pp.Literal(":") + pp.Word(pp.alphanums + "_") + pp.Literal(":")) relocation = pp.Combine(pp.Literal(":") + pp.Word(pp.alphanums + "_") + pp.Literal(":"))
first = pp.Word(pp.alphas + "_.", exact=1) first = pp.Word(pp.alphas + "_.", exact=1)
@@ -152,7 +152,9 @@ class ParserAArch64(BaseParser):
pp.Literal("{") pp.Literal("{")
+ ( + (
pp.delimitedList(pp.Combine(self.list_element), delim=",").setResultsName("list") pp.delimitedList(pp.Combine(self.list_element), delim=",").setResultsName("list")
^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName("range") ^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName(
"range"
)
) )
+ pp.Literal("}") + pp.Literal("}")
+ pp.Optional(index) + pp.Optional(index)
@@ -256,9 +258,7 @@ class ParserAArch64(BaseParser):
# 2. Parse label # 2. Parse label
if result is None: if result is None:
try: try:
result = self.process_operand( result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
self.label.parseString(line, parseAll=True).asDict()
)
result = AttrDict.convert_dict(result) result = AttrDict.convert_dict(result)
instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
if self.COMMENT_ID in result[self.LABEL_ID]: if self.COMMENT_ID in result[self.LABEL_ID]:
@@ -293,7 +293,9 @@ class ParserAArch64(BaseParser):
try: try:
result = self.parse_instruction(line) result = self.parse_instruction(line)
except (pp.ParseException, KeyError) as e: except (pp.ParseException, KeyError) as e:
raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e raise ValueError(
"Unable to parse {!r} on line {}".format(line, line_number)
) from e
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID] instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID] instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID] instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID]
@@ -390,9 +392,9 @@ class ParserAArch64(BaseParser):
new_dict["pre_indexed"] = True new_dict["pre_indexed"] = True
if "post_indexed" in memory_address: if "post_indexed" in memory_address:
if "value" in memory_address["post_indexed"]: if "value" in memory_address["post_indexed"]:
new_dict["post_indexed"] = {"value": int( new_dict["post_indexed"] = {
memory_address["post_indexed"]["value"], 0 "value": int(memory_address["post_indexed"]["value"], 0)
)} }
else: else:
new_dict["post_indexed"] = memory_address["post_indexed"] new_dict["post_indexed"] = memory_address["post_indexed"]
return AttrDict({self.MEMORY_ID: new_dict}) return AttrDict({self.MEMORY_ID: new_dict})
@@ -408,27 +410,27 @@ class ParserAArch64(BaseParser):
Resolve range or list register operand to list of registers. Resolve range or list register operand to list of registers.
Returns None if neither list nor range Returns None if neither list nor range
""" """
if 'register' in operand: if "register" in operand:
if 'list' in operand.register: if "list" in operand.register:
index = operand.register.get('index') index = operand.register.get("index")
range_list = [] range_list = []
for reg in operand.register.list: for reg in operand.register.list:
reg = deepcopy(reg) reg = deepcopy(reg)
if index is not None: if index is not None:
reg['index'] = int(index, 0) reg["index"] = int(index, 0)
range_list.append(AttrDict({self.REGISTER_ID: reg})) range_list.append(AttrDict({self.REGISTER_ID: reg}))
return range_list return range_list
elif 'range' in operand.register: elif "range" in operand.register:
base_register = operand.register.range[0] base_register = operand.register.range[0]
index = operand.register.get('index') index = operand.register.get("index")
range_list = [] range_list = []
start_name = base_register.name start_name = base_register.name
end_name = operand.register.range[1].name end_name = operand.register.range[1].name
for name in range(int(start_name), int(end_name) + 1): for name in range(int(start_name), int(end_name) + 1):
reg = deepcopy(base_register) reg = deepcopy(base_register)
if index is not None: if index is not None:
reg['index'] = int(index, 0) reg["index"] = int(index, 0)
reg['name'] = str(name) reg["name"] = str(name)
range_list.append(AttrDict({self.REGISTER_ID: reg})) range_list.append(AttrDict({self.REGISTER_ID: reg}))
return range_list return range_list
# neither register list nor range, return unmodified # neither register list nor range, return unmodified
@@ -482,10 +484,12 @@ class ParserAArch64(BaseParser):
return AttrDict({self.IMMEDIATE_ID: immediate}) return AttrDict({self.IMMEDIATE_ID: immediate})
else: else:
# change 'mantissa' key to 'value' # change 'mantissa' key to 'value'
return AttrDict({ return AttrDict(
self.IMMEDIATE_ID: AttrDict({ {
"value": immediate[dict_name]["mantissa"], self.IMMEDIATE_ID: AttrDict(
"type": dict_name})} {"value": immediate[dict_name]["mantissa"], "type": dict_name}
)
}
) )
def process_label(self, label): def process_label(self, label):

View File

@@ -23,9 +23,9 @@ class ParserX86ATT(BaseParser):
def construct_parser(self): def construct_parser(self):
"""Create parser for ARM AArch64 ISA.""" """Create parser for ARM AArch64 ISA."""
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName( decimal_number = pp.Combine(
"value" pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
) ).setResultsName("value")
hex_number = pp.Combine( hex_number = pp.Combine(
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums) pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
).setResultsName("value") ).setResultsName("value")
@@ -41,7 +41,8 @@ class ParserX86ATT(BaseParser):
identifier = pp.Group( identifier = pp.Group(
pp.Optional(id_offset).setResultsName("offset") pp.Optional(id_offset).setResultsName("offset")
+ pp.Combine( + pp.Combine(
pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"), joinString="::" pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"),
joinString="::",
).setResultsName("name") ).setResultsName("name")
+ pp.Optional(relocation).setResultsName("relocation") + pp.Optional(relocation).setResultsName("relocation")
).setResultsName("identifier") ).setResultsName("identifier")
@@ -443,7 +444,12 @@ class ParserX86ATT(BaseParser):
"""Check if register is a vector register""" """Check if register is a vector register"""
if register is None: if register is None:
return False return False
if register["name"].rstrip(string.digits).lower() in ["mm", "xmm", "ymm", "zmm"]: if register["name"].rstrip(string.digits).lower() in [
"mm",
"xmm",
"ymm",
"zmm",
]:
return True return True
return False return False

View File

@@ -47,7 +47,9 @@ class ArchSemantics(ISASemantics):
indices = [port_list.index(p) for p in ports] indices = [port_list.index(p) for p in ports]
# check if port sum of used ports for uop are unbalanced # check if port sum of used ports for uop are unbalanced
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel))) port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
instr_ports = self._to_list(itemgetter(*indices)(instruction_form["port_pressure"])) instr_ports = self._to_list(
itemgetter(*indices)(instruction_form["port_pressure"])
)
if len(set(port_sums)) > 1: if len(set(port_sums)) > 1:
# balance ports # balance ports
# init list for keeping track of the current change # init list for keeping track of the current change
@@ -270,7 +272,8 @@ class ArchSemantics(ISASemantics):
reg_type reg_type
] ]
st_data_port_pressure = [ st_data_port_pressure = [
pp * multiplier for pp in st_data_port_pressure] pp * multiplier for pp in st_data_port_pressure
]
data_port_pressure = [ data_port_pressure = [
sum(x) for x in zip(data_port_pressure, st_data_port_pressure) sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
] ]
@@ -343,7 +346,9 @@ class ArchSemantics(ISASemantics):
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags): def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
"""Apply performance data to instruction if it was found in the archDB""" """Apply performance data to instruction if it was found in the archDB"""
throughput = instruction_data["throughput"] throughput = instruction_data["throughput"]
port_pressure = self._machine_model.average_port_pressure(instruction_data["port_pressure"]) port_pressure = self._machine_model.average_port_pressure(
instruction_data["port_pressure"]
)
instruction_form["port_uops"] = instruction_data["port_pressure"] instruction_form["port_uops"] = instruction_data["port_pressure"]
try: try:
assert isinstance(port_pressure, list) assert isinstance(port_pressure, list)

View File

@@ -1,20 +1,19 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import hashlib
import os import os
import pickle import pickle
import re import re
import string import string
from collections import defaultdict
from copy import deepcopy from copy import deepcopy
from itertools import product from itertools import product
import hashlib
from pathlib import Path from pathlib import Path
from collections import defaultdict
import ruamel.yaml import ruamel.yaml
from ruamel.yaml.compat import StringIO
from osaca import __version__, utils from osaca import __version__, utils
from osaca.parser import ParserX86ATT from osaca.parser import ParserX86ATT
from ruamel.yaml.compat import StringIO
class MachineModel(object): class MachineModel(object):
@@ -37,7 +36,13 @@ class MachineModel(object):
"hidden_loads": None, "hidden_loads": None,
"load_latency": {}, "load_latency": {},
"load_throughput": [ "load_throughput": [
{"base": b, "index": i, "offset": o, "scale": s, "port_pressure": []} {
"base": b,
"index": i,
"offset": o,
"scale": s,
"port_pressure": [],
}
for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8]) for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
], ],
"load_throughput_default": [], "load_throughput_default": [],
@@ -128,7 +133,8 @@ class MachineModel(object):
instruction_form instruction_form
for instruction_form in name_matched_iforms for instruction_form in name_matched_iforms
if self._match_operands( if self._match_operands(
instruction_form["operands"] if "operands" in instruction_form else [], operands instruction_form["operands"] if "operands" in instruction_form else [],
operands,
) )
) )
except StopIteration: except StopIteration:
@@ -150,7 +156,13 @@ class MachineModel(object):
return average_pressure return average_pressure
def set_instruction( def set_instruction(
self, name, operands=None, latency=None, port_pressure=None, throughput=None, uops=None self,
name,
operands=None,
latency=None,
port_pressure=None,
throughput=None,
uops=None,
): ):
"""Import instruction form information.""" """Import instruction form information."""
# If it already exists. Overwrite information. # If it already exists. Overwrite information.
@@ -500,7 +512,11 @@ class MachineModel(object):
"""Check if the types of operand ``i_operand`` and ``operand`` match.""" """Check if the types of operand ``i_operand`` and ``operand`` match."""
# check for wildcard # check for wildcard
if self.WILDCARD in operand: if self.WILDCARD in operand:
if "class" in i_operand and i_operand["class"] == "register" or "register" in i_operand: if (
"class" in i_operand
and i_operand["class"] == "register"
or "register" in i_operand
):
return True return True
else: else:
return False return False
@@ -527,20 +543,27 @@ class MachineModel(object):
return self._is_AArch64_mem_type(i_operand, operand["memory"]) return self._is_AArch64_mem_type(i_operand, operand["memory"])
# immediate # immediate
if i_operand["class"] == "immediate" and i_operand["imd"] == self.WILDCARD: if i_operand["class"] == "immediate" and i_operand["imd"] == self.WILDCARD:
return "value" in operand or \ return "value" in operand or (
("immediate" in operand and "value" in operand["immediate"]) "immediate" in operand and "value" in operand["immediate"]
)
if i_operand["class"] == "immediate" and i_operand["imd"] == "int": if i_operand["class"] == "immediate" and i_operand["imd"] == "int":
return ("value" in operand and operand.get("type", None) == "int") or \ return ("value" in operand and operand.get("type", None) == "int") or (
("immediate" in operand and "value" in operand["immediate"] and "immediate" in operand
operand["immediate"].get("type", None) == "int") and "value" in operand["immediate"]
and operand["immediate"].get("type", None) == "int"
)
if i_operand["class"] == "immediate" and i_operand["imd"] == "float": if i_operand["class"] == "immediate" and i_operand["imd"] == "float":
return ("float" in operand and operand.get("type", None) == "float") or \ return ("float" in operand and operand.get("type", None) == "float") or (
("immediate" in operand and "float" in operand["immediate"] and "immediate" in operand
operand["immediate"].get("type", None) == "float") and "float" in operand["immediate"]
and operand["immediate"].get("type", None) == "float"
)
if i_operand["class"] == "immediate" and i_operand["imd"] == "double": if i_operand["class"] == "immediate" and i_operand["imd"] == "double":
return ("double" in operand and operand.get("type", None) == "double") or \ return ("double" in operand and operand.get("type", None) == "double") or (
("immediate" in operand and "double" in operand["immediate"] and "immediate" in operand
operand["immediate"].get("type", None) == "double") and "double" in operand["immediate"]
and operand["immediate"].get("type", None) == "double"
)
# identifier # identifier
if "identifier" in operand or ( if "identifier" in operand or (
"immediate" in operand and "identifier" in operand["immediate"] "immediate" in operand and "identifier" in operand["immediate"]
@@ -577,7 +600,10 @@ class MachineModel(object):
def _compare_db_entries(self, operand_1, operand_2): def _compare_db_entries(self, operand_1, operand_2):
"""Check if operand types in DB format (i.e., not parsed) match.""" """Check if operand types in DB format (i.e., not parsed) match."""
operand_attributes = list( operand_attributes = list(
filter(lambda x: True if x != "source" and x != "destination" else False, operand_1) filter(
lambda x: True if x != "source" and x != "destination" else False,
operand_1,
)
) )
for key in operand_attributes: for key in operand_attributes:
try: try:

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from itertools import chain from itertools import chain
from copy import deepcopy
from osaca import utils from osaca import utils
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
@@ -100,40 +99,51 @@ class ISASemantics(object):
# post-process pre- and post-indexing for aarch64 memory operands # post-process pre- and post-indexing for aarch64 memory operands
if self._isa == "aarch64": if self._isa == "aarch64":
for operand in [op for op in op_dict["source"] if "memory" in op]: for operand in [op for op in op_dict["source"] if "memory" in op]:
post_indexed = ("post_indexed" in operand["memory"] and post_indexed = (
operand["memory"]["post_indexed"]) "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
pre_indexed = ("pre_indexed" in operand["memory"] and )
operand["memory"]["pre_indexed"]) pre_indexed = (
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
)
if post_indexed or pre_indexed: if post_indexed or pre_indexed:
op_dict["src_dst"].append( op_dict["src_dst"].append(
AttrDict.convert_dict({ AttrDict.convert_dict(
"register": operand["memory"]["base"], {
"pre_indexed": pre_indexed, "register": operand["memory"]["base"],
"post_indexed": post_indexed}) "pre_indexed": pre_indexed,
"post_indexed": post_indexed,
}
)
) )
for operand in [op for op in op_dict["destination"] if "memory" in op]: for operand in [op for op in op_dict["destination"] if "memory" in op]:
post_indexed = ("post_indexed" in operand["memory"] and post_indexed = (
operand["memory"]["post_indexed"]) "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
pre_indexed = ("pre_indexed" in operand["memory"] and )
operand["memory"]["pre_indexed"]) pre_indexed = (
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
)
if post_indexed or pre_indexed: if post_indexed or pre_indexed:
op_dict["src_dst"].append( op_dict["src_dst"].append(
AttrDict.convert_dict({ AttrDict.convert_dict(
"register": operand["memory"]["base"], {
"pre_indexed": pre_indexed, "register": operand["memory"]["base"],
"post_indexed": post_indexed}) "pre_indexed": pre_indexed,
"post_indexed": post_indexed,
}
)
) )
# store operand list in dict and reassign operand key/value pair # store operand list in dict and reassign operand key/value pair
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict) instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
# assign LD/ST flags # assign LD/ST flags
instruction_form["flags"] = instruction_form["flags"] if "flags" in instruction_form else [] instruction_form["flags"] = (
instruction_form["flags"] if "flags" in instruction_form else []
)
if self._has_load(instruction_form): if self._has_load(instruction_form):
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD] instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
if self._has_store(instruction_form): if self._has_store(instruction_form):
instruction_form["flags"] += [INSTR_FLAGS.HAS_ST] instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
def get_reg_changes(self, instruction_form, only_postindexed=False): def get_reg_changes(self, instruction_form, only_postindexed=False):
""" """
Returns register changes, as dict, for insruction_form, based on operation defined in isa. Returns register changes, as dict, for insruction_form, based on operation defined in isa.
@@ -141,12 +151,16 @@ class ISASemantics(object):
Empty dict if no changes of registers occured. None for registers with unknown changes. Empty dict if no changes of registers occured. None for registers with unknown changes.
If only_postindexed is True, only considers changes due to post_indexed memory references. If only_postindexed is True, only considers changes due to post_indexed memory references.
""" """
if instruction_form.get('instruction') is None: if instruction_form.get("instruction") is None:
return {} return {}
dest_reg_names = [op.register.get('prefix', '') + op.register.name dest_reg_names = [
for op in chain(instruction_form.semantic_operands.destination, op.register.get("prefix", "") + op.register.name
instruction_form.semantic_operands.src_dst) for op in chain(
if 'register' in op] instruction_form.semantic_operands.destination,
instruction_form.semantic_operands.src_dst,
)
if "register" in op
]
isa_data = self._isa_model.get_instruction( isa_data = self._isa_model.get_instruction(
instruction_form["instruction"], instruction_form["operands"] instruction_form["instruction"], instruction_form["operands"]
) )
@@ -162,50 +176,50 @@ class ISASemantics(object):
if only_postindexed: if only_postindexed:
for o in instruction_form.operands: for o in instruction_form.operands:
if 'post_indexed' in o.get('memory', {}): if "post_indexed" in o.get("memory", {}):
base_name = o.memory.base.get('prefix', '') + o.memory.base.name base_name = o.memory.base.get("prefix", "") + o.memory.base.name
return {base_name: { return {
'name': o.memory.base.get('prefix', '') + o.memory.base.name, base_name: {
'value': o.memory.post_indexed.value "name": o.memory.base.get("prefix", "") + o.memory.base.name,
}} "value": o.memory.post_indexed.value,
}
}
return {} return {}
reg_operand_names = {} # e.g., {'rax': 'op1'} reg_operand_names = {} # e.g., {'rax': 'op1'}
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
for o in instruction_form.operands: for o in instruction_form.operands:
if 'pre_indexed' in o.get('memory', {}): if "pre_indexed" in o.get("memory", {}):
# Assuming no isa_data.operation # Assuming no isa_data.operation
if isa_data.get("operation", None) is not None: if isa_data.get("operation", None) is not None:
raise ValueError( raise ValueError(
"ISA information for pre-indexed instruction {!r} has operation set." "ISA information for pre-indexed instruction {!r} has operation set."
"This is currently not supprted.".format(instruction_form.line)) "This is currently not supprted.".format(instruction_form.line)
base_name = o.memory.base.get('prefix', '') + o.memory.base.name )
reg_operand_names = {base_name: 'op1'} base_name = o.memory.base.get("prefix", "") + o.memory.base.name
operand_state = {'op1': { reg_operand_names = {base_name: "op1"}
'name': base_name, operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
'value': o.memory.offset.value
}}
if isa_data is not None and 'operation' in isa_data: if isa_data is not None and "operation" in isa_data:
for i, o in enumerate(instruction_form.operands): for i, o in enumerate(instruction_form.operands):
operand_name = "op{}".format(i + 1) operand_name = "op{}".format(i + 1)
if "register" in o: if "register" in o:
o_reg_name = o["register"].get('prefix', '') + o["register"]["name"] o_reg_name = o["register"].get("prefix", "") + o["register"]["name"]
reg_operand_names[o_reg_name] = operand_name reg_operand_names[o_reg_name] = operand_name
operand_state[operand_name] = { operand_state[operand_name] = {"name": o_reg_name, "value": 0}
'name': o_reg_name,
'value': 0}
elif "immediate" in o: elif "immediate" in o:
operand_state[operand_name] = {'value': o["immediate"]["value"]} operand_state[operand_name] = {"value": o["immediate"]["value"]}
elif "memory" in o: elif "memory" in o:
# TODO lea needs some thinking about # TODO lea needs some thinking about
pass pass
operand_changes = exec(isa_data['operation'], {}, operand_state) exec(isa_data["operation"], {}, operand_state)
change_dict = {reg_name: operand_state.get(reg_operand_names.get(reg_name)) change_dict = {
for reg_name in dest_reg_names} reg_name: operand_state.get(reg_operand_names.get(reg_name))
for reg_name in dest_reg_names
}
return change_dict return change_dict
def _apply_found_ISA_data(self, isa_data, operands): def _apply_found_ISA_data(self, isa_data, operands):
@@ -231,8 +245,10 @@ class ISASemantics(object):
if "hidden_operands" in isa_data: if "hidden_operands" in isa_data:
op_dict["destination"] += [ op_dict["destination"] += [
AttrDict.convert_dict( AttrDict.convert_dict(
{hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}}) {hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}}
for hop in isa_data["hidden_operands"]] )
for hop in isa_data["hidden_operands"]
]
return op_dict return op_dict
for i, op in enumerate(isa_data["operands"]): for i, op in enumerate(isa_data["operands"]):

View File

@@ -16,7 +16,12 @@ class KernelDG(nx.DiGraph):
INSTRUCTION_THRESHOLD = 50 INSTRUCTION_THRESHOLD = 50
def __init__( def __init__(
self, parsed_kernel, parser, hw_model: MachineModel, semantics: ArchSemantics, timeout=10 self,
parsed_kernel,
parser,
hw_model: MachineModel,
semantics: ArchSemantics,
timeout=10,
): ):
self.timed_out = False self.timed_out = False
self.kernel = parsed_kernel self.kernel = parsed_kernel
@@ -73,7 +78,7 @@ class KernelDG(nx.DiGraph):
else instruction_form["latency_wo_load"] else instruction_form["latency_wo_load"]
) )
if "storeload_dep" in dep_flags: if "storeload_dep" in dep_flags:
edge_weight += self.model.get('store_to_load_forward_latency', 0) edge_weight += self.model.get("store_to_load_forward_latency", 0)
dg.add_edge( dg.add_edge(
instruction_form["line_number"], instruction_form["line_number"],
dep["line_number"], dep["line_number"],
@@ -98,7 +103,7 @@ class KernelDG(nx.DiGraph):
tmp_kernel = [] + kernel tmp_kernel = [] + kernel
for orig_iform in kernel: for orig_iform in kernel:
temp_iform = copy.copy(orig_iform) temp_iform = copy.copy(orig_iform)
temp_iform['line_number'] += offset temp_iform["line_number"] += offset
tmp_kernel.append(temp_iform) tmp_kernel.append(temp_iform)
# get dependency graph # get dependency graph
dg = self.create_DG(tmp_kernel) dg = self.create_DG(tmp_kernel)
@@ -118,12 +123,15 @@ class KernelDG(nx.DiGraph):
with Manager() as manager: with Manager() as manager:
all_paths = manager.list() all_paths = manager.list()
processes = [ processes = [
Process(target=self._extend_path, args=(all_paths, instr_section, dg, offset)) Process(
target=self._extend_path,
args=(all_paths, instr_section, dg, offset),
)
for instr_section in instrs for instr_section in instrs
] ]
for p in processes: for p in processes:
p.start() p.start()
if (timeout == -1): if timeout == -1:
# no timeout # no timeout
for p in processes: for p in processes:
p.join() p.join()
@@ -162,7 +170,7 @@ class KernelDG(nx.DiGraph):
# extend path by edge bound latencies (e.g., store-to-load latency) # extend path by edge bound latencies (e.g., store-to-load latency)
lat_path = [] lat_path = []
for s, d in nx.utils.pairwise(path): for s, d in nx.utils.pairwise(path):
edge_lat = dg.edges[s, d]['latency'] edge_lat = dg.edges[s, d]["latency"]
# map source node back to original line numbers # map source node back to original line numbers
if s >= offset: if s >= offset:
s -= offset s -= offset
@@ -310,17 +318,17 @@ class KernelDG(nx.DiGraph):
if change is None or reg_state.get(reg, {}) is None: if change is None or reg_state.get(reg, {}) is None:
reg_state[reg] = None reg_state[reg] = None
else: else:
reg_state.setdefault(reg, {'name': reg, 'value': 0}) reg_state.setdefault(reg, {"name": reg, "value": 0})
if change['name'] != reg: if change["name"] != reg:
# renaming occured, ovrwrite value with up-to-now change of source register # renaming occured, ovrwrite value with up-to-now change of source register
reg_state[reg]['name'] = change['name'] reg_state[reg]["name"] = change["name"]
src_reg_state = reg_state.get(change['name'], {'value': 0}) src_reg_state = reg_state.get(change["name"], {"value": 0})
if src_reg_state is None: if src_reg_state is None:
# original register's state was changed beyond reconstruction # original register's state was changed beyond reconstruction
reg_state[reg] = None reg_state[reg] = None
continue continue
reg_state[reg]['value'] = src_reg_state['value'] reg_state[reg]["value"] = src_reg_state["value"]
reg_state[reg]['value'] += change['value'] reg_state[reg]["value"] += change["value"]
return reg_state return reg_state
def get_dependent_instruction_forms(self, instr_form=None, line_number=None): def get_dependent_instruction_forms(self, instr_form=None, line_number=None):
@@ -340,7 +348,8 @@ class KernelDG(nx.DiGraph):
if instruction_form.semantic_operands is None: if instruction_form.semantic_operands is None:
return is_read return is_read
for src in chain( for src in chain(
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst instruction_form.semantic_operands.source,
instruction_form.semantic_operands.src_dst,
): ):
if "register" in src: if "register" in src:
is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
@@ -372,7 +381,8 @@ class KernelDG(nx.DiGraph):
if instruction_form.semantic_operands is None: if instruction_form.semantic_operands is None:
return False return False
for src in chain( for src in chain(
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst instruction_form.semantic_operands.source,
instruction_form.semantic_operands.src_dst,
): ):
# Here we check for mem dependecies only # Here we check for mem dependecies only
if "memory" not in src: if "memory" not in src:
@@ -387,23 +397,23 @@ class KernelDG(nx.DiGraph):
addr_change -= mem.offset.value addr_change -= mem.offset.value
if mem.base and src.base: if mem.base and src.base:
base_change = register_changes.get( base_change = register_changes.get(
src.base.get('prefix', '') + src.base.name, src.base.get("prefix", "") + src.base.name,
{'name': src.base.get('prefix', '') + src.base.name, 'value': 0}, {"name": src.base.get("prefix", "") + src.base.name, "value": 0},
) )
if base_change is None: if base_change is None:
# Unknown change occurred # Unknown change occurred
continue continue
if mem.base.get('prefix', '') + mem.base['name'] != base_change['name']: if mem.base.get("prefix", "") + mem.base["name"] != base_change["name"]:
# base registers do not match # base registers do not match
continue continue
addr_change += base_change['value'] addr_change += base_change["value"]
elif mem.base or src.base: elif mem.base or src.base:
# base registers do not match # base registers do not match
continue continue
if mem.index and src.index: if mem.index and src.index:
index_change = register_changes.get( index_change = register_changes.get(
src.index.get('prefix', '') + src.index.name, src.index.get("prefix", "") + src.index.name,
{'name': src.index.get('prefix', '') + src.index.name, 'value': 0}, {"name": src.index.get("prefix", "") + src.index.name, "value": 0},
) )
if index_change is None: if index_change is None:
# Unknown change occurred # Unknown change occurred
@@ -411,10 +421,10 @@ class KernelDG(nx.DiGraph):
if mem.scale != src.scale: if mem.scale != src.scale:
# scale factors do not match # scale factors do not match
continue continue
if mem.index.get('prefix', '') + mem.index['name'] != index_change['name']: if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
# index registers do not match # index registers do not match
continue continue
addr_change += index_change['value'] * src.scale addr_change += index_change["value"] * src.scale
elif mem.index or src.index: elif mem.index or src.index:
# index registers do not match # index registers do not match
continue continue
@@ -443,7 +453,8 @@ class KernelDG(nx.DiGraph):
) )
# Check also for possible pre- or post-indexing in memory addresses # Check also for possible pre- or post-indexing in memory addresses
for src in chain( for src in chain(
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst instruction_form.semantic_operands.source,
instruction_form.semantic_operands.src_dst,
): ):
if "memory" in src: if "memory" in src:
if "pre_indexed" in src.memory or "post_indexed" in src.memory: if "pre_indexed" in src.memory or "post_indexed" in src.memory:

View File

@@ -1,7 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os.path import os.path
DATA_DIRS = [os.path.expanduser("~/.osaca/data"), os.path.join(os.path.dirname(__file__), "data")] DATA_DIRS = [
os.path.expanduser("~/.osaca/data"),
os.path.join(os.path.dirname(__file__), "data"),
]
CACHE_DIR = os.path.expanduser("~/.osaca/cache") CACHE_DIR = os.path.expanduser("~/.osaca/cache")

View File

@@ -18,7 +18,8 @@ here = os.path.abspath(os.path.dirname(__file__))
# Stolen from pip # Stolen from pip
def read(*names, **kwargs): def read(*names, **kwargs):
with io.open( with io.open(
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8") os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8"),
) as fp: ) as fp:
return fp.read() return fp.read()
@@ -38,13 +39,20 @@ def _run_build_cache(dir):
# This is run inside the install staging directory (that had no .pyc files) # This is run inside the install staging directory (that had no .pyc files)
# We don't want to generate any. # We don't want to generate any.
# https://github.com/eliben/pycparser/pull/135 # https://github.com/eliben/pycparser/pull/135
check_call([sys.executable, "-B", "_build_cache.py"], cwd=os.path.join(dir, "osaca", "data")) check_call(
[sys.executable, "-B", "_build_cache.py"],
cwd=os.path.join(dir, "osaca", "data"),
)
class install(_install): class install(_install):
def run(self): def run(self):
_install.run(self) _install.run(self)
self.execute(_run_build_cache, (self.install_lib,), msg="Build ISA and architecture cache") self.execute(
_run_build_cache,
(self.install_lib,),
msg="Build ISA and architecture cache",
)
class sdist(_sdist): class sdist(_sdist):

View File

@@ -33,7 +33,13 @@ class TestCLI(unittest.TestCase):
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
osaca.check_arguments(args, parser) osaca.check_arguments(args, parser)
args = parser.parse_args( args = parser.parse_args(
["--arch", "csx", "--import", "WRONG_BENCH", self._find_file("gs", "csx", "gcc")] [
"--arch",
"csx",
"--import",
"WRONG_BENCH",
self._find_file("gs", "csx", "gcc"),
]
) )
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
osaca.check_arguments(args, parser) osaca.check_arguments(args, parser)
@@ -65,7 +71,13 @@ class TestCLI(unittest.TestCase):
def test_check_db(self): def test_check_db(self):
parser = osaca.create_parser(parser=ErrorRaisingArgumentParser()) parser = osaca.create_parser(parser=ErrorRaisingArgumentParser())
args = parser.parse_args( args = parser.parse_args(
["--arch", "tx2", "--db-check", "--verbose", self._find_test_file("triad_x86_iaca.s")] [
"--arch",
"tx2",
"--db-check",
"--verbose",
self._find_test_file("triad_x86_iaca.s"),
]
) )
output = StringIO() output = StringIO()
osaca.run(args, output_file=output) osaca.run(args, output_file=output)
@@ -134,7 +146,13 @@ class TestCLI(unittest.TestCase):
for c in comps[a]: for c in comps[a]:
with self.subTest(kernel=k, arch=a, comp=c): with self.subTest(kernel=k, arch=a, comp=c):
args = parser.parse_args( args = parser.parse_args(
["--arch", a, self._find_file(k, a, c), "--export-graph", "/dev/null"] [
"--arch",
a,
self._find_file(k, a, c),
"--export-graph",
"/dev/null",
]
) )
output = StringIO() output = StringIO()
osaca.run(args, output_file=output) osaca.run(args, output_file=output)
@@ -204,17 +222,13 @@ class TestCLI(unittest.TestCase):
) )
output = StringIO() output = StringIO()
osaca.run(args, output_file=output) osaca.run(args, output_file=output)
self.assertTrue( self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 1)
output.getvalue().count("WARNING: LCD analysis timed out") == 1
)
args = parser.parse_args( args = parser.parse_args(
["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)] ["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)]
) )
output = StringIO() output = StringIO()
osaca.run(args, output_file=output) osaca.run(args, output_file=output)
self.assertTrue( self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 0)
output.getvalue().count("WARNING: LCD analysis timed out") == 0
)
def test_lines_arg(self): def test_lines_arg(self):
# Run tests with --lines option # Run tests with --lines option
@@ -227,12 +241,24 @@ class TestCLI(unittest.TestCase):
args = [] args = []
args.append( args.append(
parser.parse_args( parser.parse_args(
["--lines", "146-154", "--arch", "csx", self._find_test_file(kernel_x86)] [
"--lines",
"146-154",
"--arch",
"csx",
self._find_test_file(kernel_x86),
]
) )
) )
args.append( args.append(
parser.parse_args( parser.parse_args(
["--lines", "146:154", "--arch", "csx", self._find_test_file(kernel_x86)] [
"--lines",
"146:154",
"--arch",
"csx",
self._find_test_file(kernel_x86),
]
) )
) )
args.append( args.append(

View File

@@ -17,7 +17,13 @@ class TestDBInterface(unittest.TestCase):
sample_entry = { sample_entry = {
"name": "DoItRightAndDoItFast", "name": "DoItRightAndDoItFast",
"operands": [ "operands": [
{"class": "memory", "offset": "imd", "base": "gpr", "index": "gpr", "scale": 8}, {
"class": "memory",
"offset": "imd",
"base": "gpr",
"index": "gpr",
"scale": 8,
},
{"class": "register", "name": "xmm"}, {"class": "register", "name": "xmm"},
], ],
"throughput": 1.25, "throughput": 1.25,
@@ -35,7 +41,12 @@ class TestDBInterface(unittest.TestCase):
del self.entry_tx2["operands"][1]["name"] del self.entry_tx2["operands"][1]["name"]
self.entry_tx2["operands"][1]["prefix"] = "x" self.entry_tx2["operands"][1]["prefix"] = "x"
# self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1] # self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1]
self.entry_zen1["port_pressure"] = [[4, "0123"], [1, "4"], [1, "89"], [2, ["8D", "9D"]]] self.entry_zen1["port_pressure"] = [
[4, "0123"],
[1, "4"],
[1, "89"],
[2, ["8D", "9D"]],
]
########### ###########
# Tests # Tests

View File

@@ -1,15 +1,15 @@
# OSACA-BEGIN # OSACA-BEGIN
.L4: .L4:
vmovsd %xmm0, 8(%rax) vmovsd %xmm0, 8(%rax) # line 3 <----------------------------------+
addq $8, %rax addq $8, %rax # |
vmovsd %xmm0, 8(%rax,%rcx,8) vmovsd %xmm0, 8(%rax,%rcx,8) # line 5 <-----------------------------------------------+
vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8) vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8) ---+ |
subq $-8, %rax subq $-8, %rax # | |
vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16) vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16) ---+ |
dec %rcx dec %rcx # |
vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
movq %rcx, %rdx movq %rcx, %rdx # |
vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
vmulsd %xmm1, %xmm0, %xmm0 vmulsd %xmm1, %xmm0, %xmm0
addq $8, %rax addq $8, %rax
cmpq %rsi, %rax cmpq %rsi, %rax

View File

@@ -34,7 +34,8 @@ class TestFrontend(unittest.TestCase):
) )
self.machine_model_tx2 = MachineModel(arch="tx2") self.machine_model_tx2 = MachineModel(arch="tx2")
self.semantics_csx = ArchSemantics( self.semantics_csx = ArchSemantics(
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml") self.machine_model_csx,
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml"),
) )
self.semantics_tx2 = ArchSemantics( self.semantics_tx2 = ArchSemantics(
self.machine_model_tx2, self.machine_model_tx2,
@@ -71,7 +72,11 @@ class TestFrontend(unittest.TestCase):
def test_frontend_AArch64(self): def test_frontend_AArch64(self):
dg = KernelDG( dg = KernelDG(
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2) self.kernel_AArch64,
self.parser_AArch64,
self.machine_model_tx2,
self.semantics_tx2,
)
fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml")) fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml"))
fe.full_analysis(self.kernel_AArch64, dg, verbose=True) fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
# TODO compare output with checked string # TODO compare output with checked string

View File

@@ -109,7 +109,8 @@ class TestMarkerUtils(unittest.TestCase):
kernel_start = len( kernel_start = len(
list( list(
filter( filter(
None, (prologue + mov_start_var + bytes_var_1).split("\n") None,
(prologue + mov_start_var + bytes_var_1).split("\n"),
) )
) )
) )
@@ -142,7 +143,12 @@ class TestMarkerUtils(unittest.TestCase):
epilogue = ".LE9:\t\t#12.2\n" "call dummy\n" epilogue = ".LE9:\t\t#12.2\n" "call dummy\n"
kernel_length = len(list(filter(None, kernel.split("\n")))) kernel_length = len(list(filter(None, kernel.split("\n"))))
bytes_variations = [bytes_1_line, bytes_2_lines_1, bytes_2_lines_2, bytes_3_lines] bytes_variations = [
bytes_1_line,
bytes_2_lines_1,
bytes_2_lines_2,
bytes_3_lines,
]
mov_start_variations = [mov_start_1, mov_start_2] mov_start_variations = [mov_start_1, mov_start_2]
mov_end_variations = [mov_end_1, mov_end_2] mov_end_variations = [mov_end_1, mov_end_2]
# actual tests # actual tests
@@ -171,7 +177,8 @@ class TestMarkerUtils(unittest.TestCase):
kernel_start = len( kernel_start = len(
list( list(
filter( filter(
None, (prologue + mov_start_var + bytes_var_1).split("\n") None,
(prologue + mov_start_var + bytes_var_1).split("\n"),
) )
) )
) )

View File

@@ -24,7 +24,9 @@ class TestParserAArch64(unittest.TestCase):
def test_comment_parser(self): def test_comment_parser(self):
self.assertEqual(self._get_comment(self.parser, "// some comments"), "some comments") self.assertEqual(self._get_comment(self.parser, "// some comments"), "some comments")
self.assertEqual(self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end") self.assertEqual(
self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end"
)
self.assertEqual( self.assertEqual(
self._get_comment(self.parser, "\t//// comment //// comment"), self._get_comment(self.parser, "\t//// comment //// comment"),
"// comment //// comment", "// comment //// comment",
@@ -36,7 +38,8 @@ class TestParserAArch64(unittest.TestCase):
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3") self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1") self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1")
self.assertEqual( self.assertEqual(
" ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment), "label1" " ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment),
"label1",
) )
with self.assertRaises(ParseException): with self.assertRaises(ParseException):
self._get_label(self.parser, "\t.cfi_startproc") self._get_label(self.parser, "\t.cfi_startproc")
@@ -316,7 +319,8 @@ class TestParserAArch64(unittest.TestCase):
value1 = self.parser.normalize_imd(imd_decimal_1) value1 = self.parser.normalize_imd(imd_decimal_1)
self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1)) self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1))
self.assertEqual( self.assertEqual(
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2) self.parser.normalize_imd(imd_decimal_2),
self.parser.normalize_imd(imd_hex_2),
) )
self.assertEqual(self.parser.normalize_imd(imd_float_11), value1) self.assertEqual(self.parser.normalize_imd(imd_float_11), value1)
self.assertEqual(self.parser.normalize_imd(imd_float_12), value1) self.assertEqual(self.parser.normalize_imd(imd_float_12), value1)

View File

@@ -26,7 +26,8 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(self._get_comment(self.parser, "# some comments"), "some comments") self.assertEqual(self._get_comment(self.parser, "# some comments"), "some comments")
self.assertEqual(self._get_comment(self.parser, "\t\t#AA BB CC \t end \t"), "AA BB CC end") self.assertEqual(self._get_comment(self.parser, "\t\t#AA BB CC \t end \t"), "AA BB CC end")
self.assertEqual( self.assertEqual(
self._get_comment(self.parser, "\t## comment ## comment"), "# comment ## comment" self._get_comment(self.parser, "\t## comment ## comment"),
"# comment ## comment",
) )
def test_label_parser(self): def test_label_parser(self):
@@ -35,7 +36,8 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3") self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1") self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1")
self.assertEqual( self.assertEqual(
" ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment), "label1" " ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment),
"label1",
) )
with self.assertRaises(ParseException): with self.assertRaises(ParseException):
self._get_label(self.parser, "\t.cfi_startproc") self._get_label(self.parser, "\t.cfi_startproc")
@@ -47,7 +49,8 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2) self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2)
self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0) self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0)
self.assertEqual( self.assertEqual(
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), 2 len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters),
2,
) )
self.assertEqual( self.assertEqual(
self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1], self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1],
@@ -62,7 +65,12 @@ class TestParserX86ATT(unittest.TestCase):
self.parser, self.parser,
"\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support", "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support",
).parameters, ).parameters,
["__TEXT", "__eh_frame", "coalesced", "no_toc+strip_static_syms+live_support"], [
"__TEXT",
"__eh_frame",
"coalesced",
"no_toc+strip_static_syms+live_support",
],
) )
self.assertEqual( self.assertEqual(
self._get_directive( self._get_directive(
@@ -74,7 +82,9 @@ class TestParserX86ATT(unittest.TestCase):
self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90" self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90"
) )
self.assertEqual( self.assertEqual(
self._get_directive(self.parser, " .byte 100,103,144 #IACA START")["name"], self._get_directive(self.parser, " .byte 100,103,144 #IACA START")[
"name"
],
"byte", "byte",
) )
self.assertEqual( self.assertEqual(
@@ -242,10 +252,12 @@ class TestParserX86ATT(unittest.TestCase):
imd_decimal_2 = {"value": "8"} imd_decimal_2 = {"value": "8"}
imd_hex_2 = {"value": "8"} imd_hex_2 = {"value": "8"}
self.assertEqual( self.assertEqual(
self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1) self.parser.normalize_imd(imd_decimal_1),
self.parser.normalize_imd(imd_hex_1),
) )
self.assertEqual( self.assertEqual(
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2) self.parser.normalize_imd(imd_decimal_2),
self.parser.normalize_imd(imd_hex_2),
) )
def test_reg_dependency(self): def test_reg_dependency(self):

View File

@@ -11,8 +11,14 @@ from copy import deepcopy
import networkx as nx import networkx as nx
from osaca.osaca import get_unmatched_instruction_ratio from osaca.osaca import get_unmatched_instruction_ratio
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, ISASemantics, from osaca.semantics import (
KernelDG, MachineModel, reduce_to_section) INSTR_FLAGS,
ArchSemantics,
ISASemantics,
KernelDG,
MachineModel,
reduce_to_section,
)
class TestSemanticTools(unittest.TestCase): class TestSemanticTools(unittest.TestCase):
@@ -66,7 +72,8 @@ class TestSemanticTools(unittest.TestCase):
) )
cls.semantics_x86 = ISASemantics("x86") cls.semantics_x86 = ISASemantics("x86")
cls.semantics_csx = ArchSemantics( cls.semantics_csx = ArchSemantics(
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml") cls.machine_model_csx,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
) )
cls.semantics_aarch64 = ISASemantics("aarch64") cls.semantics_aarch64 = ISASemantics("aarch64")
cls.semantics_tx2 = ArchSemantics( cls.semantics_tx2 = ArchSemantics(
@@ -173,7 +180,12 @@ class TestSemanticTools(unittest.TestCase):
) )
self.assertEqual( self.assertEqual(
test_mm_x86.get_store_throughput( test_mm_x86.get_store_throughput(
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": "NOT_NONE", "scale": 1} {
"base": {"prefix": "NOT_IN_DB"},
"offset": None,
"index": "NOT_NONE",
"scale": 1,
}
), ),
[[1, "23"], [1, "4"]], [[1, "23"], [1, "4"]],
) )
@@ -185,7 +197,12 @@ class TestSemanticTools(unittest.TestCase):
) )
self.assertEqual( self.assertEqual(
test_mm_arm.get_store_throughput( test_mm_arm.get_store_throughput(
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": None, "scale": 1} {
"base": {"prefix": "NOT_IN_DB"},
"offset": None,
"index": None,
"scale": 1,
}
), ),
[[1, "34"], [1, "5"]], [[1, "34"], [1, "5"]],
) )
@@ -310,7 +327,10 @@ class TestSemanticTools(unittest.TestCase):
def test_memdependency_x86(self): def test_memdependency_x86(self):
dg = KernelDG( dg = KernelDG(
self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, self.semantics_csx self.kernel_x86_memdep,
self.parser_x86,
self.machine_model_csx,
self.semantics_csx,
) )
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8}) self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
@@ -322,7 +342,10 @@ class TestSemanticTools(unittest.TestCase):
def test_kernelDG_AArch64(self): def test_kernelDG_AArch64(self):
dg = KernelDG( dg = KernelDG(
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2 self.kernel_AArch64,
self.parser_AArch64,
self.machine_model_tx2,
self.semantics_tx2,
) )
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8}) self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
@@ -400,7 +423,7 @@ class TestSemanticTools(unittest.TestCase):
# based on line 6 # based on line 6
self.assertEqual(lc_deps[6]["latency"], 28.0) self.assertEqual(lc_deps[6]["latency"], 28.0)
self.assertEqual( self.assertEqual(
[(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']], [(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)], [(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
) )
@@ -423,7 +446,8 @@ class TestSemanticTools(unittest.TestCase):
# w/o flag dependencies: ID 5 w/ len=1 # w/o flag dependencies: ID 5 w/ len=1
# TODO discuss # TODO discuss
self.assertEqual( self.assertEqual(
lc_deps[lcd_id2]["root"], dg.dg.nodes(data=True)[lcd_id2]["instruction_form"] lc_deps[lcd_id2]["root"],
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
) )
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1) self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
self.assertEqual( self.assertEqual(
@@ -438,7 +462,7 @@ class TestSemanticTools(unittest.TestCase):
self.parser_x86, self.parser_x86,
self.machine_model_csx, self.machine_model_csx,
self.semantics_x86, self.semantics_x86,
timeout=10 timeout=10,
) )
end_time = time.perf_counter() end_time = time.perf_counter()
time_10 = end_time - start_time time_10 = end_time - start_time
@@ -448,7 +472,7 @@ class TestSemanticTools(unittest.TestCase):
self.parser_x86, self.parser_x86,
self.machine_model_csx, self.machine_model_csx,
self.semantics_x86, self.semantics_x86,
timeout=2 timeout=2,
) )
end_time = time.perf_counter() end_time = time.perf_counter()
time_2 = end_time - start_time time_2 = end_time - start_time

View File

@@ -1,33 +1,26 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys
import os import os
import re
from subprocess import check_call, check_output, CalledProcessError, STDOUT
from itertools import chain
import shutil
from functools import lru_cache
from glob import glob
from pathlib import Path
from pprint import pprint
import socket
import pickle import pickle
import re
import shutil
import socket
import sys
from copy import deepcopy from copy import deepcopy
from glob import glob
from itertools import chain
from pathlib import Path
from subprocess import STDOUT, CalledProcessError, check_call, check_output
import requests import requests
import numpy as np
import pandas as pd
from osaca.osaca import reduce_to_section
from kerncraft.models import benchmark
from kerncraft.incore_model import ( from kerncraft.incore_model import (
parse_asm,
asm_instrumentation, asm_instrumentation,
iaca_analyse_instrumented_binary, iaca_analyse_instrumented_binary,
llvm_mca_analyse_instrumented_assembly,
osaca_analyse_instrumented_assembly, osaca_analyse_instrumented_assembly,
llvm_mca_analyse_instrumented_assembly parse_asm,
) )
from kerncraft.models import benchmark
from osaca.osaca import reduce_to_section
# Scaling of inner dimension for 1D, 2D and 3D kernels # Scaling of inner dimension for 1D, 2D and 3D kernels
# * consider kernels to be compiled with multiple compilers and different options # * consider kernels to be compiled with multiple compilers and different options
@@ -39,37 +32,50 @@ from kerncraft.incore_model import (
# Collect inner loop body assembly for each kernel/compiler/options combination # Collect inner loop body assembly for each kernel/compiler/options combination
# * analyze with OSACA, IACA and LLVM-MCA # * analyze with OSACA, IACA and LLVM-MCA
hosts_arch_map = {r"skylakesp2": "SKX", hosts_arch_map = {
r"ivyep1": "IVB", r"skylakesp2": "SKX",
r"naples1": "ZEN", r"ivyep1": "IVB",
r"rome1": "ZEN2", r"naples1": "ZEN",
r"warmup": "TX2", r"rome1": "ZEN2",
r"qp4-node-[0-9]+": "A64FX"} r"warmup": "TX2",
r"qp4-node-[0-9]+": "A64FX",
}
arch_info = { arch_info = {
'SKX': { "SKX": {
'prepare': ['likwid-setFrequencies -f 2.4 -t 0'.split()], "prepare": ["likwid-setFrequencies -f 2.4 -t 0".split()],
'IACA': 'SKX', "IACA": "SKX",
'OSACA': 'SKX', "OSACA": "SKX",
'LLVM-MCA': '-mcpu=skylake-avx512', "LLVM-MCA": "-mcpu=skylake-avx512",
'Ithemal': 'skl', "Ithemal": "skl",
'isa': 'x86', "isa": "x86",
'perfevents': [], "perfevents": [],
"cflags": { "cflags": {
'icc': { "icc": {
"Ofast": "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(), "Ofast": (
"O3": "-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(), "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
"O2": "-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(), "-ffreestanding -falign-loops"
"O1": "-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(), ).split(),
"O3": (
"-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
"-ffreestanding -falign-loops"
).split(),
"O2": (
"-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
"-ffreestanding -falign-loops"
).split(),
"O1": (
"-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
"-ffreestanding -falign-loops"
).split(),
}, },
'clang': { "clang": {
"Ofast": "-Ofast -march=skylake-avx512 -ffreestanding".split(), "Ofast": "-Ofast -march=skylake-avx512 -ffreestanding".split(),
"O3": "-O3 -march=skylake-avx512 -ffreestanding".split(), "O3": "-O3 -march=skylake-avx512 -ffreestanding".split(),
"O2": "-O2 -march=skylake-avx512 -ffreestanding".split(), "O2": "-O2 -march=skylake-avx512 -ffreestanding".split(),
"O1": "-O1 -march=skylake-avx512 -ffreestanding".split(), "O1": "-O1 -march=skylake-avx512 -ffreestanding".split(),
}, },
'gcc': { "gcc": {
"Ofast": "-Ofast -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(), "Ofast": "-Ofast -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
"O3": "-O3 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(), "O3": "-O3 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
"O2": "-O2 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(), "O2": "-O2 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
@@ -77,17 +83,19 @@ arch_info = {
}, },
}, },
}, },
'IVB': { "IVB": {
'prepare': ['likwid-setFrequencies -f 3.0 -t 0'.split()], "prepare": ["likwid-setFrequencies -f 3.0 -t 0".split()],
'IACA': 'IVB', "IACA": "IVB",
'OSACA': 'IVB', "OSACA": "IVB",
'LLVM-MCA': '-mcpu=ivybridge', "LLVM-MCA": "-mcpu=ivybridge",
'Ithemal': 'ivb', "Ithemal": "ivb",
'isa': 'x86', "isa": "x86",
'perfevents': [], "perfevents": [],
"cflags": { "cflags": {
"icc": { "icc": {
"Ofast": "-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "Ofast": (
"-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops"
).split(),
"O3": "-O3 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O3": "-O3 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O2": "-O2 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O2": "-O2 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O1": "-O1 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O1": "-O1 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
@@ -106,14 +114,14 @@ arch_info = {
}, },
}, },
}, },
'ZEN': { "ZEN": {
'prepare': ['likwid-setFrequencies -f 2.3 -t 0'.split()], "prepare": ["likwid-setFrequencies -f 2.3 -t 0".split()],
'IACA': None, "IACA": None,
'OSACA': 'ZEN1', "OSACA": "ZEN1",
'LLVM-MCA': '-mcpu=znver1', "LLVM-MCA": "-mcpu=znver1",
'Ithemal': None, "Ithemal": None,
'isa': 'x86', "isa": "x86",
'perfevents': [], "perfevents": [],
"cflags": { "cflags": {
"clang": { "clang": {
"Ofast": "-Ofast -march=znver1 -ffreestanding".split(), "Ofast": "-Ofast -march=znver1 -ffreestanding".split(),
@@ -128,21 +136,23 @@ arch_info = {
"O1": "-O1 -march=znver1 -ffreestanding -falign-loops=16".split(), "O1": "-O1 -march=znver1 -ffreestanding -falign-loops=16".split(),
}, },
"icc": { "icc": {
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "Ofast": (
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
).split(),
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
}, },
}, },
}, },
'ZEN2': { "ZEN2": {
'prepare': ['likwid-setFrequencies -f 2.35 -t 0'.split()], "prepare": ["likwid-setFrequencies -f 2.35 -t 0".split()],
'IACA': None, "IACA": None,
'OSACA': 'ZEN2', "OSACA": "ZEN2",
'LLVM-MCA': '-mcpu=znver2', "LLVM-MCA": "-mcpu=znver2",
'Ithemal': None, "Ithemal": None,
'isa': 'x86', "isa": "x86",
'perfevents': [], "perfevents": [],
"cflags": { "cflags": {
"clang": { "clang": {
"Ofast": "-Ofast -march=znver2 -ffreestanding".split(), "Ofast": "-Ofast -march=znver2 -ffreestanding".split(),
@@ -157,22 +167,24 @@ arch_info = {
"O1": "-O1 -march=znver2 -ffreestanding -falign-loops=16".split(), "O1": "-O1 -march=znver2 -ffreestanding -falign-loops=16".split(),
}, },
"icc": { "icc": {
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "Ofast": (
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
).split(),
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
}, },
}, },
}, },
'TX2': { "TX2": {
'Clock [MHz]': 2200, # reading out via perf. counters is not supported "Clock [MHz]": 2200, # reading out via perf. counters is not supported
'IACA': None, "IACA": None,
'OSACA': 'TX2', "OSACA": "TX2",
'assign_optimal_throughput': True, "assign_optimal_throughput": True,
'LLVM-MCA': '-mcpu=thunderx2t99 -march=aarch64', "LLVM-MCA": "-mcpu=thunderx2t99 -march=aarch64",
'Ithemal': None, "Ithemal": None,
'isa': 'aarch64', "isa": "aarch64",
'perfevents': [], "perfevents": [],
"cflags": { "cflags": {
"clang": { "clang": {
"Ofast": "-Ofast -target aarch64-unknown-linux-gnu -ffreestanding".split(), "Ofast": "-Ofast -target aarch64-unknown-linux-gnu -ffreestanding".split(),
@@ -188,16 +200,16 @@ arch_info = {
}, },
}, },
}, },
'A64FX': { "A64FX": {
'Clock [MHz]': 1800, # reading out via perf. counters is not supported "Clock [MHz]": 1800, # reading out via perf. counters is not supported
'L2_volume_metric': 'L1<->L2 data volume [GBytes]', "L2_volume_metric": "L1<->L2 data volume [GBytes]",
'IACA': None, "IACA": None,
'OSACA': 'A64FX', "OSACA": "A64FX",
'assign_optimal_throughput': False, "assign_optimal_throughput": False,
'LLVM-MCA': '-mcpu=a64fx -march=aarch64', "LLVM-MCA": "-mcpu=a64fx -march=aarch64",
'Ithemal': None, "Ithemal": None,
'isa': 'aarch64', "isa": "aarch64",
'perfevents': [], "perfevents": [],
"cflags": { "cflags": {
"gcc": { "gcc": {
"Ofast": "-Ofast -msve-vector-bits=512 -march=armv8.2-a+sve -ffreestanding".split(), "Ofast": "-Ofast -msve-vector-bits=512 -march=armv8.2-a+sve -ffreestanding".split(),
@@ -211,7 +223,7 @@ arch_info = {
"O2": "-O2 -target aarch64-unknown-linux-gnu -ffreestanding".split(), "O2": "-O2 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
"O1": "-O1 -target aarch64-unknown-linux-gnu -ffreestanding".split(), "O1": "-O1 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
}, },
} },
}, },
} }
@@ -231,12 +243,13 @@ def get_kernels(kernels=None):
if kernels is None: if kernels is None:
kernels = [] kernels = []
for f in glob("kernels/*.c"): for f in glob("kernels/*.c"):
f = f.rsplit('.', 1)[0].split('/', 1)[1] f = f.rsplit(".", 1)[0].split("/", 1)[1]
if f == "dummy": if f == "dummy":
continue continue
kernels.append(f) kernels.append(f)
return kernels return kernels
# Columns: # Columns:
# arch # arch
# kernel # kernel
@@ -259,6 +272,7 @@ def get_kernels(kernels=None):
# allruns [list (length, repetitions, cy/it, L2 B/it)] # allruns [list (length, repetitions, cy/it, L2 B/it)]
# perfevents [dict event: counter/it] # perfevents [dict event: counter/it]
def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mca=True): def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mca=True):
arch = get_current_arch() arch = get_current_arch()
if arch is None: if arch is None:
@@ -268,90 +282,132 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
islocal = True islocal = True
arches = [arch] arches = [arch]
ainfo = arch_info.get(arch) ainfo = arch_info.get(arch)
if 'prepare' in ainfo: if "prepare" in ainfo:
for cmd in ainfo['prepare']: for cmd in ainfo["prepare"]:
check_call(cmd) check_call(cmd)
for arch in arches: for arch in arches:
ainfo = arch_info.get(arch) ainfo = arch_info.get(arch)
print(arch) print(arch)
data_path = Path(f"build/{arch}/data.pkl") data_path = Path(f"build/{arch}/data.pkl")
if data_path.exists(): if data_path.exists():
with data_path.open('rb') as f: with data_path.open("rb") as f:
data = pickle.load(f) data = pickle.load(f)
else: else:
data = [] data = []
data_lastsaved = deepcopy(data) data_lastsaved = deepcopy(data)
for compiler, compiler_cflags in ainfo['cflags'].items(): for compiler, compiler_cflags in ainfo["cflags"].items():
if not shutil.which(compiler) and islocal: if not shutil.which(compiler) and islocal:
print(compiler, "not found in path! Skipping...") print(compiler, "not found in path! Skipping...")
continue continue
for cflags_name, cflags in compiler_cflags.items(): for cflags_name, cflags in compiler_cflags.items():
for kernel in get_kernels(): for kernel in get_kernels():
print(f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}", print(
end=": ", flush=True) f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
row = list([r for r in data end=": ",
if r['arch'] == arch and r['kernel'] == kernel and flush=True,
r['compiler'] == compiler and r['cflags_name'] == cflags_name]) )
row = list(
[
r
for r in data
if r["arch"] == arch
and r["kernel"] == kernel
and r["compiler"] == compiler
and r["cflags_name"] == cflags_name
]
)
if row: if row:
row = row[0] row = row[0]
else: else:
orig_row = None
row = { row = {
'arch': arch, "arch": arch,
'kernel': kernel, "kernel": kernel,
'compiler': compiler, "compiler": compiler,
'cflags_name': cflags_name, "cflags_name": cflags_name,
'element_size': 8, "element_size": 8,
} }
data.append(row) data.append(row)
# Build # Build
print("build", end="", flush=True) print("build", end="", flush=True)
asm_path, exec_path, overwrite = build_kernel( asm_path, exec_path, overwrite = build_kernel(
kernel, arch, compiler, cflags, cflags_name, dontbuild=not islocal) kernel,
arch,
compiler,
cflags,
cflags_name,
dontbuild=not islocal,
)
if overwrite: if overwrite:
# clear all measurment information # clear all measurment information
row['best_length'] = None row["best_length"] = None
row['best_runtime'] = None row["best_runtime"] = None
row['L2_traffic'] = None row["L2_traffic"] = None
row['allruns'] = None row["allruns"] = None
row['perfevents'] = None row["perfevents"] = None
# Mark for IACA, OSACA and LLVM-MCA # Mark for IACA, OSACA and LLVM-MCA
print("mark", end="", flush=True) print("mark", end="", flush=True)
try: try:
marked_asmfile, marked_objfile, row['pointer_increment'], overwrite = mark( (
asm_path, compiler, cflags, isa=ainfo['isa'], overwrite=overwrite) marked_asmfile,
row['marking_error'] = None marked_objfile,
row["pointer_increment"],
overwrite,
) = mark(
asm_path,
compiler,
cflags,
isa=ainfo["isa"],
overwrite=overwrite,
)
row["marking_error"] = None
except ValueError as e: except ValueError as e:
row['marking_error'] = str(e) row["marking_error"] = str(e)
print(":", e) print(":", e)
continue continue
if overwrite: if overwrite:
# clear all model generated information # clear all model generated information
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']: for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']: for k in [
row[model+'_'+k] = None "ports",
"prediction",
"throughput",
"cp",
"lcd",
"raw",
]:
row[model + "_" + k] = None
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']: for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']: for k in [
if model+'_'+k not in row: "ports",
row[model+'_'+k] = None "prediction",
"throughput",
"cp",
"lcd",
"raw",
]:
if model + "_" + k not in row:
row[model + "_" + k] = None
# Analyze with IACA, if requested and configured # Analyze with IACA, if requested and configured
if iaca and ainfo['IACA'] is not None: if iaca and ainfo["IACA"] is not None:
print("IACA", end="", flush=True) print("IACA", end="", flush=True)
if not row.get('IACA_ports'): if not row.get("IACA_ports"):
row['IACA_raw'] = iaca_analyse_instrumented_binary( row["IACA_raw"] = iaca_analyse_instrumented_binary(
marked_objfile, micro_architecture=ainfo['IACA']) marked_objfile, micro_architecture=ainfo["IACA"]
row['IACA_ports'] = \ )
{k: v/(row['pointer_increment']/row['element_size']) row["IACA_ports"] = {
for k,v in row['IACA_raw']['port cycles'].items()} k: v / (row["pointer_increment"] / row["element_size"])
row['IACA_prediction'] = row['IACA_raw']['throughput']/( for k, v in row["IACA_raw"]["port cycles"].items()
row['pointer_increment']/row['element_size']) }
row['IACA_throughput'] = max(row['IACA_ports'].values()) row["IACA_prediction"] = row["IACA_raw"]["throughput"] / (
row["pointer_increment"] / row["element_size"]
)
row["IACA_throughput"] = max(row["IACA_ports"].values())
print(". ", end="", flush=True) print(". ", end="", flush=True)
else: else:
print("! ", end="", flush=True) print("! ", end="", flush=True)
@@ -359,56 +415,70 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
# Analyze with OSACA, if requested # Analyze with OSACA, if requested
if osaca: if osaca:
print("OSACA", end="", flush=True) print("OSACA", end="", flush=True)
if not row.get('OSACA_ports'): if not row.get("OSACA_ports"):
row['OSACA_raw'] = osaca_analyse_instrumented_assembly( row["OSACA_raw"] = osaca_analyse_instrumented_assembly(
marked_asmfile, micro_architecture=ainfo['OSACA'], marked_asmfile,
assign_optimal_throughput=ainfo.get('assign_optimal_throughput', micro_architecture=ainfo["OSACA"],
True)) assign_optimal_throughput=ainfo.get(
row['OSACA_ports'] = \ "assign_optimal_throughput", True
{k: v/(row['pointer_increment']/row['element_size']) ),
for k,v in row['OSACA_raw']['port cycles'].items()} )
row['OSACA_prediction'] = row['OSACA_raw']['throughput']/( row["OSACA_ports"] = {
row['pointer_increment']/row['element_size']) k: v / (row["pointer_increment"] / row["element_size"])
row['OSACA_throughput'] = max(row['OSACA_ports'].values()) for k, v in row["OSACA_raw"]["port cycles"].items()
row['OSACA_cp'] = row['OSACA_raw']['cp_latency']/( }
row['pointer_increment']/row['element_size']) row["OSACA_prediction"] = row["OSACA_raw"]["throughput"] / (
row['OSACA_lcd'] = row['OSACA_raw']['lcd']/( row["pointer_increment"] / row["element_size"]
row['pointer_increment']/row['element_size']) )
row["OSACA_throughput"] = max(row["OSACA_ports"].values())
row["OSACA_cp"] = row["OSACA_raw"]["cp_latency"] / (
row["pointer_increment"] / row["element_size"]
)
row["OSACA_lcd"] = row["OSACA_raw"]["lcd"] / (
row["pointer_increment"] / row["element_size"]
)
print(". ", end="", flush=True) print(". ", end="", flush=True)
else: else:
print("! ", end="", flush=True) print("! ", end="", flush=True)
# Analyze with LLVM-MCA, if requested and configured # Analyze with LLVM-MCA, if requested and configured
if llvm_mca and ainfo['LLVM-MCA'] is not None: if llvm_mca and ainfo["LLVM-MCA"] is not None:
print("LLVM-MCA", end="", flush=True) print("LLVM-MCA", end="", flush=True)
if not row.get('LLVM-MCA_ports'): if not row.get("LLVM-MCA_ports"):
row['LLVM-MCA_raw'] = llvm_mca_analyse_instrumented_assembly( row["LLVM-MCA_raw"] = llvm_mca_analyse_instrumented_assembly(
marked_asmfile, marked_asmfile,
micro_architecture=ainfo['LLVM-MCA'], micro_architecture=ainfo["LLVM-MCA"],
isa=ainfo['isa']) isa=ainfo["isa"],
row['LLVM-MCA_ports'] = \ )
{k: v/(row['pointer_increment']/row['element_size']) row["LLVM-MCA_ports"] = {
for k,v in row['LLVM-MCA_raw']['port cycles'].items()} k: v / (row["pointer_increment"] / row["element_size"])
row['LLVM-MCA_prediction'] =row['LLVM-MCA_raw']['throughput']/( for k, v in row["LLVM-MCA_raw"]["port cycles"].items()
row['pointer_increment']/row['element_size']) }
row['LLVM-MCA_throughput'] = max(row['LLVM-MCA_ports'].values()) row["LLVM-MCA_prediction"] = row["LLVM-MCA_raw"]["throughput"] / (
row['LLVM-MCA_cp'] = row['LLVM-MCA_raw']['cp_latency']/( row["pointer_increment"] / row["element_size"]
row['pointer_increment']/row['element_size']) )
row['LLVM-MCA_lcd'] = row['LLVM-MCA_raw']['lcd']/( row["LLVM-MCA_throughput"] = max(row["LLVM-MCA_ports"].values())
row['pointer_increment']/row['element_size']) row["LLVM-MCA_cp"] = row["LLVM-MCA_raw"]["cp_latency"] / (
row["pointer_increment"] / row["element_size"]
)
row["LLVM-MCA_lcd"] = row["LLVM-MCA_raw"]["lcd"] / (
row["pointer_increment"] / row["element_size"]
)
print(". ", end="", flush=True) print(". ", end="", flush=True)
else: else:
print("! ", end="", flush=True) print("! ", end="", flush=True)
# Analyze with Ithemal, if not running local and configured # Analyze with Ithemal, if not running local and configured
if ainfo['Ithemal'] is not None and not islocal: if ainfo["Ithemal"] is not None and not islocal:
print("Ithemal", end="", flush=True) print("Ithemal", end="", flush=True)
if not row.get('Ithemal_prediction'): if not row.get("Ithemal_prediction"):
with open(marked_asmfile) as f: with open(marked_asmfile) as f:
parsed_code = parse_asm(f.read(), ainfo['isa']) parsed_code = parse_asm(f.read(), ainfo["isa"])
kernel = reduce_to_section(parsed_code, ainfo['isa']) kernel = reduce_to_section(parsed_code, ainfo["isa"])
row['Ithemal_prediction'] = get_ithemal_prediction( row["Ithemal_prediction"] = get_ithemal_prediction(
get_intel_style_code(marked_objfile), model=ainfo['Ithemal']) get_intel_style_code(marked_objfile),
model=ainfo["Ithemal"],
)
print(". ", end="", flush=True) print(". ", end="", flush=True)
else: else:
print("! ", end="", flush=True) print("! ", end="", flush=True)
@@ -416,43 +486,45 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
if measurements and islocal: if measurements and islocal:
# run measurements if on same hardware # run measurements if on same hardware
print("scale", end="", flush=True) print("scale", end="", flush=True)
if not row.get('allruns'): if not row.get("allruns"):
# find best length with concurrent L2 measurement # find best length with concurrent L2 measurement
scaling_runs, best = scalingrun(exec_path) scaling_runs, best = scalingrun(exec_path)
row['best_length'] = best[0] row["best_length"] = best[0]
row['best_runtime'] = best[2] row["best_runtime"] = best[2]
row['L2_traffic'] = best[3] row["L2_traffic"] = best[3]
row['allruns'] = scaling_runs row["allruns"] = scaling_runs
print(f"({best[0]}). ", end="", flush=True) print(f"({best[0]}). ", end="", flush=True)
else: else:
print(f"({row.get('best_length', None)})! ", end="", flush=True) print(
f"({row.get('best_length', None)})! ",
end="",
flush=True,
)
print() print()
# dump to file # dump to file
if data != data_lastsaved: if data != data_lastsaved:
print('saving... ', end="", flush=True) print("saving... ", end="", flush=True)
with data_path.open('wb') as f: with data_path.open("wb") as f:
try: try:
pickle.dump(data, f) pickle.dump(data, f)
data_lastsaved = deepcopy(data) data_lastsaved = deepcopy(data)
print('saved!') print("saved!")
except KeyboardInterrupt: except KeyboardInterrupt:
f.seek(0) f.seek(0)
pickle.dump(data, f) pickle.dump(data, f)
print('saved!') print("saved!")
sys.exit() sys.exit()
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1 * 1024 + 1)):
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1)): # print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
#print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it")) parameters = chain(*[[total_iterations // i, i] for i in lengths])
parameters = chain(*[[total_iterations//i, i] for i in lengths])
# TODO use arch specific events and grooup # TODO use arch specific events and grooup
r, o = perfctr(chain([kernel_exec], map(str, parameters)), r, o = perfctr(chain([kernel_exec], map(str, parameters)), 1, group="L2")
1, group="L2")
global_infos = {} global_infos = {}
for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", l) for l in o]: for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", line) for line in o]:
if m is not None: if m is not None:
try: try:
v = int(m.group(4)) v = int(m.group(4))
@@ -464,37 +536,45 @@ def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1
r[m.group(2)][m.group(3)] = v r[m.group(2)][m.group(3)] = v
results = [] results = []
best = (float('inf'), None) best = (float("inf"), None)
for markername, mmetrics in r.items(): for markername, mmetrics in r.items():
kernelname, repetitions, *_, xlength = markername.split('_') kernelname, repetitions, *_, xlength = markername.split("_")
repetitions = int(repetitions) repetitions = int(repetitions)
xlength = int(xlength) xlength = int(xlength)
total_iterations = mmetrics['repetitions'] * mmetrics['iterations'] total_iterations = mmetrics["repetitions"] * mmetrics["iterations"]
if 'Clock [MHz]' in mmetrics: if "Clock [MHz]" in mmetrics:
clock_hz = mmetrics['Clock [MHz]']*1e6 clock_hz = mmetrics["Clock [MHz]"] * 1e6
else: else:
clock_hz = arch_info[get_current_arch()]['Clock [MHz]']*1e6 clock_hz = arch_info[get_current_arch()]["Clock [MHz]"] * 1e6
cyperit = mmetrics['Runtime (RDTSC) [s]'] * clock_hz / total_iterations cyperit = mmetrics["Runtime (RDTSC) [s]"] * clock_hz / total_iterations
# TODO use arch specific events and grooup # TODO use arch specific events and grooup
if 'L2D load data volume [GBytes]' in mmetrics: if "L2D load data volume [GBytes]" in mmetrics:
l2perit = (mmetrics['L2D load data volume [GBytes]'] + l2perit = (
mmetrics.get('L2D evict data volume [GBytes]', 0))*1e9 / total_iterations (
mmetrics["L2D load data volume [GBytes]"]
+ mmetrics.get("L2D evict data volume [GBytes]", 0)
)
* 1e9
/ total_iterations
)
else: else:
l2perit = \ l2perit = (
mmetrics[arch_info[get_current_arch()]['L2_volume_metric']]*1e9 / total_iterations mmetrics[arch_info[get_current_arch()]["L2_volume_metric"]]
results.append( * 1e9
(xlength, repetitions, cyperit, l2perit) / total_iterations
) )
results.append((xlength, repetitions, cyperit, l2perit))
if cyperit < best[0]: if cyperit < best[0]:
best = cyperit, results[-1] best = cyperit, results[-1]
return results, best[1] return results, best[1]
def mark(asm_path, compiler, cflags, isa, overwrite=False): def mark(asm_path, compiler, cflags, isa, overwrite=False):
# Mark assembly for IACA, OSACA and LLVM-MCA # Mark assembly for IACA, OSACA and LLVM-MCA
marked_asm_path = Path(asm_path).with_suffix(".marked.s") marked_asm_path = Path(asm_path).with_suffix(".marked.s")
if not marked_asm_path.exists() or overwrite: if not marked_asm_path.exists() or overwrite:
overwrite = True overwrite = True
with open(asm_path) as fa, open(marked_asm_path, 'w') as fm: with open(asm_path) as fa, open(marked_asm_path, "w") as fm:
try: try:
_, pointer_increment = asm_instrumentation(fa, fm, isa=isa) _, pointer_increment = asm_instrumentation(fa, fm, isa=isa)
except KeyboardInterrupt: except KeyboardInterrupt:
@@ -505,37 +585,46 @@ def mark(asm_path, compiler, cflags, isa, overwrite=False):
# use maked assembly and extract asm_block and pointer_increment # use maked assembly and extract asm_block and pointer_increment
with open(marked_asm_path) as f: with open(marked_asm_path) as f:
marked_asm = f.read() marked_asm = f.read()
m = re.search(r'pointer_increment=([0-9]+)', marked_asm) m = re.search(r"pointer_increment=([0-9]+)", marked_asm)
if m: if m:
pointer_increment = int(m.group(1)) pointer_increment = int(m.group(1))
else: else:
os.unlink(marked_asm_path) os.unlink(marked_asm_path)
raise ValueError( raise ValueError(
"Could not find `pointer_increment=<byte increment>`. Plase place into file.") "Could not find `pointer_increment=<byte increment>`. Plase place into file."
)
print("! ", end="", flush=True) print("! ", end="", flush=True)
# Compile marked assembly to object for IACA # Compile marked assembly to object for IACA
marked_obj = Path(asm_path).with_suffix(".marked.o") marked_obj = Path(asm_path).with_suffix(".marked.o")
if not marked_obj.exists(): if not marked_obj.exists():
check_call([compiler] + ['-c', str(marked_asm_path), '-o', str(marked_obj)]) check_call([compiler] + ["-c", str(marked_asm_path), "-o", str(marked_obj)])
return str(marked_asm_path), str(marked_obj), pointer_increment, overwrite return str(marked_asm_path), str(marked_obj), pointer_increment, overwrite
def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=False, def build_kernel(
dontbuild=False): kernel,
architecture,
compiler,
cflags,
cflags_name,
overwrite=False,
dontbuild=False,
):
build_path = f"build/{architecture}/{compiler}/{cflags_name}" build_path = f"build/{architecture}/{compiler}/{cflags_name}"
kernel_assembly = f"{build_path}/{kernel}.s" kernel_assembly = f"{build_path}/{kernel}.s"
kernel_object= f"{build_path}/{kernel}.o" kernel_object = f"{build_path}/{kernel}.o"
executable = f"{build_path}/{kernel}" executable = f"{build_path}/{kernel}"
Path(build_path).mkdir(parents=True, exist_ok=True) Path(build_path).mkdir(parents=True, exist_ok=True)
if not overwrite: if not overwrite:
# Overwrite if any kernel specific file is missing # Overwrite if any kernel specific file is missing
overwrite = ( overwrite = (
not os.path.exists(kernel_object) or not os.path.exists(kernel_object)
not os.path.exists(kernel_assembly) or or not os.path.exists(kernel_assembly)
not os.path.exists(executable)) or not os.path.exists(executable)
)
if dontbuild and overwrite: if dontbuild and overwrite:
raise ValueError("Must build, but not allowed.") raise ValueError("Must build, but not allowed.")
@@ -545,31 +634,35 @@ def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=
if not Path(f"{build_path}/compiler_version").exists(): if not Path(f"{build_path}/compiler_version").exists():
# Document compiler version # Document compiler version
with open(f"{build_path}/compiler_version", 'w') as f: with open(f"{build_path}/compiler_version", "w") as f:
f.write(check_output([compiler, "-v"], encoding='utf8', stderr=STDOUT)) f.write(check_output([compiler, "-v"], encoding="utf8", stderr=STDOUT))
if overwrite: if overwrite:
# build object + assembly # build object + assembly
check_call([compiler] + check_call([compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-o", kernel_object])
cflags + check_call(
["-c", f"kernels/{kernel}.c", "-o", kernel_object]) [compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly]
check_call([compiler] + )
cflags +
["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly])
# build main and link executable # build main and link executable
executable_cflags = [ executable_cflags = [
os.environ["LIKWID_DEFINES"], os.environ["LIKWID_DEFINES"],
os.environ["LIKWID_INC"], os.environ["LIKWID_INC"],
os.environ["LIKWID_LIB"] os.environ["LIKWID_LIB"],
] + ['-Ofast'] ] + ["-Ofast"]
check_call([compiler] + executable_cflags + [ check_call(
f"{build_path}/dummy.o", [compiler]
kernel_object, + executable_cflags
"-DMAIN", + [
f"kernels/{kernel}.c", f"{build_path}/dummy.o",
"-llikwid", kernel_object,
"-o", executable]) "-DMAIN",
f"kernels/{kernel}.c",
"-llikwid",
"-o",
executable,
]
)
print(". ", end="", flush=True) print(". ", end="", flush=True)
else: else:
print("! ", end="", flush=True) print("! ", end="", flush=True)
@@ -577,7 +670,7 @@ def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=
return kernel_assembly, executable, overwrite return kernel_assembly, executable, overwrite
def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0): def perfctr(cmd, cores, group="MEM", code_markers=True, verbose=0):
""" """
Run *cmd* with likwid-perfctr and returns result as dict. Run *cmd* with likwid-perfctr and returns result as dict.
@@ -586,30 +679,32 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
if CLI argument cores > 1, running with multi-core, otherwise single-core if CLI argument cores > 1, running with multi-core, otherwise single-core
""" """
# Making sure likwid-perfctr is available: # Making sure likwid-perfctr is available:
if benchmark.find_executable('likwid-perfctr') is None: if benchmark.find_executable("likwid-perfctr") is None:
print("likwid-perfctr was not found. Make sure likwid is installed and found in PATH.", print(
file=sys.stderr) "likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
file=sys.stderr,
)
sys.exit(1) sys.exit(1)
# FIXME currently only single core measurements support! # FIXME currently only single core measurements support!
perf_cmd = ['likwid-perfctr', '-f', '-O', '-g', group] perf_cmd = ["likwid-perfctr", "-f", "-O", "-g", group]
cpu = 'S0:0' cpu = "S0:0"
if cores > 1: if cores > 1:
cpu += '-'+str(cores-1) cpu += "-" + str(cores - 1)
# Pinned and measured on cpu # Pinned and measured on cpu
perf_cmd += ['-C', cpu] perf_cmd += ["-C", cpu]
# code must be marked using likwid markers # code must be marked using likwid markers
perf_cmd.append('-m') perf_cmd.append("-m")
perf_cmd += cmd perf_cmd += cmd
if verbose > 1: if verbose > 1:
print(' '.join(perf_cmd)) print(" ".join(perf_cmd))
try: try:
with benchmark.fix_env_variable('OMP_NUM_THREADS', None): with benchmark.fix_env_variable("OMP_NUM_THREADS", None):
output = check_output(perf_cmd).decode('utf-8').split('\n') output = check_output(perf_cmd).decode("utf-8").split("\n")
except CalledProcessError as e: except CalledProcessError as e:
print("Executing benchmark failed: {!s}".format(e), file=sys.stderr) print("Executing benchmark failed: {!s}".format(e), file=sys.stderr)
sys.exit(1) sys.exit(1)
@@ -626,7 +721,7 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
m = re.match(r"TABLE,Region ([a-z\-0-9_]+),", line) m = re.match(r"TABLE,Region ([a-z\-0-9_]+),", line)
if m: if m:
cur_region_name = m.group(1) cur_region_name = m.group(1)
line = line.split(',') line = line.split(",")
try: try:
# Metrics # Metrics
cur_region_data[line[0]] = float(line[1]) cur_region_data[line[0]] = float(line[1])
@@ -639,12 +734,13 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
continue continue
try: try:
# Event counters # Event counters
if line[2] == '-' or line[2] == 'nan': if line[2] == "-" or line[2] == "nan":
counter_value = 0 counter_value = 0
else: else:
counter_value = int(line[2]) counter_value = int(line[2])
if re.fullmatch(r'[A-Z0-9_]+', line[0]) and \ if re.fullmatch(r"[A-Z0-9_]+", line[0]) and re.fullmatch(
re.fullmatch(r'[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*', line[1]): r"[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*", line[1]
):
cur_region_data.setdefault(line[0], {}) cur_region_data.setdefault(line[0], {})
cur_region_data[line[0]][line[1]] = counter_value cur_region_data[line[0]][line[1]] = counter_value
continue continue
@@ -659,49 +755,52 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
def remove_html_tags(text):
    """Return *text* with every HTML/XML tag (``<...>``) removed.

    Uses a non-greedy match, so ``<b>x</b>`` becomes ``x``.  This is a
    lightweight scrub for the Ithemal web reply, not a full HTML parser:
    a literal ``>`` inside an attribute value would end the tag early.
    """
    # Raw string for the regex pattern (idiomatic, avoids escape surprises).
    return re.sub(r"<.*?>", "", text)
def get_intel_style_code(marked_objfile):
    """Disassemble *marked_objfile* and return the Intel-syntax assembly of the
    marked kernel region.

    The kernel region is delimited by the IACA-style byte markers that the
    instrumentation inserts: ``mov ebx, 111; nop`` (start) and
    ``mov ebx, 222; nop`` (end).  Raises ``ValueError`` (from ``str.index``)
    if either marker is missing from the disassembly.

    NOTE(review): ``--no-leading-addr``/``--x86-asm-syntax=intel`` look like
    llvm-objdump options rather than GNU objdump's (``-M intel``) — confirm
    which ``objdump`` binary is expected on PATH.
    """
    # Disassemble with Intel syntax
    cmd = (
        "objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn "
        "--x86-asm-syntax=intel"
    ).split(" ") + [marked_objfile]
    asm_raw = check_output(cmd).decode()
    # Strip per-line indentation so the marker byte sequences match exactly.
    asm_raw = "\n".join([line.strip() for line in asm_raw.split("\n")])
    # Slice out everything between the end of the start marker and the
    # beginning of the end marker.
    kernel_raw = asm_raw[
        asm_raw.index("mov\tebx, 111\nnop")
        + len("mov\tebx, 111\nnop") : asm_raw.index("mov\tebx, 222\nnop")
    ]
    kernel_lines = kernel_raw.split("\n")
    # Ignore label and jump
    return "\n".join(kernel_lines[:-2])
def get_ithemal_prediction(code, model="skl"):
    """Query the public Ithemal web service for a throughput prediction.

    Posts *code* (Intel-syntax assembly) to the service using the given
    *model* ("skl", "hsw" or "ivb") and parses the HTML reply.  Returns the
    predicted cycles per iteration as a float, or NaN when the service
    reports an error or the reply cannot be parsed.
    """
    assert model in ["skl", "hsw", "ivb"]
    url = "http://3.18.198.23/predict"
    # NOTE(review): no timeout is set, so an unresponsive service blocks forever.
    response = requests.post(url, {"code": code, "model": model})
    plain = remove_html_tags(response.text)

    # Service-side failure: report it inline and signal "no prediction".
    error_match = re.search("Could not generate a prediction: (.*)", plain)
    if error_match:
        print(" error:", error_match.group(1).strip(), end=" ")
        return float("nan")

    prediction_match = re.search("Prediction: ([0-9.]+) cycles per iteration", plain)
    return float(prediction_match.group(1)) if prediction_match else float("nan")
def main():
    """Script entry point: probe for a compatible llvm-mca, then run the
    full build/mark/analyze/measure pipeline over all kernels."""
    # Only LLVM 12.0.0's llvm-mca is treated as usable; a missing binary
    # simply disables the LLVM-MCA analysis instead of failing.
    try:
        version_text = check_output(["llvm-mca", "-version"]).decode()
        llvm_mca = "LLVM version 12.0.0" in version_text
    except FileNotFoundError:
        llvm_mca = False
    run_measurements = "--no-measurements" not in sys.argv
    build_mark_run_all_kernels(measurements=run_measurements, llvm_mca=llvm_mca)
    sys.exit()
# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()