mirror of https://github.com/RRZE-HPC/OSACA.git (synced 2026-01-07 03:30:06 +01:00)
black formatting
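This commit only re-wraps long source lines into the black code formatter's style; it contains no functional changes. As a minimal, illustrative sketch (not part of the commit), the snippet below feeds the long single-line variant from the first hunk through black's Python API; the reduced line_length is an assumption standing in for the extra indentation the statement has inside the class, which the standalone snippet omits.

import black

# The statement as it appears (unindented here) in the single-line variant of the first hunk.
src = 'port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))\n'

# line_length=60 approximates the width left over at the statement's real indentation;
# black splits the call across three lines, matching the wrapped variant shown in the hunk.
print(black.format_str(src, mode=black.Mode(line_length=60)))

The printed result is the three-line form that replaces the single long line in the diff below.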
@@ -46,7 +46,9 @@ class ArchSemantics(ISASemantics):
ports = list(uop[1])
indices = [port_list.index(p) for p in ports]
# check if port sum of used ports for uop are unbalanced
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
port_sums = self._to_list(
itemgetter(*indices)(self.get_throughput_sum(kernel))
)
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form["port_pressure"])
)
@@ -65,7 +67,9 @@ class ArchSemantics(ISASemantics):
differences[max_port_idx] -= INC
differences[min_port_idx] += INC
# instr_ports = [round(p, 2) for p in instr_ports]
self._itemsetter(*indices)(instruction_form["port_pressure"], *instr_ports)
self._itemsetter(*indices)(
instruction_form["port_pressure"], *instr_ports
)
# check if min port is zero
if round(min(instr_ports), 2) <= 0:
# if port_pressure is not exactly 0.00, add the residual to
@@ -83,12 +87,15 @@ class ArchSemantics(ISASemantics):
zero_index = [
p
for p in indices
if round(instruction_form["port_pressure"][p], 2) == 0
if round(instruction_form["port_pressure"][p], 2)
== 0
][0]
instruction_form["port_pressure"][zero_index] = 0.0
# Remove from further balancing
indices = [
p for p in indices if instruction_form["port_pressure"][p] > 0
p
for p in indices
if instruction_form["port_pressure"][p] > 0
]
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form["port_pressure"])
@@ -141,9 +148,11 @@ class ArchSemantics(ISASemantics):
if INSTR_FLAGS.HIDDEN_LD not in load_instr["flags"]
]
)
load = [instr for instr in kernel if instr["line_number"] == min_distance_load[1]][
0
]
load = [
instr
for instr in kernel
if instr["line_number"] == min_distance_load[1]
][0]
# Hide load
load["flags"] += [INSTR_FLAGS.HIDDEN_LD]
load["port_pressure"] = self._nullify_data_ports(load["port_pressure"])
@@ -221,27 +230,39 @@ class ArchSemantics(ISASemantics):
data_port_uops = self._machine_model.get_load_throughput(
[
x["memory"]
for x in instruction_form["semantic_operands"]["source"]
for x in instruction_form["semantic_operands"][
"source"
]
+ instruction_form["semantic_operands"]["src_dst"]
if "memory" in x
][0]
)
data_port_pressure = self._machine_model.average_port_pressure(
data_port_uops
data_port_pressure = (
self._machine_model.average_port_pressure(
data_port_uops
)
)
if "load_throughput_multiplier" in self._machine_model:
multiplier = self._machine_model["load_throughput_multiplier"][
reg_type
multiplier = self._machine_model[
"load_throughput_multiplier"
][reg_type]
data_port_pressure = [
pp * multiplier for pp in data_port_pressure
]
data_port_pressure = [pp * multiplier for pp in data_port_pressure]
if INSTR_FLAGS.HAS_ST in instruction_form["flags"]:
# STORE performance data
destinations = (
instruction_form["semantic_operands"]["destination"]
+ instruction_form["semantic_operands"]["src_dst"]
)
st_data_port_uops = self._machine_model.get_store_throughput(
[x["memory"] for x in destinations if "memory" in x][0]
st_data_port_uops = (
self._machine_model.get_store_throughput(
[
x["memory"]
for x in destinations
if "memory" in x
][0]
)
)
# zero data port pressure and remove HAS_ST flag if
# - no mem operand in dst &&
@@ -250,12 +271,16 @@ class ArchSemantics(ISASemantics):
if (
self._isa == "aarch64"
and "memory"
not in instruction_form["semantic_operands"]["destination"]
not in instruction_form["semantic_operands"][
"destination"
]
and all(
[
"post_indexed" in op["memory"]
or "pre_indexed" in op["memory"]
for op in instruction_form["semantic_operands"]["src_dst"]
for op in instruction_form["semantic_operands"][
"src_dst"
]
if "memory" in op
]
)
@@ -264,18 +289,21 @@ class ArchSemantics(ISASemantics):
instruction_form["flags"].remove(INSTR_FLAGS.HAS_ST)

# sum up all data ports in case for LOAD and STORE
st_data_port_pressure = self._machine_model.average_port_pressure(
st_data_port_uops
st_data_port_pressure = (
self._machine_model.average_port_pressure(
st_data_port_uops
)
)
if "store_throughput_multiplier" in self._machine_model:
multiplier = self._machine_model["store_throughput_multiplier"][
reg_type
]
multiplier = self._machine_model[
"store_throughput_multiplier"
][reg_type]
st_data_port_pressure = [
pp * multiplier for pp in st_data_port_pressure
]
data_port_pressure = [
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
sum(x)
for x in zip(data_port_pressure, st_data_port_pressure)
]
data_port_uops += st_data_port_uops
throughput = max(
@@ -327,7 +355,9 @@ class ArchSemantics(ISASemantics):
throughput = 0.0
latency = 0.0
latency_wo_load = latency
instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
instruction_form["port_pressure"] = [
0.0 for i in range(port_number)
]
instruction_form["port_uops"] = []
flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN]
# flatten flag list
@@ -343,7 +373,9 @@ class ArchSemantics(ISASemantics):
instruction_form["latency_cp"] = 0
instruction_form["latency_lcd"] = 0

def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
def _handle_instruction_found(
self, instruction_data, port_number, instruction_form, flags
):
"""Apply performance data to instruction if it was found in the archDB"""
throughput = instruction_data["throughput"]
port_pressure = self._machine_model.average_port_pressure(
@@ -425,7 +457,9 @@ class ArchSemantics(ISASemantics):
"""Get the overall throughput sum separated by port of all instructions of a kernel."""
# ignoring all lines with throughput == 0.0, because there won't be anything to sum up
# typically comment, label and non-instruction lines
port_pressures = [instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0]
port_pressures = [
instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0
]
# Essentially summing up each columns of port_pressures, where each column is one port
# and each row is one line of the kernel
# round is necessary to ensure termination of ArchsSemantics.assign_optimal_throughput

@@ -18,7 +18,9 @@ from ruamel.yaml.compat import StringIO

class MachineModel(object):
WILDCARD = "*"
INTERNAL_VERSION = 1 # increase whenever self._data format changes to invalidate cache!
INTERNAL_VERSION = (
1 # increase whenever self._data format changes to invalidate cache!
)
_runtime_cache = {}

def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False):
@@ -43,7 +45,9 @@ class MachineModel(object):
"scale": s,
"port_pressure": [],
}
for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
for b, i, o, s in product(
["gpr"], ["gpr", None], ["imd", None], [1, 8]
)
],
"load_throughput_default": [],
"store_throughput": [],
@@ -85,7 +89,9 @@ class MachineModel(object):
self._data["instruction_forms"] = []
# separate multi-alias instruction forms
for entry in [
x for x in self._data["instruction_forms"] if isinstance(x["name"], list)
x
for x in self._data["instruction_forms"]
if isinstance(x["name"], list)
]:
for name in entry["name"]:
new_entry = {"name": name}
@@ -133,7 +139,9 @@ class MachineModel(object):
instruction_form
for instruction_form in name_matched_iforms
if self._match_operands(
instruction_form["operands"] if "operands" in instruction_form else [],
instruction_form["operands"]
if "operands" in instruction_form
else [],
operands,
)
)
@@ -215,11 +223,19 @@ class MachineModel(object):

def get_load_latency(self, reg_type):
"""Return load latency for given register type."""
return self._data["load_latency"][reg_type] if self._data["load_latency"][reg_type] else 0
return (
self._data["load_latency"][reg_type]
if self._data["load_latency"][reg_type]
else 0
)

def get_load_throughput(self, memory):
"""Return load thorughput for given register type."""
ld_tp = [m for m in self._data["load_throughput"] if self._match_mem_entries(memory, m)]
ld_tp = [
m
for m in self._data["load_throughput"]
if self._match_mem_entries(memory, m)
]
if len(ld_tp) > 0:
return ld_tp[0]["port_pressure"].copy()
return self._data["load_throughput_default"].copy()
@@ -231,7 +247,11 @@ class MachineModel(object):

def get_store_throughput(self, memory):
"""Return store throughput for given register type."""
st_tp = [m for m in self._data["store_throughput"] if self._match_mem_entries(memory, m)]
st_tp = [
m
for m in self._data["store_throughput"]
if self._match_mem_entries(memory, m)
]
if len(st_tp) > 0:
return st_tp[0]["port_pressure"].copy()
return self._data["store_throughput_default"].copy()
@@ -299,7 +319,9 @@ class MachineModel(object):
formatted_instruction_forms = deepcopy(self._data["instruction_forms"])
for instruction_form in formatted_instruction_forms:
if instruction_form["port_pressure"] is not None:
cs = ruamel.yaml.comments.CommentedSeq(instruction_form["port_pressure"])
cs = ruamel.yaml.comments.CommentedSeq(
instruction_form["port_pressure"]
)
cs.fa.set_flow_style()
instruction_form["port_pressure"] = cs

@@ -349,7 +371,9 @@ class MachineModel(object):
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()

# 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
".pickle"
)
if companion_cachefile.exists():
# companion file (must be up-to-date, due to equal hash)
with companion_cachefile.open("rb") as f:
@@ -358,7 +382,9 @@ class MachineModel(object):
return data

# 2. home cachefile: ~/.osaca/cache/<name>_<sha512hash>.pickle
home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(".pickle")
home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(
".pickle"
)
if home_cachefile.exists():
# home file (must be up-to-date, due to equal hash)
with home_cachefile.open("rb") as f:
@@ -377,7 +403,9 @@ class MachineModel(object):
p = Path(filepath)
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
# 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
".pickle"
)
if os.access(str(companion_cachefile.parent), os.W_OK):
with companion_cachefile.open("wb") as f:
pickle.dump(self._data, f)
@@ -421,7 +449,9 @@ class MachineModel(object):
operand_string += operand["prefix"]
operand_string += operand["shape"] if "shape" in operand else ""
elif "name" in operand:
operand_string += "r" if operand["name"] == "gpr" else operand["name"][0]
operand_string += (
"r" if operand["name"] == "gpr" else operand["name"][0]
)
elif opclass == "memory":
# Memory
operand_string += "m"
@@ -584,7 +614,9 @@ class MachineModel(object):
if "register" in operand:
if i_operand["class"] != "register":
return False
return self._is_x86_reg_type(i_operand, operand["register"], consider_masking=False)
return self._is_x86_reg_type(
i_operand, operand["register"], consider_masking=False
)
# memory
if "memory" in operand:
if i_operand["class"] != "memory":
@@ -632,7 +664,8 @@ class MachineModel(object):
return False
if "shape" in reg:
if "shape" in i_reg and (
reg["shape"] == i_reg["shape"] or self.WILDCARD in (reg["shape"] + i_reg["shape"])
reg["shape"] == i_reg["shape"]
or self.WILDCARD in (reg["shape"] + i_reg["shape"])
):
return True
return False
@@ -662,7 +695,8 @@ class MachineModel(object):
if (
(
"mask" in reg
and reg["mask"].rstrip(string.digits).lower() == i_reg.get("mask")
and reg["mask"].rstrip(string.digits).lower()
== i_reg.get("mask")
)
or reg.get("mask") == self.WILDCARD
or i_reg.get("mask") == self.WILDCARD

@@ -54,7 +54,9 @@ class KernelDG(nx.DiGraph):
dg = nx.DiGraph()
for i, instruction_form in enumerate(kernel):
dg.add_node(instruction_form["line_number"])
dg.nodes[instruction_form["line_number"]]["instruction_form"] = instruction_form
dg.nodes[instruction_form["line_number"]][
"instruction_form"
] = instruction_form
# add load as separate node if existent
if (
INSTR_FLAGS.HAS_LD in instruction_form["flags"]
@@ -69,12 +71,16 @@ class KernelDG(nx.DiGraph):
dg.add_edge(
instruction_form["line_number"] + 0.1,
instruction_form["line_number"],
latency=instruction_form["latency"] - instruction_form["latency_wo_load"],
latency=instruction_form["latency"]
- instruction_form["latency_wo_load"],
)
for dep, dep_flags in self.find_depending(instruction_form, kernel[i + 1 :]):
for dep, dep_flags in self.find_depending(
instruction_form, kernel[i + 1 :]
):
edge_weight = (
instruction_form["latency"]
if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form
if "mem_dep" in dep_flags
or "latency_wo_load" not in instruction_form
else instruction_form["latency_wo_load"]
)
if "storeload_dep" in dep_flags:
@@ -306,7 +312,9 @@ class KernelDG(nx.DiGraph):
# store to same location (presumed)
if self.is_memstore(dst.memory, instr_form, register_changes):
break
self._update_reg_changes(instr_form, register_changes, only_postindexed=True)
self._update_reg_changes(
instr_form, register_changes, only_postindexed=True
)

def _update_reg_changes(self, iform, reg_state=None, only_postindexed=False):
if self.arch_sem is None:
@@ -314,7 +322,9 @@ class KernelDG(nx.DiGraph):
return {}
if reg_state is None:
reg_state = {}
for reg, change in self.arch_sem.get_reg_changes(iform, only_postindexed).items():
for reg, change in self.arch_sem.get_reg_changes(
iform, only_postindexed
).items():
if change is None or reg_state.get(reg, {}) is None:
reg_state[reg] = None
else:
@@ -352,15 +362,23 @@ class KernelDG(nx.DiGraph):
instruction_form.semantic_operands.src_dst,
):
if "register" in src:
is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
is_read = (
self.parser.is_reg_dependend_of(register, src.register) or is_read
)
if "flag" in src:
is_read = self.parser.is_flag_dependend_of(register, src.flag) or is_read
is_read = (
self.parser.is_flag_dependend_of(register, src.flag) or is_read
)
if "memory" in src:
if src.memory.base is not None:
is_read = self.parser.is_reg_dependend_of(register, src.memory.base) or is_read
is_read = (
self.parser.is_reg_dependend_of(register, src.memory.base)
or is_read
)
if src.memory.index is not None:
is_read = (
self.parser.is_reg_dependend_of(register, src.memory.index) or is_read
self.parser.is_reg_dependend_of(register, src.memory.index)
or is_read
)
# Check also if read in destination memory address
for dst in chain(
@@ -369,10 +387,14 @@ class KernelDG(nx.DiGraph):
):
if "memory" in dst:
if dst.memory.base is not None:
is_read = self.parser.is_reg_dependend_of(register, dst.memory.base) or is_read
is_read = (
self.parser.is_reg_dependend_of(register, dst.memory.base)
or is_read
)
if dst.memory.index is not None:
is_read = (
self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read
self.parser.is_reg_dependend_of(register, dst.memory.index)
or is_read
)
return is_read

@@ -421,7 +443,10 @@ class KernelDG(nx.DiGraph):
if mem.scale != src.scale:
# scale factors do not match
continue
if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
if (
mem.index.get("prefix", "") + mem.index["name"]
!= index_change["name"]
):
# index registers do not match
continue
addr_change += index_change["value"] * src.scale
@@ -443,13 +468,19 @@ class KernelDG(nx.DiGraph):
instruction_form.semantic_operands.src_dst,
):
if "register" in dst:
is_written = self.parser.is_reg_dependend_of(register, dst.register) or is_written
is_written = (
self.parser.is_reg_dependend_of(register, dst.register)
or is_written
)
if "flag" in dst:
is_written = self.parser.is_flag_dependend_of(register, dst.flag) or is_written
is_written = (
self.parser.is_flag_dependend_of(register, dst.flag) or is_written
)
if "memory" in dst:
if "pre_indexed" in dst.memory or "post_indexed" in dst.memory:
is_written = (
self.parser.is_reg_dependend_of(register, dst.memory.base) or is_written
self.parser.is_reg_dependend_of(register, dst.memory.base)
or is_written
)
# Check also for possible pre- or post-indexing in memory addresses
for src in chain(
@@ -459,7 +490,8 @@ class KernelDG(nx.DiGraph):
if "memory" in src:
if "pre_indexed" in src.memory or "post_indexed" in src.memory:
is_written = (
self.parser.is_reg_dependend_of(register, src.memory.base) or is_written
self.parser.is_reg_dependend_of(register, src.memory.base)
or is_written
)
return is_written

@@ -490,7 +522,9 @@ class KernelDG(nx.DiGraph):
lcd = self.get_loopcarried_dependencies()
lcd_line_numbers = {}
for dep in lcd:
lcd_line_numbers[dep] = [x["line_number"] for x, lat in lcd[dep]["dependencies"]]
lcd_line_numbers[dep] = [
x["line_number"] for x, lat in lcd[dep]["dependencies"]
]
# add color scheme
graph.graph["node"] = {"colorscheme": "accent8"}
graph.graph["edge"] = {"colorscheme": "accent8"}
@@ -501,7 +535,9 @@ class KernelDG(nx.DiGraph):
max_line_number = max(lcd_line_numbers[dep])
graph.add_edge(max_line_number, min_line_number)
graph.edges[max_line_number, min_line_number]["latency"] = [
lat for x, lat in lcd[dep]["dependencies"] if x["line_number"] == max_line_number
lat
for x, lat in lcd[dep]["dependencies"]
if x["line_number"] == max_line_number
]

# add label to edges
@@ -510,7 +546,9 @@ class KernelDG(nx.DiGraph):

# add CP values to graph
for n in cp:
graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n["latency_cp"]
graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n[
"latency_cp"
]

# color CP and LCD
for n in graph.nodes:
@@ -530,7 +568,8 @@ class KernelDG(nx.DiGraph):
for e in graph.edges:
if (
graph.nodes[e[0]]["instruction_form"]["line_number"] in cp_line_numbers
and graph.nodes[e[1]]["instruction_form"]["line_number"] in cp_line_numbers
and graph.nodes[e[1]]["instruction_form"]["line_number"]
in cp_line_numbers
and e[0] < e[1]
):
bold_edge = True
@@ -542,7 +581,8 @@ class KernelDG(nx.DiGraph):
graph.edges[e]["penwidth"] = 3
for dep in lcd_line_numbers:
if (
graph.nodes[e[0]]["instruction_form"]["line_number"] in lcd_line_numbers[dep]
graph.nodes[e[0]]["instruction_form"]["line_number"]
in lcd_line_numbers[dep]
and graph.nodes[e[1]]["instruction_form"]["line_number"]
in lcd_line_numbers[dep]
):

@@ -133,7 +133,11 @@ def find_marked_section(
index_end = -1
for i, line in enumerate(lines):
try:
if line.instruction is None and comments is not None and line.comment is not None:
if (
line.instruction is None
and comments is not None
and line.comment is not None
):
if comments["start"] == line.comment:
index_start = i + 1
elif comments["end"] == line.comment: