Added 2 operand types and made changes for attribute usage

2026-01-04 18:20:09 +01:00 · 2023-08-20 21:01:44 +02:00
parent eb09cbde42
commit 0a32c77751
16 changed files with 326 additions and 153 deletions
--- a/osaca/semantics/arch_semantics.py
+++ b/osaca/semantics/arch_semantics.py
@@ -46,10 +46,10 @@ class ArchSemantics(ISASemantics):
        for idx, instruction_form in enumerate(kernel[start:], start):
            multiple_assignments = False
            # if iform has multiple possible port assignments, check all in a DFS manner and take the best
-            if isinstance(instruction_form["port_uops"], dict):
+            if isinstance(instruction_form.port_uops, dict):
                best_kernel = None
                best_kernel_tp = sys.maxsize
-                for port_util_alt in list(instruction_form["port_uops"].values())[1:]:
+                for port_util_alt in list(instruction_form.port_uops.values())[1:]:
                    k_tmp = deepcopy(kernel)
                    k_tmp[idx]["port_uops"] = deepcopy(port_util_alt)
                    k_tmp[idx]["port_pressure"] = self._machine_model.average_port_pressure(
@@ -62,15 +62,15 @@ class ArchSemantics(ISASemantics):
                        best_kernel_tp = max(self.get_throughput_sum(best_kernel))
                # check the first option in the main branch and compare against the best option later
                multiple_assignments = True
-                kernel[idx]["port_uops"] = list(instruction_form["port_uops"].values())[0]
-            for uop in instruction_form["port_uops"]:
+                kernel[idx]["port_uops"] = list(instruction_form.port_uops.values())[0]
+            for uop in instruction_form.port_uops:
                cycles = uop[0]
                ports = list(uop[1])
                indices = [port_list.index(p) for p in ports]
                # check if port sum of used ports for uop are unbalanced
                port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
                instr_ports = self._to_list(
-                    itemgetter(*indices)(instruction_form["port_pressure"])
+                    itemgetter(*indices)(instruction_form.port_pressure)
                )
                if len(set(port_sums)) > 1:
                    # balance ports
@@ -87,7 +87,7 @@ class ArchSemantics(ISASemantics):
                        differences[max_port_idx] -= INC
                        differences[min_port_idx] += INC
                        # instr_ports = [round(p, 2) for p in instr_ports]
-                        self._itemsetter(*indices)(instruction_form["port_pressure"], *instr_ports)
+                        self._itemsetter(*indices)(instruction_form.port_pressure, *instr_ports)
                        # check if min port is zero
                        if round(min(instr_ports), 2) <= 0:
                            # if port_pressure is not exactly 0.00, add the residual to
@@ -100,21 +100,21 @@ class ArchSemantics(ISASemantics):
                                # delete it
                                del differences[instr_ports.index(min(instr_ports))]
                                self._itemsetter(*indices)(
-                                    instruction_form["port_pressure"], *instr_ports
+                                    instruction_form.port_pressure, *instr_ports
                                )
                                zero_index = [
                                    p
                                    for p in indices
-                                    if round(instruction_form["port_pressure"][p], 2) == 0
-                                    or instruction_form["port_pressure"][p] < 0.00
+                                    if round(instruction_form.port_pressure[p], 2) == 0
+                                    or instruction_form.port_pressure[p] < 0.00
                                ][0]
-                                instruction_form["port_pressure"][zero_index] = 0.0
+                                instruction_form.port_pressure[zero_index] = 0.0
                            # Remove from further balancing
                            indices = [
-                                p for p in indices if instruction_form["port_pressure"][p] > 0
+                                p for p in indices if instruction_form.port_pressure[p] > 0
                            ]
                            instr_ports = self._to_list(
-                                itemgetter(*indices)(instruction_form["port_pressure"])
+                                itemgetter(*indices)(instruction_form.port_pressure)
                            )
                        # never remove more than the fixed utilization per uop and port, i.e.,
                        # cycles/len(ports)
@@ -124,7 +124,7 @@ class ArchSemantics(ISASemantics):
                            # pressure is not 0
                            del indices[differences.index(min(differences))]
                            instr_ports = self._to_list(
-                                itemgetter(*indices)(instruction_form["port_pressure"])
+                                itemgetter(*indices)(instruction_form.port_pressure)
                            )
                            del differences[differences.index(min(differences))]
                        port_sums = self._to_list(
@@ -139,14 +139,14 @@ class ArchSemantics(ISASemantics):

    def set_hidden_loads(self, kernel):
        """Hide loads behind stores if architecture supports hidden loads (depricated)"""
-        loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr["flags"]]
-        stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr["flags"]]
+        loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr.flags]
+        stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr.flags]
        # Filter instructions including load and store
-        load_ids = [instr["line_number"] for instr in loads]
-        store_ids = [instr["line_number"] for instr in stores]
+        load_ids = [instr.line_number for instr in loads]
+        store_ids = [instr.line_number for instr in stores]
        shared_ldst = list(set(load_ids).intersection(set(store_ids)))
-        loads = [instr for instr in loads if instr["line_number"] not in shared_ldst]
-        stores = [instr for instr in stores if instr["line_number"] not in shared_ldst]
+        loads = [instr for instr in loads if instr.line_number not in shared_ldst]
+        stores = [instr for instr in stores if instr.line_number not in shared_ldst]

        if len(stores) == 0 or len(loads) == 0:
            # nothing to do
@@ -182,35 +182,35 @@ class ArchSemantics(ISASemantics):
        """Assign throughput and latency to an instruction form."""
        flags = []
        port_number = len(self._machine_model["ports"])
-        if instruction_form["instruction"] is None:
+        if instruction_form.instruction is None:
            # No instruction (label, comment, ...) --> ignore
            throughput = 0.0
            latency = 0.0
            latency_wo_load = latency
-            instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
-            instruction_form["port_uops"] = []
+            instruction_form.port_pressure = [0.0 for i in range(port_number)]
+            instruction_form.port_uops = []
        else:
            instruction_data = self._machine_model.get_instruction(
-                instruction_form["instruction"], instruction_form["operands"]
+                instruction_form.instruction, instruction_form.operands
            )
            if (
                not instruction_data
                and self._isa == "x86"
-                and instruction_form["instruction"][-1] in self.GAS_SUFFIXES
+                and instruction_form.instruction[-1] in self.GAS_SUFFIXES
            ):
                # check for instruction without GAS suffix
                instruction_data = self._machine_model.get_instruction(
-                    instruction_form["instruction"][:-1], instruction_form["operands"]
+                    instruction_form.instruction[:-1], instruction_form.operands
                )
            if (
                instruction_data is None
                and self._isa == "aarch64"
-                and "." in instruction_form["instruction"]
+                and "." in instruction_form.instruction
            ):
                # Check for instruction without shape/cc suffix
-                suffix_start = instruction_form["instruction"].index(".")
+                suffix_start = instruction_form.instruction.index(".")
                instruction_data = self._machine_model.get_instruction(
-                    instruction_form["instruction"][:suffix_start], instruction_form["operands"]
+                    instruction_form.instruction[:suffix_start], instruction_form.operands
                )
            if instruction_data:
                # instruction form in DB
@@ -227,33 +227,33 @@ class ArchSemantics(ISASemantics):
                assign_unknown = True
                # check for equivalent register-operands DB entry if LD
                if (
-                    INSTR_FLAGS.HAS_LD in instruction_form["flags"]
-                    or INSTR_FLAGS.HAS_ST in instruction_form["flags"]
+                    INSTR_FLAGS.HAS_LD in instruction_form.flags
+                    or INSTR_FLAGS.HAS_ST in instruction_form.flags
                ):
                    # dynamically combine LD/ST and reg form of instruction form
                    # substitute mem and look for reg-only variant
-                    operands = self.substitute_mem_address(instruction_form["operands"])
+                    operands = self.substitute_mem_address(instruction_form.operands)
                    instruction_data_reg = self._machine_model.get_instruction(
-                        instruction_form["instruction"], operands
+                        instruction_form.instruction, operands
                    )
                    if (
                        not instruction_data_reg
                        and self._isa == "x86"
-                        and instruction_form["instruction"][-1] in self.GAS_SUFFIXES
+                        and instruction_form.instruction[-1] in self.GAS_SUFFIXES
                    ):
                        # check for instruction without GAS suffix
                        instruction_data_reg = self._machine_model.get_instruction(
-                            instruction_form["instruction"][:-1], operands
+                            instruction_form.instruction[:-1], operands
                        )
                    if (
                        instruction_data_reg is None
                        and self._isa == "aarch64"
-                        and "." in instruction_form["instruction"]
+                        and "." in instruction_form.instruction
                    ):
                        # Check for instruction without shape/cc suffix
-                        suffix_start = instruction_form["instruction"].index(".")
+                        suffix_start = instruction_form.instruction.index(".")
                        instruction_data_reg = self._machine_model.get_instruction(
-                            instruction_form["instruction"][:suffix_start], operands
+                            instruction_form.instruction[:suffix_start], operands
                        )
                    if instruction_data_reg:
                        assign_unknown = False
@@ -265,13 +265,13 @@ class ArchSemantics(ISASemantics):
                        dummy_reg = {"class": "register", "name": reg_type}
                        data_port_pressure = [0.0 for _ in range(port_number)]
                        data_port_uops = []
-                        if INSTR_FLAGS.HAS_LD in instruction_form["flags"]:
+                        if INSTR_FLAGS.HAS_LD in instruction_form.flags:
                            # LOAD performance data
                            load_perf_data = self._machine_model.get_load_throughput(
                                [
                                    x["memory"]
-                                    for x in instruction_form["semantic_operands"]["source"]
-                                    + instruction_form["semantic_operands"]["src_dst"]
+                                    for x in instruction_form.semantic_operands["source"]
+                                    + instruction_form.semantic_operands["src_dst"]
                                    if "memory" in x
                                ][0]
                            )
@@ -296,11 +296,11 @@ class ArchSemantics(ISASemantics):
                                    reg_type
                                ]
                                data_port_pressure = [pp * multiplier for pp in data_port_pressure]
-                        if INSTR_FLAGS.HAS_ST in instruction_form["flags"]:
+                        if INSTR_FLAGS.HAS_ST in instruction_form.flags:
                            # STORE performance data
                            destinations = (
-                                instruction_form["semantic_operands"]["destination"]
-                                + instruction_form["semantic_operands"]["src_dst"]
+                                instruction_form.semantic_operands["destination"]
+                                + instruction_form.semantic_operands["src_dst"]
                            )
                            store_perf_data = self._machine_model.get_store_throughput(
                                [x["memory"] for x in destinations if "memory" in x][0], dummy_reg
@@ -314,18 +314,18 @@ class ArchSemantics(ISASemantics):
                            if (
                                self._isa == "aarch64"
                                and "memory"
-                                not in instruction_form["semantic_operands"]["destination"]
+                                not in instruction_form.semantic_operands["destination"]
                                and all(
                                    [
                                        "post_indexed" in op["memory"]
                                        or "pre_indexed" in op["memory"]
-                                        for op in instruction_form["semantic_operands"]["src_dst"]
+                                        for op in instruction_form.semantic_operands["src_dst"]
                                        if "memory" in op
                                    ]
                                )
                            ):
                                st_data_port_uops = []
-                                instruction_form["flags"].remove(INSTR_FLAGS.HAS_ST)
+                                instruction_form.flags.remove(INSTR_FLAGS.HAS_ST)

                            # sum up all data ports in case for LOAD and STORE
                            st_data_port_pressure = self._machine_model.average_port_pressure(
@@ -349,12 +349,12 @@ class ArchSemantics(ISASemantics):
                        # Add LD and ST latency
                        latency += (
                            self._machine_model.get_load_latency(reg_type)
-                            if INSTR_FLAGS.HAS_LD in instruction_form["flags"]
+                            if INSTR_FLAGS.HAS_LD in instruction_form.flags
                            else 0
                        )
                        latency += (
                            self._machine_model.get_store_latency(reg_type)
-                            if INSTR_FLAGS.HAS_ST in instruction_form["flags"]
+                            if INSTR_FLAGS.HAS_ST in instruction_form.flags
                            else 0
                        )
                        latency_wo_load = instruction_data_reg["latency"]
@@ -367,13 +367,13 @@ class ArchSemantics(ISASemantics):
                        #         [
                        #             'post_indexed' in op['memory'] or
                        #             'pre_indexed' in op['memory']
-                        #             for op in instruction_form['operands']
+                        #             for op in instruction_form.operands
                        #             if 'memory' in op
                        #         ]
                        #     )
                        # ):
                        #     latency_wo_load = 1.0
-                        instruction_form["port_pressure"] = [
+                        instruction_form.port_pressure = [
                            sum(x)
                            for x in zip(
                                data_port_pressure,
@@ -382,7 +382,7 @@ class ArchSemantics(ISASemantics):
                                ),
                            )
                        ]
-                        instruction_form["port_uops"] = list(
+                        instruction_form.port_uops = list(
                            chain(instruction_data_reg["port_pressure"], data_port_uops)
                        )

@@ -391,21 +391,21 @@ class ArchSemantics(ISASemantics):
                    throughput = 0.0
                    latency = 0.0
                    latency_wo_load = latency
-                    instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
-                    instruction_form["port_uops"] = []
+                    instruction_form.port_pressure = [0.0 for i in range(port_number)]
+                    instruction_formport_uops = []
                    flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN]
        # flatten flag list
        flags = list(set(flags))
-        if "flags" not in instruction_form:
-            instruction_form["flags"] = flags
+        if instruction_form.flags == []:
+            instruction_form.flags = flags
        else:
-            instruction_form["flags"] += flags
-        instruction_form["throughput"] = throughput
-        instruction_form["latency"] = latency
-        instruction_form["latency_wo_load"] = latency_wo_load
+            instruction_form.flags += flags
+        instruction_form.throughput = throughput
+        instruction_form.latency = latency
+        instruction_form.latency_wo_load = latency_wo_load
        # for later CP and loop-carried dependency analysis
-        instruction_form["latency_cp"] = 0
-        instruction_form["latency_lcd"] = 0
+        instruction_form.latency_cp = 0
+        instruction_form.latency_lcd = 0

    def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
        """Apply performance data to instruction if it was found in the archDB"""
@@ -413,11 +413,11 @@ class ArchSemantics(ISASemantics):
        port_pressure = self._machine_model.average_port_pressure(
            instruction_data["port_pressure"]
        )
-        instruction_form["port_uops"] = instruction_data["port_pressure"]
+        instruction_form.port_uops = instruction_data["port_pressure"]
        try:
            assert isinstance(port_pressure, list)
            assert len(port_pressure) == port_number
-            instruction_form["port_pressure"] = port_pressure
+            instruction_form.port_pressure = port_pressure
            if sum(port_pressure) == 0 and throughput is not None:
                # port pressure on all ports 0 --> not bound to a port
                flags.append(INSTR_FLAGS.NOT_BOUND)
@@ -426,8 +426,8 @@ class ArchSemantics(ISASemantics):
                "Port pressure could not be imported correctly from database. "
                + "Please check entry for:\n {}".format(instruction_form)
            )
-            instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
-            instruction_form["port_uops"] = []
+            instruction_form.port_pressure = [0.0 for i in range(port_number)]
+            instruction_form.port_uops = []
            flags.append(INSTR_FLAGS.TP_UNKWN)
        if throughput is None:
            # assume 0 cy and mark as unknown
@@ -440,7 +440,7 @@ class ArchSemantics(ISASemantics):
            latency = 0.0
            latency_wo_load = latency
            flags.append(INSTR_FLAGS.LT_UNKWN)
-        if INSTR_FLAGS.HAS_LD in instruction_form["flags"]:
+        if INSTR_FLAGS.HAS_LD in instruction_form.flags:
            flags.append(INSTR_FLAGS.LD)
        return throughput, port_pressure, latency, latency_wo_load

@@ -489,7 +489,7 @@ class ArchSemantics(ISASemantics):
        """Get the overall throughput sum separated by port of all instructions of a kernel."""
        # ignoring all lines with throughput == 0.0, because there won't be anything to sum up
        # typically comment, label and non-instruction lines
-        port_pressures = [instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0]
+        port_pressures = [instr.port_pressure for instr in kernel if instr.throughput != 0.0]
        # Essentially summing up each columns of port_pressures, where each column is one port
        # and each row is one line of the kernel
        # round is necessary to ensure termination of ArchsSemantics.assign_optimal_throughput