fixed formatting with correct line length

JanLJL
2021-10-04 15:00:17 +02:00
parent e6ce870ca0
commit 5205cb5cc6
5 changed files with 76 additions and 208 deletions
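Every hunk below applies the same kind of change: statements that were wrapped across several short lines are collapsed onto fewer lines within the new length limit. A minimal sketch of how such over-long lines can be detected, assuming a 100-character limit and a placeholder file name (neither is stated in this commit):

    # Hypothetical helper, not part of this commit: report lines exceeding a length limit.
    from pathlib import Path


    def long_lines(path, limit=100):
        """Yield (line number, line length) for every line longer than `limit` characters."""
        for number, line in enumerate(Path(path).read_text().splitlines(), start=1):
            if len(line) > limit:
                yield number, len(line)


    if __name__ == "__main__":
        # "some_module.py" is a placeholder path, not a file from this repository.
        for number, length in long_lines("some_module.py", limit=100):
            print(f"line {number}: {length} characters")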

View File

@@ -46,9 +46,7 @@ class ArchSemantics(ISASemantics):
                 ports = list(uop[1])
                 indices = [port_list.index(p) for p in ports]
                 # check if port sum of used ports for uop are unbalanced
-                port_sums = self._to_list(
-                    itemgetter(*indices)(self.get_throughput_sum(kernel))
-                )
+                port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
                 instr_ports = self._to_list(
                     itemgetter(*indices)(instruction_form["port_pressure"])
                 )
@@ -67,9 +65,7 @@ class ArchSemantics(ISASemantics):
                         differences[max_port_idx] -= INC
                         differences[min_port_idx] += INC
                         # instr_ports = [round(p, 2) for p in instr_ports]
-                        self._itemsetter(*indices)(
-                            instruction_form["port_pressure"], *instr_ports
-                        )
+                        self._itemsetter(*indices)(instruction_form["port_pressure"], *instr_ports)
                         # check if min port is zero
                         if round(min(instr_ports), 2) <= 0:
                             # if port_pressure is not exactly 0.00, add the residual to
@@ -87,15 +83,12 @@ class ArchSemantics(ISASemantics):
                             zero_index = [
                                 p
                                 for p in indices
-                                if round(instruction_form["port_pressure"][p], 2)
-                                == 0
+                                if round(instruction_form["port_pressure"][p], 2) == 0
                             ][0]
                             instruction_form["port_pressure"][zero_index] = 0.0
                             # Remove from further balancing
                             indices = [
-                                p
-                                for p in indices
-                                if instruction_form["port_pressure"][p] > 0
+                                p for p in indices if instruction_form["port_pressure"][p] > 0
                             ]
                             instr_ports = self._to_list(
                                 itemgetter(*indices)(instruction_form["port_pressure"])
                             )
@@ -148,11 +141,9 @@ class ArchSemantics(ISASemantics):
                         if INSTR_FLAGS.HIDDEN_LD not in load_instr["flags"]
                     ]
                 )
-                load = [
-                    instr
-                    for instr in kernel
-                    if instr["line_number"] == min_distance_load[1]
-                ][0]
+                load = [instr for instr in kernel if instr["line_number"] == min_distance_load[1]][
+                    0
+                ]
                 # Hide load
                 load["flags"] += [INSTR_FLAGS.HIDDEN_LD]
                 load["port_pressure"] = self._nullify_data_ports(load["port_pressure"])
@@ -230,39 +221,27 @@ class ArchSemantics(ISASemantics):
             data_port_uops = self._machine_model.get_load_throughput(
                 [
                     x["memory"]
-                    for x in instruction_form["semantic_operands"][
-                        "source"
-                    ]
+                    for x in instruction_form["semantic_operands"]["source"]
                     + instruction_form["semantic_operands"]["src_dst"]
                     if "memory" in x
                 ][0]
             )
-            data_port_pressure = (
-                self._machine_model.average_port_pressure(
-                    data_port_uops
-                )
+            data_port_pressure = self._machine_model.average_port_pressure(
+                data_port_uops
             )
             if "load_throughput_multiplier" in self._machine_model:
-                multiplier = self._machine_model[
-                    "load_throughput_multiplier"
-                ][reg_type]
-                data_port_pressure = [
-                    pp * multiplier for pp in data_port_pressure
+                multiplier = self._machine_model["load_throughput_multiplier"][
+                    reg_type
                 ]
+                data_port_pressure = [pp * multiplier for pp in data_port_pressure]
             if INSTR_FLAGS.HAS_ST in instruction_form["flags"]:
                 # STORE performance data
                 destinations = (
                     instruction_form["semantic_operands"]["destination"]
                     + instruction_form["semantic_operands"]["src_dst"]
                 )
-                st_data_port_uops = (
-                    self._machine_model.get_store_throughput(
-                        [
-                            x["memory"]
-                            for x in destinations
-                            if "memory" in x
-                        ][0]
-                    )
+                st_data_port_uops = self._machine_model.get_store_throughput(
+                    [x["memory"] for x in destinations if "memory" in x][0]
                 )
                 # zero data port pressure and remove HAS_ST flag if
                 # - no mem operand in dst &&
@@ -271,16 +250,12 @@ class ArchSemantics(ISASemantics):
                 if (
                     self._isa == "aarch64"
                     and "memory"
-                    not in instruction_form["semantic_operands"][
-                        "destination"
-                    ]
+                    not in instruction_form["semantic_operands"]["destination"]
                     and all(
                         [
                             "post_indexed" in op["memory"]
                             or "pre_indexed" in op["memory"]
-                            for op in instruction_form["semantic_operands"][
-                                "src_dst"
-                            ]
+                            for op in instruction_form["semantic_operands"]["src_dst"]
                             if "memory" in op
                         ]
                     )
@@ -289,21 +264,18 @@ class ArchSemantics(ISASemantics):
                     instruction_form["flags"].remove(INSTR_FLAGS.HAS_ST)
                 # sum up all data ports in case for LOAD and STORE
-                st_data_port_pressure = (
-                    self._machine_model.average_port_pressure(
-                        st_data_port_uops
-                    )
+                st_data_port_pressure = self._machine_model.average_port_pressure(
+                    st_data_port_uops
                 )
                 if "store_throughput_multiplier" in self._machine_model:
-                    multiplier = self._machine_model[
-                        "store_throughput_multiplier"
-                    ][reg_type]
+                    multiplier = self._machine_model["store_throughput_multiplier"][
+                        reg_type
+                    ]
                     st_data_port_pressure = [
                         pp * multiplier for pp in st_data_port_pressure
                     ]
                 data_port_pressure = [
-                    sum(x)
-                    for x in zip(data_port_pressure, st_data_port_pressure)
+                    sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
                 ]
                 data_port_uops += st_data_port_uops
             throughput = max(
@@ -355,9 +327,7 @@ class ArchSemantics(ISASemantics):
             throughput = 0.0
             latency = 0.0
             latency_wo_load = latency
-            instruction_form["port_pressure"] = [
-                0.0 for i in range(port_number)
-            ]
+            instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
             instruction_form["port_uops"] = []
             flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN]
         # flatten flag list
@@ -373,9 +343,7 @@ class ArchSemantics(ISASemantics):
         instruction_form["latency_cp"] = 0
         instruction_form["latency_lcd"] = 0
 
-    def _handle_instruction_found(
-        self, instruction_data, port_number, instruction_form, flags
-    ):
+    def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
         """Apply performance data to instruction if it was found in the archDB"""
         throughput = instruction_data["throughput"]
         port_pressure = self._machine_model.average_port_pressure(
@@ -457,9 +425,7 @@ class ArchSemantics(ISASemantics):
         """Get the overall throughput sum separated by port of all instructions of a kernel."""
         # ignoring all lines with throughput == 0.0, because there won't be anything to sum up
         # typically comment, label and non-instruction lines
-        port_pressures = [
-            instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0
-        ]
+        port_pressures = [instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0]
         # Essentially summing up each columns of port_pressures, where each column is one port
         # and each row is one line of the kernel
         # round is necessary to ensure termination of ArchsSemantics.assign_optimal_throughput
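The port-balancing hunks above read and write several positions of the port-pressure list at once through operator.itemgetter and a matching setter (self._itemsetter). A minimal sketch of that access pattern with a stand-in setter and made-up pressure values (not the class methods themselves):

    # Sketch of the multi-index read/write pattern used in assign_optimal_throughput.
    from operator import itemgetter


    def itemsetter(*indices):
        """Return a setter that writes the given values at the given indices of a list."""
        def _setter(container, *values):
            for index, value in zip(indices, values):
                container[index] = value
        return _setter


    port_pressure = [0.5, 1.0, 0.0, 1.5]  # made-up per-port pressures
    indices = [1, 3]
    selected = list(itemgetter(*indices)(port_pressure))  # [1.0, 1.5]
    itemsetter(*indices)(port_pressure, 0.75, 1.75)        # port_pressure is now [0.5, 0.75, 0.0, 1.75]

With a single index, itemgetter returns a scalar instead of a tuple, which is presumably why the code above funnels its result through self._to_list before balancing.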

View File

@@ -18,9 +18,7 @@ from ruamel.yaml.compat import StringIO
 class MachineModel(object):
     WILDCARD = "*"
-    INTERNAL_VERSION = (
-        1  # increase whenever self._data format changes to invalidate cache!
-    )
+    INTERNAL_VERSION = 1  # increase whenever self._data format changes to invalidate cache!
     _runtime_cache = {}
 
     def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False):
@@ -45,9 +43,7 @@ class MachineModel(object):
                         "scale": s,
                         "port_pressure": [],
                     }
-                    for b, i, o, s in product(
-                        ["gpr"], ["gpr", None], ["imd", None], [1, 8]
-                    )
+                    for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
                 ],
                 "load_throughput_default": [],
                 "store_throughput": [],
@@ -89,9 +85,7 @@ class MachineModel(object):
             self._data["instruction_forms"] = []
         # separate multi-alias instruction forms
         for entry in [
-            x
-            for x in self._data["instruction_forms"]
-            if isinstance(x["name"], list)
+            x for x in self._data["instruction_forms"] if isinstance(x["name"], list)
         ]:
             for name in entry["name"]:
                 new_entry = {"name": name}
@@ -139,9 +133,7 @@ class MachineModel(object):
                 instruction_form
                 for instruction_form in name_matched_iforms
                 if self._match_operands(
-                    instruction_form["operands"]
-                    if "operands" in instruction_form
-                    else [],
+                    instruction_form["operands"] if "operands" in instruction_form else [],
                     operands,
                 )
             )
@@ -223,19 +215,11 @@ class MachineModel(object):
 
     def get_load_latency(self, reg_type):
         """Return load latency for given register type."""
-        return (
-            self._data["load_latency"][reg_type]
-            if self._data["load_latency"][reg_type]
-            else 0
-        )
+        return self._data["load_latency"][reg_type] if self._data["load_latency"][reg_type] else 0
 
     def get_load_throughput(self, memory):
         """Return load thorughput for given register type."""
-        ld_tp = [
-            m
-            for m in self._data["load_throughput"]
-            if self._match_mem_entries(memory, m)
-        ]
+        ld_tp = [m for m in self._data["load_throughput"] if self._match_mem_entries(memory, m)]
         if len(ld_tp) > 0:
             return ld_tp[0]["port_pressure"].copy()
         return self._data["load_throughput_default"].copy()
@@ -247,11 +231,7 @@ class MachineModel(object):
 
     def get_store_throughput(self, memory):
         """Return store throughput for given register type."""
-        st_tp = [
-            m
-            for m in self._data["store_throughput"]
-            if self._match_mem_entries(memory, m)
-        ]
+        st_tp = [m for m in self._data["store_throughput"] if self._match_mem_entries(memory, m)]
         if len(st_tp) > 0:
             return st_tp[0]["port_pressure"].copy()
         return self._data["store_throughput_default"].copy()
@@ -319,9 +299,7 @@ class MachineModel(object):
         formatted_instruction_forms = deepcopy(self._data["instruction_forms"])
         for instruction_form in formatted_instruction_forms:
             if instruction_form["port_pressure"] is not None:
-                cs = ruamel.yaml.comments.CommentedSeq(
-                    instruction_form["port_pressure"]
-                )
+                cs = ruamel.yaml.comments.CommentedSeq(instruction_form["port_pressure"])
                 cs.fa.set_flow_style()
                 instruction_form["port_pressure"] = cs
@@ -371,9 +349,7 @@ class MachineModel(object):
         hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
         # 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
-        companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
-            ".pickle"
-        )
+        companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
         if companion_cachefile.exists():
             # companion file (must be up-to-date, due to equal hash)
             with companion_cachefile.open("rb") as f:
@@ -382,9 +358,7 @@ class MachineModel(object):
                 return data
         # 2. home cachefile: ~/.osaca/cache/<name>_<sha512hash>.pickle
-        home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(
-            ".pickle"
-        )
+        home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(".pickle")
         if home_cachefile.exists():
             # home file (must be up-to-date, due to equal hash)
             with home_cachefile.open("rb") as f:
@@ -403,9 +377,7 @@ class MachineModel(object):
         p = Path(filepath)
         hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
         # 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
-        companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
-            ".pickle"
-        )
+        companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
         if os.access(str(companion_cachefile.parent), os.W_OK):
             with companion_cachefile.open("wb") as f:
                 pickle.dump(self._data, f)
@@ -449,9 +421,7 @@ class MachineModel(object):
                 operand_string += operand["prefix"]
                 operand_string += operand["shape"] if "shape" in operand else ""
             elif "name" in operand:
-                operand_string += (
-                    "r" if operand["name"] == "gpr" else operand["name"][0]
-                )
+                operand_string += "r" if operand["name"] == "gpr" else operand["name"][0]
             elif opclass == "memory":
                 # Memory
                 operand_string += "m"
@@ -614,9 +584,7 @@ class MachineModel(object):
         if "register" in operand:
             if i_operand["class"] != "register":
                 return False
-            return self._is_x86_reg_type(
-                i_operand, operand["register"], consider_masking=False
-            )
+            return self._is_x86_reg_type(i_operand, operand["register"], consider_masking=False)
         # memory
         if "memory" in operand:
             if i_operand["class"] != "memory":
@@ -664,8 +632,7 @@ class MachineModel(object):
                 return False
         if "shape" in reg:
             if "shape" in i_reg and (
-                reg["shape"] == i_reg["shape"]
-                or self.WILDCARD in (reg["shape"] + i_reg["shape"])
+                reg["shape"] == i_reg["shape"] or self.WILDCARD in (reg["shape"] + i_reg["shape"])
             ):
                 return True
             return False
@@ -695,8 +662,7 @@ class MachineModel(object):
             if (
                 (
                     "mask" in reg
-                    and reg["mask"].rstrip(string.digits).lower()
-                    == i_reg.get("mask")
+                    and reg["mask"].rstrip(string.digits).lower() == i_reg.get("mask")
                 )
                 or reg.get("mask") == self.WILDCARD
                 or i_reg.get("mask") == self.WILDCARD
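The cache hunks above name the pickle cache after a SHA-256 hash of the machine-model YAML contents and place it next to the YAML file. A minimal sketch of that naming scheme, mirroring the calls visible in the diff but with a hypothetical input path:

    # Sketch of the companion-cachefile naming used above: '.<name>_<hash>.pickle' next to the YAML.
    import hashlib
    from pathlib import Path


    def companion_cachefile(yaml_path):
        """Return the companion cache path for a machine-model YAML file (assumed layout)."""
        p = Path(yaml_path)
        hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
        return p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")

Because the hash covers the file contents, a stale cache file simply stops matching once the YAML changes, which is the invalidation strategy the diff relies on.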

View File

@@ -45,10 +45,7 @@ class ISASemantics(object):
     def assign_src_dst(self, instruction_form):
         """Update instruction form dictionary with source, destination and flag information."""
         # if the instruction form doesn't have operands or is None, there's nothing to do
-        if (
-            instruction_form["operands"] is None
-            or instruction_form["instruction"] is None
-        ):
+        if instruction_form["operands"] is None or instruction_form["instruction"] is None:
             instruction_form["semantic_operands"] = AttrDict(
                 {"source": [], "destination": [], "src_dst": []}
             )
@@ -97,20 +94,16 @@ class ISASemantics(object):
         if assign_default:
             # no irregular operand structure, apply default
             op_dict["source"] = self._get_regular_source_operands(instruction_form)
-            op_dict["destination"] = self._get_regular_destination_operands(
-                instruction_form
-            )
+            op_dict["destination"] = self._get_regular_destination_operands(instruction_form)
             op_dict["src_dst"] = []
         # post-process pre- and post-indexing for aarch64 memory operands
         if self._isa == "aarch64":
             for operand in [op for op in op_dict["source"] if "memory" in op]:
                 post_indexed = (
-                    "post_indexed" in operand["memory"]
-                    and operand["memory"]["post_indexed"]
+                    "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
                 )
                 pre_indexed = (
-                    "pre_indexed" in operand["memory"]
-                    and operand["memory"]["pre_indexed"]
+                    "pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
                 )
                 if post_indexed or pre_indexed:
                     op_dict["src_dst"].append(
@@ -124,12 +117,10 @@ class ISASemantics(object):
                     )
             for operand in [op for op in op_dict["destination"] if "memory" in op]:
                 post_indexed = (
-                    "post_indexed" in operand["memory"]
-                    and operand["memory"]["post_indexed"]
+                    "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
                 )
                 pre_indexed = (
-                    "pre_indexed" in operand["memory"]
-                    and operand["memory"]["pre_indexed"]
+                    "pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
                 )
                 if post_indexed or pre_indexed:
                     op_dict["src_dst"].append(
@@ -189,17 +180,14 @@ class ISASemantics(object):
                     base_name = o.memory.base.get("prefix", "") + o.memory.base.name
                     return {
                         base_name: {
-                            "name": o.memory.base.get("prefix", "")
-                            + o.memory.base.name,
+                            "name": o.memory.base.get("prefix", "") + o.memory.base.name,
                             "value": o.memory.post_indexed.value,
                         }
                     }
             return {}
 
         reg_operand_names = {}  # e.g., {'rax': 'op1'}
-        operand_state = (
-            {}
-        )  # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
+        operand_state = {}  # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
 
         for o in instruction_form.operands:
             if "pre_indexed" in o.get("memory", {}):
@@ -211,9 +199,7 @@ class ISASemantics(object):
                 )
                 base_name = o.memory.base.get("prefix", "") + o.memory.base.name
                 reg_operand_names = {base_name: "op1"}
-                operand_state = {
-                    "op1": {"name": base_name, "value": o.memory.offset.value}
-                }
+                operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
 
         if isa_data is not None and "operation" in isa_data:
             for i, o in enumerate(instruction_form.operands):
@@ -254,18 +240,14 @@ class ISASemantics(object):
             op_dict["src_dst"] = []
         # handle dependency breaking instructions
-        if (
-            "breaks_dependency_on_equal_operands" in isa_data
-            and operands[1:] == operands[:-1]
-        ):
+        if "breaks_dependency_on_equal_operands" in isa_data and operands[1:] == operands[:-1]:
             op_dict["destination"] += operands
 
         if "hidden_operands" in isa_data:
             op_dict["destination"] += [
                 AttrDict.convert_dict(
                     {
                         hop["class"]: {
-                            k: hop[k]
-                            for k in ["name", "class", "source", "destination"]
+                            k: hop[k] for k in ["name", "class", "source", "destination"]
                         }
                     }
                 )
@@ -351,9 +333,7 @@ class ISASemantics(object):
 
     def substitute_mem_address(self, operands):
         """Create memory wildcard for all memory operands"""
-        return [
-            self._create_reg_wildcard() if "memory" in op else op for op in operands
-        ]
+        return [self._create_reg_wildcard() if "memory" in op else op for op in operands]
 
     def _create_reg_wildcard(self):
         """Wildcard constructor"""

View File

@@ -54,9 +54,7 @@ class KernelDG(nx.DiGraph):
         dg = nx.DiGraph()
         for i, instruction_form in enumerate(kernel):
             dg.add_node(instruction_form["line_number"])
-            dg.nodes[instruction_form["line_number"]][
-                "instruction_form"
-            ] = instruction_form
+            dg.nodes[instruction_form["line_number"]]["instruction_form"] = instruction_form
             # add load as separate node if existent
             if (
                 INSTR_FLAGS.HAS_LD in instruction_form["flags"]
@@ -71,16 +69,12 @@ class KernelDG(nx.DiGraph):
                 dg.add_edge(
                     instruction_form["line_number"] + 0.1,
                     instruction_form["line_number"],
-                    latency=instruction_form["latency"]
-                    - instruction_form["latency_wo_load"],
+                    latency=instruction_form["latency"] - instruction_form["latency_wo_load"],
                 )
-            for dep, dep_flags in self.find_depending(
-                instruction_form, kernel[i + 1 :]
-            ):
+            for dep, dep_flags in self.find_depending(instruction_form, kernel[i + 1 :]):
                 edge_weight = (
                     instruction_form["latency"]
-                    if "mem_dep" in dep_flags
-                    or "latency_wo_load" not in instruction_form
+                    if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form
                     else instruction_form["latency_wo_load"]
                 )
                 if "storeload_dep" in dep_flags:
@@ -312,9 +306,7 @@ class KernelDG(nx.DiGraph):
                     # store to same location (presumed)
                     if self.is_memstore(dst.memory, instr_form, register_changes):
                         break
-            self._update_reg_changes(
-                instr_form, register_changes, only_postindexed=True
-            )
+            self._update_reg_changes(instr_form, register_changes, only_postindexed=True)
 
     def _update_reg_changes(self, iform, reg_state=None, only_postindexed=False):
         if self.arch_sem is None:
@@ -322,9 +314,7 @@ class KernelDG(nx.DiGraph):
             return {}
         if reg_state is None:
             reg_state = {}
-        for reg, change in self.arch_sem.get_reg_changes(
-            iform, only_postindexed
-        ).items():
+        for reg, change in self.arch_sem.get_reg_changes(iform, only_postindexed).items():
             if change is None or reg_state.get(reg, {}) is None:
                 reg_state[reg] = None
             else:
@@ -362,23 +352,15 @@ class KernelDG(nx.DiGraph):
             instruction_form.semantic_operands.src_dst,
         ):
             if "register" in src:
-                is_read = (
-                    self.parser.is_reg_dependend_of(register, src.register) or is_read
-                )
+                is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
             if "flag" in src:
-                is_read = (
-                    self.parser.is_flag_dependend_of(register, src.flag) or is_read
-                )
+                is_read = self.parser.is_flag_dependend_of(register, src.flag) or is_read
             if "memory" in src:
                 if src.memory.base is not None:
-                    is_read = (
-                        self.parser.is_reg_dependend_of(register, src.memory.base)
-                        or is_read
-                    )
+                    is_read = self.parser.is_reg_dependend_of(register, src.memory.base) or is_read
                 if src.memory.index is not None:
                     is_read = (
-                        self.parser.is_reg_dependend_of(register, src.memory.index)
-                        or is_read
+                        self.parser.is_reg_dependend_of(register, src.memory.index) or is_read
                     )
         # Check also if read in destination memory address
         for dst in chain(
@@ -387,14 +369,10 @@ class KernelDG(nx.DiGraph):
         ):
             if "memory" in dst:
                 if dst.memory.base is not None:
-                    is_read = (
-                        self.parser.is_reg_dependend_of(register, dst.memory.base)
-                        or is_read
-                    )
+                    is_read = self.parser.is_reg_dependend_of(register, dst.memory.base) or is_read
                 if dst.memory.index is not None:
                     is_read = (
-                        self.parser.is_reg_dependend_of(register, dst.memory.index)
-                        or is_read
+                        self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read
                     )
 
         return is_read
@@ -443,10 +421,7 @@ class KernelDG(nx.DiGraph):
                     if mem.scale != src.scale:
                         # scale factors do not match
                         continue
-                    if (
-                        mem.index.get("prefix", "") + mem.index["name"]
-                        != index_change["name"]
-                    ):
+                    if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
                         # index registers do not match
                         continue
                     addr_change += index_change["value"] * src.scale
@@ -468,19 +443,13 @@ class KernelDG(nx.DiGraph):
             instruction_form.semantic_operands.src_dst,
         ):
             if "register" in dst:
-                is_written = (
-                    self.parser.is_reg_dependend_of(register, dst.register)
-                    or is_written
-                )
+                is_written = self.parser.is_reg_dependend_of(register, dst.register) or is_written
             if "flag" in dst:
-                is_written = (
-                    self.parser.is_flag_dependend_of(register, dst.flag) or is_written
-                )
+                is_written = self.parser.is_flag_dependend_of(register, dst.flag) or is_written
             if "memory" in dst:
                 if "pre_indexed" in dst.memory or "post_indexed" in dst.memory:
                     is_written = (
-                        self.parser.is_reg_dependend_of(register, dst.memory.base)
-                        or is_written
+                        self.parser.is_reg_dependend_of(register, dst.memory.base) or is_written
                     )
         # Check also for possible pre- or post-indexing in memory addresses
         for src in chain(
@@ -490,8 +459,7 @@ class KernelDG(nx.DiGraph):
             if "memory" in src:
                 if "pre_indexed" in src.memory or "post_indexed" in src.memory:
                     is_written = (
-                        self.parser.is_reg_dependend_of(register, src.memory.base)
-                        or is_written
+                        self.parser.is_reg_dependend_of(register, src.memory.base) or is_written
                     )
 
         return is_written
@@ -522,9 +490,7 @@ class KernelDG(nx.DiGraph):
         lcd = self.get_loopcarried_dependencies()
         lcd_line_numbers = {}
         for dep in lcd:
-            lcd_line_numbers[dep] = [
-                x["line_number"] for x, lat in lcd[dep]["dependencies"]
-            ]
+            lcd_line_numbers[dep] = [x["line_number"] for x, lat in lcd[dep]["dependencies"]]
         # add color scheme
         graph.graph["node"] = {"colorscheme": "accent8"}
         graph.graph["edge"] = {"colorscheme": "accent8"}
@@ -535,9 +501,7 @@ class KernelDG(nx.DiGraph):
             max_line_number = max(lcd_line_numbers[dep])
             graph.add_edge(max_line_number, min_line_number)
             graph.edges[max_line_number, min_line_number]["latency"] = [
-                lat
-                for x, lat in lcd[dep]["dependencies"]
-                if x["line_number"] == max_line_number
+                lat for x, lat in lcd[dep]["dependencies"] if x["line_number"] == max_line_number
             ]
 
         # add label to edges
@@ -546,9 +510,7 @@ class KernelDG(nx.DiGraph):
 
         # add CP values to graph
         for n in cp:
-            graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n[
-                "latency_cp"
-            ]
+            graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n["latency_cp"]
 
         # color CP and LCD
         for n in graph.nodes:
@@ -568,8 +530,7 @@ class KernelDG(nx.DiGraph):
         for e in graph.edges:
             if (
                 graph.nodes[e[0]]["instruction_form"]["line_number"] in cp_line_numbers
-                and graph.nodes[e[1]]["instruction_form"]["line_number"]
-                in cp_line_numbers
+                and graph.nodes[e[1]]["instruction_form"]["line_number"] in cp_line_numbers
                 and e[0] < e[1]
             ):
                 bold_edge = True
@@ -581,8 +542,7 @@ class KernelDG(nx.DiGraph):
                 graph.edges[e]["penwidth"] = 3
             for dep in lcd_line_numbers:
                 if (
-                    graph.nodes[e[0]]["instruction_form"]["line_number"]
-                    in lcd_line_numbers[dep]
+                    graph.nodes[e[0]]["instruction_form"]["line_number"] in lcd_line_numbers[dep]
                     and graph.nodes[e[1]]["instruction_form"]["line_number"]
                     in lcd_line_numbers[dep]
                 ):
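The KernelDG hunks above keep each instruction form as a node attribute and the dependency latency as an edge attribute of a networkx DiGraph. A minimal sketch of that layout with made-up line numbers and latencies rather than a real kernel:

    # Sketch of the node/edge attribute layout used above (toy values, not a real kernel).
    import networkx as nx

    dg = nx.DiGraph()
    for line_number, latency in [(1, 4.0), (2, 1.0)]:
        dg.add_node(line_number)
        dg.nodes[line_number]["instruction_form"] = {"line_number": line_number, "latency": latency}

    # the instruction in line 1 feeds the one in line 2; the edge carries the producer's latency
    dg.add_edge(1, 2, latency=dg.nodes[1]["instruction_form"]["latency"])
    print(dg.edges[1, 2]["latency"])  # 4.0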

View File

@@ -133,11 +133,7 @@ def find_marked_section(
     index_end = -1
     for i, line in enumerate(lines):
         try:
-            if (
-                line.instruction is None
-                and comments is not None
-                and line.comment is not None
-            ):
+            if line.instruction is None and comments is not None and line.comment is not None:
                 if comments["start"] == line.comment:
                     index_start = i + 1
                 elif comments["end"] == line.comment: