mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-04 18:20:09 +01:00
fixed formatting with correct line length
This commit is contained in:
@@ -46,9 +46,7 @@ class ArchSemantics(ISASemantics):
|
||||
ports = list(uop[1])
|
||||
indices = [port_list.index(p) for p in ports]
|
||||
# check if port sum of used ports for uop are unbalanced
|
||||
port_sums = self._to_list(
|
||||
itemgetter(*indices)(self.get_throughput_sum(kernel))
|
||||
)
|
||||
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
|
||||
instr_ports = self._to_list(
|
||||
itemgetter(*indices)(instruction_form["port_pressure"])
|
||||
)
|
||||
@@ -67,9 +65,7 @@ class ArchSemantics(ISASemantics):
|
||||
differences[max_port_idx] -= INC
|
||||
differences[min_port_idx] += INC
|
||||
# instr_ports = [round(p, 2) for p in instr_ports]
|
||||
self._itemsetter(*indices)(
|
||||
instruction_form["port_pressure"], *instr_ports
|
||||
)
|
||||
self._itemsetter(*indices)(instruction_form["port_pressure"], *instr_ports)
|
||||
# check if min port is zero
|
||||
if round(min(instr_ports), 2) <= 0:
|
||||
# if port_pressure is not exactly 0.00, add the residual to
|
||||
@@ -87,15 +83,12 @@ class ArchSemantics(ISASemantics):
|
||||
zero_index = [
|
||||
p
|
||||
for p in indices
|
||||
if round(instruction_form["port_pressure"][p], 2)
|
||||
== 0
|
||||
if round(instruction_form["port_pressure"][p], 2) == 0
|
||||
][0]
|
||||
instruction_form["port_pressure"][zero_index] = 0.0
|
||||
# Remove from further balancing
|
||||
indices = [
|
||||
p
|
||||
for p in indices
|
||||
if instruction_form["port_pressure"][p] > 0
|
||||
p for p in indices if instruction_form["port_pressure"][p] > 0
|
||||
]
|
||||
instr_ports = self._to_list(
|
||||
itemgetter(*indices)(instruction_form["port_pressure"])
|
||||
@@ -148,11 +141,9 @@ class ArchSemantics(ISASemantics):
|
||||
if INSTR_FLAGS.HIDDEN_LD not in load_instr["flags"]
|
||||
]
|
||||
)
|
||||
load = [
|
||||
instr
|
||||
for instr in kernel
|
||||
if instr["line_number"] == min_distance_load[1]
|
||||
][0]
|
||||
load = [instr for instr in kernel if instr["line_number"] == min_distance_load[1]][
|
||||
0
|
||||
]
|
||||
# Hide load
|
||||
load["flags"] += [INSTR_FLAGS.HIDDEN_LD]
|
||||
load["port_pressure"] = self._nullify_data_ports(load["port_pressure"])
|
||||
@@ -230,39 +221,27 @@ class ArchSemantics(ISASemantics):
|
||||
data_port_uops = self._machine_model.get_load_throughput(
|
||||
[
|
||||
x["memory"]
|
||||
for x in instruction_form["semantic_operands"][
|
||||
"source"
|
||||
]
|
||||
for x in instruction_form["semantic_operands"]["source"]
|
||||
+ instruction_form["semantic_operands"]["src_dst"]
|
||||
if "memory" in x
|
||||
][0]
|
||||
)
|
||||
data_port_pressure = (
|
||||
self._machine_model.average_port_pressure(
|
||||
data_port_uops
|
||||
)
|
||||
data_port_pressure = self._machine_model.average_port_pressure(
|
||||
data_port_uops
|
||||
)
|
||||
if "load_throughput_multiplier" in self._machine_model:
|
||||
multiplier = self._machine_model[
|
||||
"load_throughput_multiplier"
|
||||
][reg_type]
|
||||
data_port_pressure = [
|
||||
pp * multiplier for pp in data_port_pressure
|
||||
multiplier = self._machine_model["load_throughput_multiplier"][
|
||||
reg_type
|
||||
]
|
||||
data_port_pressure = [pp * multiplier for pp in data_port_pressure]
|
||||
if INSTR_FLAGS.HAS_ST in instruction_form["flags"]:
|
||||
# STORE performance data
|
||||
destinations = (
|
||||
instruction_form["semantic_operands"]["destination"]
|
||||
+ instruction_form["semantic_operands"]["src_dst"]
|
||||
)
|
||||
st_data_port_uops = (
|
||||
self._machine_model.get_store_throughput(
|
||||
[
|
||||
x["memory"]
|
||||
for x in destinations
|
||||
if "memory" in x
|
||||
][0]
|
||||
)
|
||||
st_data_port_uops = self._machine_model.get_store_throughput(
|
||||
[x["memory"] for x in destinations if "memory" in x][0]
|
||||
)
|
||||
# zero data port pressure and remove HAS_ST flag if
|
||||
# - no mem operand in dst &&
|
||||
@@ -271,16 +250,12 @@ class ArchSemantics(ISASemantics):
|
||||
if (
|
||||
self._isa == "aarch64"
|
||||
and "memory"
|
||||
not in instruction_form["semantic_operands"][
|
||||
"destination"
|
||||
]
|
||||
not in instruction_form["semantic_operands"]["destination"]
|
||||
and all(
|
||||
[
|
||||
"post_indexed" in op["memory"]
|
||||
or "pre_indexed" in op["memory"]
|
||||
for op in instruction_form["semantic_operands"][
|
||||
"src_dst"
|
||||
]
|
||||
for op in instruction_form["semantic_operands"]["src_dst"]
|
||||
if "memory" in op
|
||||
]
|
||||
)
|
||||
@@ -289,21 +264,18 @@ class ArchSemantics(ISASemantics):
|
||||
instruction_form["flags"].remove(INSTR_FLAGS.HAS_ST)
|
||||
|
||||
# sum up all data ports in case for LOAD and STORE
|
||||
st_data_port_pressure = (
|
||||
self._machine_model.average_port_pressure(
|
||||
st_data_port_uops
|
||||
)
|
||||
st_data_port_pressure = self._machine_model.average_port_pressure(
|
||||
st_data_port_uops
|
||||
)
|
||||
if "store_throughput_multiplier" in self._machine_model:
|
||||
multiplier = self._machine_model[
|
||||
"store_throughput_multiplier"
|
||||
][reg_type]
|
||||
multiplier = self._machine_model["store_throughput_multiplier"][
|
||||
reg_type
|
||||
]
|
||||
st_data_port_pressure = [
|
||||
pp * multiplier for pp in st_data_port_pressure
|
||||
]
|
||||
data_port_pressure = [
|
||||
sum(x)
|
||||
for x in zip(data_port_pressure, st_data_port_pressure)
|
||||
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
|
||||
]
|
||||
data_port_uops += st_data_port_uops
|
||||
throughput = max(
|
||||
@@ -355,9 +327,7 @@ class ArchSemantics(ISASemantics):
|
||||
throughput = 0.0
|
||||
latency = 0.0
|
||||
latency_wo_load = latency
|
||||
instruction_form["port_pressure"] = [
|
||||
0.0 for i in range(port_number)
|
||||
]
|
||||
instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
|
||||
instruction_form["port_uops"] = []
|
||||
flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN]
|
||||
# flatten flag list
|
||||
@@ -373,9 +343,7 @@ class ArchSemantics(ISASemantics):
|
||||
instruction_form["latency_cp"] = 0
|
||||
instruction_form["latency_lcd"] = 0
|
||||
|
||||
def _handle_instruction_found(
|
||||
self, instruction_data, port_number, instruction_form, flags
|
||||
):
|
||||
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
|
||||
"""Apply performance data to instruction if it was found in the archDB"""
|
||||
throughput = instruction_data["throughput"]
|
||||
port_pressure = self._machine_model.average_port_pressure(
|
||||
@@ -457,9 +425,7 @@ class ArchSemantics(ISASemantics):
|
||||
"""Get the overall throughput sum separated by port of all instructions of a kernel."""
|
||||
# ignoring all lines with throughput == 0.0, because there won't be anything to sum up
|
||||
# typically comment, label and non-instruction lines
|
||||
port_pressures = [
|
||||
instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0
|
||||
]
|
||||
port_pressures = [instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0]
|
||||
# Essentially summing up each columns of port_pressures, where each column is one port
|
||||
# and each row is one line of the kernel
|
||||
# round is necessary to ensure termination of ArchsSemantics.assign_optimal_throughput
|
||||
|
||||
@@ -18,9 +18,7 @@ from ruamel.yaml.compat import StringIO
|
||||
|
||||
class MachineModel(object):
|
||||
WILDCARD = "*"
|
||||
INTERNAL_VERSION = (
|
||||
1 # increase whenever self._data format changes to invalidate cache!
|
||||
)
|
||||
INTERNAL_VERSION = 1 # increase whenever self._data format changes to invalidate cache!
|
||||
_runtime_cache = {}
|
||||
|
||||
def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False):
|
||||
@@ -45,9 +43,7 @@ class MachineModel(object):
|
||||
"scale": s,
|
||||
"port_pressure": [],
|
||||
}
|
||||
for b, i, o, s in product(
|
||||
["gpr"], ["gpr", None], ["imd", None], [1, 8]
|
||||
)
|
||||
for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
|
||||
],
|
||||
"load_throughput_default": [],
|
||||
"store_throughput": [],
|
||||
@@ -89,9 +85,7 @@ class MachineModel(object):
|
||||
self._data["instruction_forms"] = []
|
||||
# separate multi-alias instruction forms
|
||||
for entry in [
|
||||
x
|
||||
for x in self._data["instruction_forms"]
|
||||
if isinstance(x["name"], list)
|
||||
x for x in self._data["instruction_forms"] if isinstance(x["name"], list)
|
||||
]:
|
||||
for name in entry["name"]:
|
||||
new_entry = {"name": name}
|
||||
@@ -139,9 +133,7 @@ class MachineModel(object):
|
||||
instruction_form
|
||||
for instruction_form in name_matched_iforms
|
||||
if self._match_operands(
|
||||
instruction_form["operands"]
|
||||
if "operands" in instruction_form
|
||||
else [],
|
||||
instruction_form["operands"] if "operands" in instruction_form else [],
|
||||
operands,
|
||||
)
|
||||
)
|
||||
@@ -223,19 +215,11 @@ class MachineModel(object):
|
||||
|
||||
def get_load_latency(self, reg_type):
|
||||
"""Return load latency for given register type."""
|
||||
return (
|
||||
self._data["load_latency"][reg_type]
|
||||
if self._data["load_latency"][reg_type]
|
||||
else 0
|
||||
)
|
||||
return self._data["load_latency"][reg_type] if self._data["load_latency"][reg_type] else 0
|
||||
|
||||
def get_load_throughput(self, memory):
|
||||
"""Return load thorughput for given register type."""
|
||||
ld_tp = [
|
||||
m
|
||||
for m in self._data["load_throughput"]
|
||||
if self._match_mem_entries(memory, m)
|
||||
]
|
||||
ld_tp = [m for m in self._data["load_throughput"] if self._match_mem_entries(memory, m)]
|
||||
if len(ld_tp) > 0:
|
||||
return ld_tp[0]["port_pressure"].copy()
|
||||
return self._data["load_throughput_default"].copy()
|
||||
@@ -247,11 +231,7 @@ class MachineModel(object):
|
||||
|
||||
def get_store_throughput(self, memory):
|
||||
"""Return store throughput for given register type."""
|
||||
st_tp = [
|
||||
m
|
||||
for m in self._data["store_throughput"]
|
||||
if self._match_mem_entries(memory, m)
|
||||
]
|
||||
st_tp = [m for m in self._data["store_throughput"] if self._match_mem_entries(memory, m)]
|
||||
if len(st_tp) > 0:
|
||||
return st_tp[0]["port_pressure"].copy()
|
||||
return self._data["store_throughput_default"].copy()
|
||||
@@ -319,9 +299,7 @@ class MachineModel(object):
|
||||
formatted_instruction_forms = deepcopy(self._data["instruction_forms"])
|
||||
for instruction_form in formatted_instruction_forms:
|
||||
if instruction_form["port_pressure"] is not None:
|
||||
cs = ruamel.yaml.comments.CommentedSeq(
|
||||
instruction_form["port_pressure"]
|
||||
)
|
||||
cs = ruamel.yaml.comments.CommentedSeq(instruction_form["port_pressure"])
|
||||
cs.fa.set_flow_style()
|
||||
instruction_form["port_pressure"] = cs
|
||||
|
||||
@@ -371,9 +349,7 @@ class MachineModel(object):
|
||||
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
|
||||
|
||||
# 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
|
||||
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
|
||||
".pickle"
|
||||
)
|
||||
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
|
||||
if companion_cachefile.exists():
|
||||
# companion file (must be up-to-date, due to equal hash)
|
||||
with companion_cachefile.open("rb") as f:
|
||||
@@ -382,9 +358,7 @@ class MachineModel(object):
|
||||
return data
|
||||
|
||||
# 2. home cachefile: ~/.osaca/cache/<name>_<sha512hash>.pickle
|
||||
home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(
|
||||
".pickle"
|
||||
)
|
||||
home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(".pickle")
|
||||
if home_cachefile.exists():
|
||||
# home file (must be up-to-date, due to equal hash)
|
||||
with home_cachefile.open("rb") as f:
|
||||
@@ -403,9 +377,7 @@ class MachineModel(object):
|
||||
p = Path(filepath)
|
||||
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
|
||||
# 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
|
||||
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
|
||||
".pickle"
|
||||
)
|
||||
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
|
||||
if os.access(str(companion_cachefile.parent), os.W_OK):
|
||||
with companion_cachefile.open("wb") as f:
|
||||
pickle.dump(self._data, f)
|
||||
@@ -449,9 +421,7 @@ class MachineModel(object):
|
||||
operand_string += operand["prefix"]
|
||||
operand_string += operand["shape"] if "shape" in operand else ""
|
||||
elif "name" in operand:
|
||||
operand_string += (
|
||||
"r" if operand["name"] == "gpr" else operand["name"][0]
|
||||
)
|
||||
operand_string += "r" if operand["name"] == "gpr" else operand["name"][0]
|
||||
elif opclass == "memory":
|
||||
# Memory
|
||||
operand_string += "m"
|
||||
@@ -614,9 +584,7 @@ class MachineModel(object):
|
||||
if "register" in operand:
|
||||
if i_operand["class"] != "register":
|
||||
return False
|
||||
return self._is_x86_reg_type(
|
||||
i_operand, operand["register"], consider_masking=False
|
||||
)
|
||||
return self._is_x86_reg_type(i_operand, operand["register"], consider_masking=False)
|
||||
# memory
|
||||
if "memory" in operand:
|
||||
if i_operand["class"] != "memory":
|
||||
@@ -664,8 +632,7 @@ class MachineModel(object):
|
||||
return False
|
||||
if "shape" in reg:
|
||||
if "shape" in i_reg and (
|
||||
reg["shape"] == i_reg["shape"]
|
||||
or self.WILDCARD in (reg["shape"] + i_reg["shape"])
|
||||
reg["shape"] == i_reg["shape"] or self.WILDCARD in (reg["shape"] + i_reg["shape"])
|
||||
):
|
||||
return True
|
||||
return False
|
||||
@@ -695,8 +662,7 @@ class MachineModel(object):
|
||||
if (
|
||||
(
|
||||
"mask" in reg
|
||||
and reg["mask"].rstrip(string.digits).lower()
|
||||
== i_reg.get("mask")
|
||||
and reg["mask"].rstrip(string.digits).lower() == i_reg.get("mask")
|
||||
)
|
||||
or reg.get("mask") == self.WILDCARD
|
||||
or i_reg.get("mask") == self.WILDCARD
|
||||
|
||||
@@ -45,10 +45,7 @@ class ISASemantics(object):
|
||||
def assign_src_dst(self, instruction_form):
|
||||
"""Update instruction form dictionary with source, destination and flag information."""
|
||||
# if the instruction form doesn't have operands or is None, there's nothing to do
|
||||
if (
|
||||
instruction_form["operands"] is None
|
||||
or instruction_form["instruction"] is None
|
||||
):
|
||||
if instruction_form["operands"] is None or instruction_form["instruction"] is None:
|
||||
instruction_form["semantic_operands"] = AttrDict(
|
||||
{"source": [], "destination": [], "src_dst": []}
|
||||
)
|
||||
@@ -97,20 +94,16 @@ class ISASemantics(object):
|
||||
if assign_default:
|
||||
# no irregular operand structure, apply default
|
||||
op_dict["source"] = self._get_regular_source_operands(instruction_form)
|
||||
op_dict["destination"] = self._get_regular_destination_operands(
|
||||
instruction_form
|
||||
)
|
||||
op_dict["destination"] = self._get_regular_destination_operands(instruction_form)
|
||||
op_dict["src_dst"] = []
|
||||
# post-process pre- and post-indexing for aarch64 memory operands
|
||||
if self._isa == "aarch64":
|
||||
for operand in [op for op in op_dict["source"] if "memory" in op]:
|
||||
post_indexed = (
|
||||
"post_indexed" in operand["memory"]
|
||||
and operand["memory"]["post_indexed"]
|
||||
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||
)
|
||||
pre_indexed = (
|
||||
"pre_indexed" in operand["memory"]
|
||||
and operand["memory"]["pre_indexed"]
|
||||
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||
)
|
||||
if post_indexed or pre_indexed:
|
||||
op_dict["src_dst"].append(
|
||||
@@ -124,12 +117,10 @@ class ISASemantics(object):
|
||||
)
|
||||
for operand in [op for op in op_dict["destination"] if "memory" in op]:
|
||||
post_indexed = (
|
||||
"post_indexed" in operand["memory"]
|
||||
and operand["memory"]["post_indexed"]
|
||||
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||
)
|
||||
pre_indexed = (
|
||||
"pre_indexed" in operand["memory"]
|
||||
and operand["memory"]["pre_indexed"]
|
||||
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||
)
|
||||
if post_indexed or pre_indexed:
|
||||
op_dict["src_dst"].append(
|
||||
@@ -189,17 +180,14 @@ class ISASemantics(object):
|
||||
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||
return {
|
||||
base_name: {
|
||||
"name": o.memory.base.get("prefix", "")
|
||||
+ o.memory.base.name,
|
||||
"name": o.memory.base.get("prefix", "") + o.memory.base.name,
|
||||
"value": o.memory.post_indexed.value,
|
||||
}
|
||||
}
|
||||
return {}
|
||||
|
||||
reg_operand_names = {} # e.g., {'rax': 'op1'}
|
||||
operand_state = (
|
||||
{}
|
||||
) # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
|
||||
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
|
||||
|
||||
for o in instruction_form.operands:
|
||||
if "pre_indexed" in o.get("memory", {}):
|
||||
@@ -211,9 +199,7 @@ class ISASemantics(object):
|
||||
)
|
||||
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||
reg_operand_names = {base_name: "op1"}
|
||||
operand_state = {
|
||||
"op1": {"name": base_name, "value": o.memory.offset.value}
|
||||
}
|
||||
operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
|
||||
|
||||
if isa_data is not None and "operation" in isa_data:
|
||||
for i, o in enumerate(instruction_form.operands):
|
||||
@@ -254,18 +240,14 @@ class ISASemantics(object):
|
||||
op_dict["src_dst"] = []
|
||||
|
||||
# handle dependency breaking instructions
|
||||
if (
|
||||
"breaks_dependency_on_equal_operands" in isa_data
|
||||
and operands[1:] == operands[:-1]
|
||||
):
|
||||
if "breaks_dependency_on_equal_operands" in isa_data and operands[1:] == operands[:-1]:
|
||||
op_dict["destination"] += operands
|
||||
if "hidden_operands" in isa_data:
|
||||
op_dict["destination"] += [
|
||||
AttrDict.convert_dict(
|
||||
{
|
||||
hop["class"]: {
|
||||
k: hop[k]
|
||||
for k in ["name", "class", "source", "destination"]
|
||||
k: hop[k] for k in ["name", "class", "source", "destination"]
|
||||
}
|
||||
}
|
||||
)
|
||||
@@ -351,9 +333,7 @@ class ISASemantics(object):
|
||||
|
||||
def substitute_mem_address(self, operands):
|
||||
"""Create memory wildcard for all memory operands"""
|
||||
return [
|
||||
self._create_reg_wildcard() if "memory" in op else op for op in operands
|
||||
]
|
||||
return [self._create_reg_wildcard() if "memory" in op else op for op in operands]
|
||||
|
||||
def _create_reg_wildcard(self):
|
||||
"""Wildcard constructor"""
|
||||
|
||||
@@ -54,9 +54,7 @@ class KernelDG(nx.DiGraph):
|
||||
dg = nx.DiGraph()
|
||||
for i, instruction_form in enumerate(kernel):
|
||||
dg.add_node(instruction_form["line_number"])
|
||||
dg.nodes[instruction_form["line_number"]][
|
||||
"instruction_form"
|
||||
] = instruction_form
|
||||
dg.nodes[instruction_form["line_number"]]["instruction_form"] = instruction_form
|
||||
# add load as separate node if existent
|
||||
if (
|
||||
INSTR_FLAGS.HAS_LD in instruction_form["flags"]
|
||||
@@ -71,16 +69,12 @@ class KernelDG(nx.DiGraph):
|
||||
dg.add_edge(
|
||||
instruction_form["line_number"] + 0.1,
|
||||
instruction_form["line_number"],
|
||||
latency=instruction_form["latency"]
|
||||
- instruction_form["latency_wo_load"],
|
||||
latency=instruction_form["latency"] - instruction_form["latency_wo_load"],
|
||||
)
|
||||
for dep, dep_flags in self.find_depending(
|
||||
instruction_form, kernel[i + 1 :]
|
||||
):
|
||||
for dep, dep_flags in self.find_depending(instruction_form, kernel[i + 1 :]):
|
||||
edge_weight = (
|
||||
instruction_form["latency"]
|
||||
if "mem_dep" in dep_flags
|
||||
or "latency_wo_load" not in instruction_form
|
||||
if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form
|
||||
else instruction_form["latency_wo_load"]
|
||||
)
|
||||
if "storeload_dep" in dep_flags:
|
||||
@@ -312,9 +306,7 @@ class KernelDG(nx.DiGraph):
|
||||
# store to same location (presumed)
|
||||
if self.is_memstore(dst.memory, instr_form, register_changes):
|
||||
break
|
||||
self._update_reg_changes(
|
||||
instr_form, register_changes, only_postindexed=True
|
||||
)
|
||||
self._update_reg_changes(instr_form, register_changes, only_postindexed=True)
|
||||
|
||||
def _update_reg_changes(self, iform, reg_state=None, only_postindexed=False):
|
||||
if self.arch_sem is None:
|
||||
@@ -322,9 +314,7 @@ class KernelDG(nx.DiGraph):
|
||||
return {}
|
||||
if reg_state is None:
|
||||
reg_state = {}
|
||||
for reg, change in self.arch_sem.get_reg_changes(
|
||||
iform, only_postindexed
|
||||
).items():
|
||||
for reg, change in self.arch_sem.get_reg_changes(iform, only_postindexed).items():
|
||||
if change is None or reg_state.get(reg, {}) is None:
|
||||
reg_state[reg] = None
|
||||
else:
|
||||
@@ -362,23 +352,15 @@ class KernelDG(nx.DiGraph):
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
):
|
||||
if "register" in src:
|
||||
is_read = (
|
||||
self.parser.is_reg_dependend_of(register, src.register) or is_read
|
||||
)
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
|
||||
if "flag" in src:
|
||||
is_read = (
|
||||
self.parser.is_flag_dependend_of(register, src.flag) or is_read
|
||||
)
|
||||
is_read = self.parser.is_flag_dependend_of(register, src.flag) or is_read
|
||||
if "memory" in src:
|
||||
if src.memory.base is not None:
|
||||
is_read = (
|
||||
self.parser.is_reg_dependend_of(register, src.memory.base)
|
||||
or is_read
|
||||
)
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.memory.base) or is_read
|
||||
if src.memory.index is not None:
|
||||
is_read = (
|
||||
self.parser.is_reg_dependend_of(register, src.memory.index)
|
||||
or is_read
|
||||
self.parser.is_reg_dependend_of(register, src.memory.index) or is_read
|
||||
)
|
||||
# Check also if read in destination memory address
|
||||
for dst in chain(
|
||||
@@ -387,14 +369,10 @@ class KernelDG(nx.DiGraph):
|
||||
):
|
||||
if "memory" in dst:
|
||||
if dst.memory.base is not None:
|
||||
is_read = (
|
||||
self.parser.is_reg_dependend_of(register, dst.memory.base)
|
||||
or is_read
|
||||
)
|
||||
is_read = self.parser.is_reg_dependend_of(register, dst.memory.base) or is_read
|
||||
if dst.memory.index is not None:
|
||||
is_read = (
|
||||
self.parser.is_reg_dependend_of(register, dst.memory.index)
|
||||
or is_read
|
||||
self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read
|
||||
)
|
||||
return is_read
|
||||
|
||||
@@ -443,10 +421,7 @@ class KernelDG(nx.DiGraph):
|
||||
if mem.scale != src.scale:
|
||||
# scale factors do not match
|
||||
continue
|
||||
if (
|
||||
mem.index.get("prefix", "") + mem.index["name"]
|
||||
!= index_change["name"]
|
||||
):
|
||||
if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
|
||||
# index registers do not match
|
||||
continue
|
||||
addr_change += index_change["value"] * src.scale
|
||||
@@ -468,19 +443,13 @@ class KernelDG(nx.DiGraph):
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
):
|
||||
if "register" in dst:
|
||||
is_written = (
|
||||
self.parser.is_reg_dependend_of(register, dst.register)
|
||||
or is_written
|
||||
)
|
||||
is_written = self.parser.is_reg_dependend_of(register, dst.register) or is_written
|
||||
if "flag" in dst:
|
||||
is_written = (
|
||||
self.parser.is_flag_dependend_of(register, dst.flag) or is_written
|
||||
)
|
||||
is_written = self.parser.is_flag_dependend_of(register, dst.flag) or is_written
|
||||
if "memory" in dst:
|
||||
if "pre_indexed" in dst.memory or "post_indexed" in dst.memory:
|
||||
is_written = (
|
||||
self.parser.is_reg_dependend_of(register, dst.memory.base)
|
||||
or is_written
|
||||
self.parser.is_reg_dependend_of(register, dst.memory.base) or is_written
|
||||
)
|
||||
# Check also for possible pre- or post-indexing in memory addresses
|
||||
for src in chain(
|
||||
@@ -490,8 +459,7 @@ class KernelDG(nx.DiGraph):
|
||||
if "memory" in src:
|
||||
if "pre_indexed" in src.memory or "post_indexed" in src.memory:
|
||||
is_written = (
|
||||
self.parser.is_reg_dependend_of(register, src.memory.base)
|
||||
or is_written
|
||||
self.parser.is_reg_dependend_of(register, src.memory.base) or is_written
|
||||
)
|
||||
return is_written
|
||||
|
||||
@@ -522,9 +490,7 @@ class KernelDG(nx.DiGraph):
|
||||
lcd = self.get_loopcarried_dependencies()
|
||||
lcd_line_numbers = {}
|
||||
for dep in lcd:
|
||||
lcd_line_numbers[dep] = [
|
||||
x["line_number"] for x, lat in lcd[dep]["dependencies"]
|
||||
]
|
||||
lcd_line_numbers[dep] = [x["line_number"] for x, lat in lcd[dep]["dependencies"]]
|
||||
# add color scheme
|
||||
graph.graph["node"] = {"colorscheme": "accent8"}
|
||||
graph.graph["edge"] = {"colorscheme": "accent8"}
|
||||
@@ -535,9 +501,7 @@ class KernelDG(nx.DiGraph):
|
||||
max_line_number = max(lcd_line_numbers[dep])
|
||||
graph.add_edge(max_line_number, min_line_number)
|
||||
graph.edges[max_line_number, min_line_number]["latency"] = [
|
||||
lat
|
||||
for x, lat in lcd[dep]["dependencies"]
|
||||
if x["line_number"] == max_line_number
|
||||
lat for x, lat in lcd[dep]["dependencies"] if x["line_number"] == max_line_number
|
||||
]
|
||||
|
||||
# add label to edges
|
||||
@@ -546,9 +510,7 @@ class KernelDG(nx.DiGraph):
|
||||
|
||||
# add CP values to graph
|
||||
for n in cp:
|
||||
graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n[
|
||||
"latency_cp"
|
||||
]
|
||||
graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n["latency_cp"]
|
||||
|
||||
# color CP and LCD
|
||||
for n in graph.nodes:
|
||||
@@ -568,8 +530,7 @@ class KernelDG(nx.DiGraph):
|
||||
for e in graph.edges:
|
||||
if (
|
||||
graph.nodes[e[0]]["instruction_form"]["line_number"] in cp_line_numbers
|
||||
and graph.nodes[e[1]]["instruction_form"]["line_number"]
|
||||
in cp_line_numbers
|
||||
and graph.nodes[e[1]]["instruction_form"]["line_number"] in cp_line_numbers
|
||||
and e[0] < e[1]
|
||||
):
|
||||
bold_edge = True
|
||||
@@ -581,8 +542,7 @@ class KernelDG(nx.DiGraph):
|
||||
graph.edges[e]["penwidth"] = 3
|
||||
for dep in lcd_line_numbers:
|
||||
if (
|
||||
graph.nodes[e[0]]["instruction_form"]["line_number"]
|
||||
in lcd_line_numbers[dep]
|
||||
graph.nodes[e[0]]["instruction_form"]["line_number"] in lcd_line_numbers[dep]
|
||||
and graph.nodes[e[1]]["instruction_form"]["line_number"]
|
||||
in lcd_line_numbers[dep]
|
||||
):
|
||||
|
||||
@@ -133,11 +133,7 @@ def find_marked_section(
|
||||
index_end = -1
|
||||
for i, line in enumerate(lines):
|
||||
try:
|
||||
if (
|
||||
line.instruction is None
|
||||
and comments is not None
|
||||
and line.comment is not None
|
||||
):
|
||||
if line.instruction is None and comments is not None and line.comment is not None:
|
||||
if comments["start"] == line.comment:
|
||||
index_start = i + 1
|
||||
elif comments["end"] == line.comment:
|
||||
|
||||
Reference in New Issue
Block a user