fixed formatting with correct line length

JanLJL
2021-10-04 15:00:17 +02:00
parent e6ce870ca0
commit 5205cb5cc6
5 changed files with 76 additions and 208 deletions
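Every hunk below applies the same kind of change: statements that were wrapped across several short lines are collapsed onto fewer lines within the new length limit. A minimal sketch of how such over-long lines can be detected, assuming a 100-character limit and a placeholder file name (neither is stated in this commit):

    # Hypothetical helper, not part of this commit: report lines exceeding a length limit.
    from pathlib import Path


    def long_lines(path, limit=100):
        """Yield (line number, line length) for every line longer than `limit` characters."""
        for number, line in enumerate(Path(path).read_text().splitlines(), start=1):
            if len(line) > limit:
                yield number, len(line)


    if __name__ == "__main__":
        # "some_module.py" is a placeholder path, not a file from this repository.
        for number, length in long_lines("some_module.py", limit=100):
            print(f"line {number}: {length} characters")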

View File

@@ -46,9 +46,7 @@ class ArchSemantics(ISASemantics):
                 ports = list(uop[1])
                 indices = [port_list.index(p) for p in ports]
                 # check if port sum of used ports for uop are unbalanced
-                port_sums = self._to_list(
-                    itemgetter(*indices)(self.get_throughput_sum(kernel))
-                )
+                port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
                 instr_ports = self._to_list(
                     itemgetter(*indices)(instruction_form["port_pressure"])
                 )
@@ -67,9 +65,7 @@ class ArchSemantics(ISASemantics):
                         differences[max_port_idx] -= INC
                         differences[min_port_idx] += INC
                         # instr_ports = [round(p, 2) for p in instr_ports]
-                        self._itemsetter(*indices)(
-                            instruction_form["port_pressure"], *instr_ports
-                        )
+                        self._itemsetter(*indices)(instruction_form["port_pressure"], *instr_ports)
                         # check if min port is zero
                         if round(min(instr_ports), 2) <= 0:
                             # if port_pressure is not exactly 0.00, add the residual to
@@ -87,15 +83,12 @@ class ArchSemantics(ISASemantics):
                             zero_index = [
                                 p
                                 for p in indices
-                                if round(instruction_form["port_pressure"][p], 2)
-                                == 0
+                                if round(instruction_form["port_pressure"][p], 2) == 0
                             ][0]
                             instruction_form["port_pressure"][zero_index] = 0.0
                             # Remove from further balancing
                             indices = [
-                                p
-                                for p in indices
-                                if instruction_form["port_pressure"][p] > 0
+                                p for p in indices if instruction_form["port_pressure"][p] > 0
                             ]
                             instr_ports = self._to_list(
                                 itemgetter(*indices)(instruction_form["port_pressure"])
                             )
@@ -148,11 +141,9 @@ class ArchSemantics(ISASemantics):
                         if INSTR_FLAGS.HIDDEN_LD not in load_instr["flags"]
                     ]
                 )
-                load = [
-                    instr
-                    for instr in kernel
-                    if instr["line_number"] == min_distance_load[1]
-                ][0]
+                load = [instr for instr in kernel if instr["line_number"] == min_distance_load[1]][
+                    0
+                ]
                 # Hide load
                 load["flags"] += [INSTR_FLAGS.HIDDEN_LD]
                 load["port_pressure"] = self._nullify_data_ports(load["port_pressure"])
@@ -230,39 +221,27 @@ class ArchSemantics(ISASemantics):
             data_port_uops = self._machine_model.get_load_throughput(
                 [
                     x["memory"]
-                    for x in instruction_form["semantic_operands"][
-                        "source"
-                    ]
+                    for x in instruction_form["semantic_operands"]["source"]
                     + instruction_form["semantic_operands"]["src_dst"]
                     if "memory" in x
                 ][0]
             )
-            data_port_pressure = (
-                self._machine_model.average_port_pressure(
-                    data_port_uops
-                )
+            data_port_pressure = self._machine_model.average_port_pressure(
+                data_port_uops
             )
             if "load_throughput_multiplier" in self._machine_model:
-                multiplier = self._machine_model[
-                    "load_throughput_multiplier"
-                ][reg_type]
-                data_port_pressure = [
-                    pp * multiplier for pp in data_port_pressure
+                multiplier = self._machine_model["load_throughput_multiplier"][
+                    reg_type
                 ]
+                data_port_pressure = [pp * multiplier for pp in data_port_pressure]
             if INSTR_FLAGS.HAS_ST in instruction_form["flags"]:
                 # STORE performance data
                 destinations = (
                     instruction_form["semantic_operands"]["destination"]
                     + instruction_form["semantic_operands"]["src_dst"]
                 )
-                st_data_port_uops = (
-                    self._machine_model.get_store_throughput(
-                        [
-                            x["memory"]
-                            for x in destinations
-                            if "memory" in x
-                        ][0]
-                    )
+                st_data_port_uops = self._machine_model.get_store_throughput(
+                    [x["memory"] for x in destinations if "memory" in x][0]
                 )
                 # zero data port pressure and remove HAS_ST flag if
                 # - no mem operand in dst &&
@@ -271,16 +250,12 @@ class ArchSemantics(ISASemantics):
                 if (
                     self._isa == "aarch64"
                     and "memory"
-                    not in instruction_form["semantic_operands"][
-                        "destination"
-                    ]
+                    not in instruction_form["semantic_operands"]["destination"]
                     and all(
                         [
                             "post_indexed" in op["memory"]
                             or "pre_indexed" in op["memory"]
-                            for op in instruction_form["semantic_operands"][
-                                "src_dst"
-                            ]
+                            for op in instruction_form["semantic_operands"]["src_dst"]
                             if "memory" in op
                         ]
                     )
@@ -289,21 +264,18 @@ class ArchSemantics(ISASemantics):
                     instruction_form["flags"].remove(INSTR_FLAGS.HAS_ST)
                 # sum up all data ports in case for LOAD and STORE
-                st_data_port_pressure = (
-                    self._machine_model.average_port_pressure(
-                        st_data_port_uops
-                    )
+                st_data_port_pressure = self._machine_model.average_port_pressure(
+                    st_data_port_uops
                 )
                 if "store_throughput_multiplier" in self._machine_model:
-                    multiplier = self._machine_model[
-                        "store_throughput_multiplier"
-                    ][reg_type]
+                    multiplier = self._machine_model["store_throughput_multiplier"][
+                        reg_type
+                    ]
                     st_data_port_pressure = [
                         pp * multiplier for pp in st_data_port_pressure
                     ]
                 data_port_pressure = [
-                    sum(x)
-                    for x in zip(data_port_pressure, st_data_port_pressure)
+                    sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
                 ]
                 data_port_uops += st_data_port_uops
             throughput = max(
@@ -355,9 +327,7 @@ class ArchSemantics(ISASemantics):
             throughput = 0.0
             latency = 0.0
             latency_wo_load = latency
-            instruction_form["port_pressure"] = [
-                0.0 for i in range(port_number)
-            ]
+            instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
             instruction_form["port_uops"] = []
             flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN]
         # flatten flag list
@@ -373,9 +343,7 @@ class ArchSemantics(ISASemantics):
         instruction_form["latency_cp"] = 0
         instruction_form["latency_lcd"] = 0
 
-    def _handle_instruction_found(
-        self, instruction_data, port_number, instruction_form, flags
-    ):
+    def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
         """Apply performance data to instruction if it was found in the archDB"""
         throughput = instruction_data["throughput"]
         port_pressure = self._machine_model.average_port_pressure(
@@ -457,9 +425,7 @@ class ArchSemantics(ISASemantics):
         """Get the overall throughput sum separated by port of all instructions of a kernel."""
         # ignoring all lines with throughput == 0.0, because there won't be anything to sum up
         # typically comment, label and non-instruction lines
-        port_pressures = [
-            instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0
-        ]
+        port_pressures = [instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0]
         # Essentially summing up each columns of port_pressures, where each column is one port
         # and each row is one line of the kernel
         # round is necessary to ensure termination of ArchsSemantics.assign_optimal_throughput
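The port-balancing hunks above read and write several positions of the port-pressure list at once through operator.itemgetter and a matching setter (self._itemsetter). A minimal sketch of that access pattern with a stand-in setter and made-up pressure values (not the class methods themselves):

    # Sketch of the multi-index read/write pattern used in assign_optimal_throughput.
    from operator import itemgetter


    def itemsetter(*indices):
        """Return a setter that writes the given values at the given indices of a list."""
        def _setter(container, *values):
            for index, value in zip(indices, values):
                container[index] = value
        return _setter


    port_pressure = [0.5, 1.0, 0.0, 1.5]  # made-up per-port pressures
    indices = [1, 3]
    selected = list(itemgetter(*indices)(port_pressure))  # [1.0, 1.5]
    itemsetter(*indices)(port_pressure, 0.75, 1.75)        # port_pressure is now [0.5, 0.75, 0.0, 1.75]

With a single index, itemgetter returns a scalar instead of a tuple, which is presumably why the code above funnels its result through self._to_list before balancing.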

View File

@@ -18,9 +18,7 @@ from ruamel.yaml.compat import StringIO
 class MachineModel(object):
     WILDCARD = "*"
-    INTERNAL_VERSION = (
-        1  # increase whenever self._data format changes to invalidate cache!
-    )
+    INTERNAL_VERSION = 1  # increase whenever self._data format changes to invalidate cache!
     _runtime_cache = {}
 
     def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False):
@@ -45,9 +43,7 @@ class MachineModel(object):
                         "scale": s,
                         "port_pressure": [],
                     }
-                    for b, i, o, s in product(
-                        ["gpr"], ["gpr", None], ["imd", None], [1, 8]
-                    )
+                    for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
                 ],
                 "load_throughput_default": [],
                 "store_throughput": [],
@@ -89,9 +85,7 @@ class MachineModel(object):
             self._data["instruction_forms"] = []
         # separate multi-alias instruction forms
         for entry in [
-            x
-            for x in self._data["instruction_forms"]
-            if isinstance(x["name"], list)
+            x for x in self._data["instruction_forms"] if isinstance(x["name"], list)
         ]:
             for name in entry["name"]:
                 new_entry = {"name": name}
@@ -139,9 +133,7 @@ class MachineModel(object):
                 instruction_form
                 for instruction_form in name_matched_iforms
                 if self._match_operands(
-                    instruction_form["operands"]
-                    if "operands" in instruction_form
-                    else [],
+                    instruction_form["operands"] if "operands" in instruction_form else [],
                     operands,
                 )
             )
@@ -223,19 +215,11 @@ class MachineModel(object):
 
     def get_load_latency(self, reg_type):
         """Return load latency for given register type."""
-        return (
-            self._data["load_latency"][reg_type]
-            if self._data["load_latency"][reg_type]
-            else 0
-        )
+        return self._data["load_latency"][reg_type] if self._data["load_latency"][reg_type] else 0
 
     def get_load_throughput(self, memory):
         """Return load thorughput for given register type."""
-        ld_tp = [
-            m
-            for m in self._data["load_throughput"]
-            if self._match_mem_entries(memory, m)
-        ]
+        ld_tp = [m for m in self._data["load_throughput"] if self._match_mem_entries(memory, m)]
         if len(ld_tp) > 0:
             return ld_tp[0]["port_pressure"].copy()
         return self._data["load_throughput_default"].copy()
@@ -247,11 +231,7 @@ class MachineModel(object):
 
     def get_store_throughput(self, memory):
         """Return store throughput for given register type."""
-        st_tp = [
-            m
-            for m in self._data["store_throughput"]
-            if self._match_mem_entries(memory, m)
-        ]
+        st_tp = [m for m in self._data["store_throughput"] if self._match_mem_entries(memory, m)]
         if len(st_tp) > 0:
             return st_tp[0]["port_pressure"].copy()
         return self._data["store_throughput_default"].copy()
@@ -319,9 +299,7 @@ class MachineModel(object):
         formatted_instruction_forms = deepcopy(self._data["instruction_forms"])
         for instruction_form in formatted_instruction_forms:
             if instruction_form["port_pressure"] is not None:
-                cs = ruamel.yaml.comments.CommentedSeq(
-                    instruction_form["port_pressure"]
-                )
+                cs = ruamel.yaml.comments.CommentedSeq(instruction_form["port_pressure"])
                 cs.fa.set_flow_style()
                 instruction_form["port_pressure"] = cs
@@ -371,9 +349,7 @@ class MachineModel(object):
         hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
         # 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
-        companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
-            ".pickle"
-        )
+        companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
         if companion_cachefile.exists():
             # companion file (must be up-to-date, due to equal hash)
             with companion_cachefile.open("rb") as f:
@@ -382,9 +358,7 @@ class MachineModel(object):
                 return data
         # 2. home cachefile: ~/.osaca/cache/<name>_<sha512hash>.pickle
-        home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(
-            ".pickle"
-        )
+        home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(".pickle")
         if home_cachefile.exists():
             # home file (must be up-to-date, due to equal hash)
             with home_cachefile.open("rb") as f:
@@ -403,9 +377,7 @@ class MachineModel(object):
         p = Path(filepath)
         hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
         # 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
-        companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
-            ".pickle"
-        )
+        companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
         if os.access(str(companion_cachefile.parent), os.W_OK):
             with companion_cachefile.open("wb") as f:
                 pickle.dump(self._data, f)
@@ -449,9 +421,7 @@ class MachineModel(object):
                 operand_string += operand["prefix"]
                 operand_string += operand["shape"] if "shape" in operand else ""
             elif "name" in operand:
-                operand_string += (
-                    "r" if operand["name"] == "gpr" else operand["name"][0]
-                )
+                operand_string += "r" if operand["name"] == "gpr" else operand["name"][0]
             elif opclass == "memory":
                 # Memory
                 operand_string += "m"
@@ -614,9 +584,7 @@ class MachineModel(object):
         if "register" in operand:
             if i_operand["class"] != "register":
                 return False
-            return self._is_x86_reg_type(
-                i_operand, operand["register"], consider_masking=False
-            )
+            return self._is_x86_reg_type(i_operand, operand["register"], consider_masking=False)
         # memory
         if "memory" in operand:
             if i_operand["class"] != "memory":
@@ -664,8 +632,7 @@ class MachineModel(object):
                 return False
         if "shape" in reg:
             if "shape" in i_reg and (
-                reg["shape"] == i_reg["shape"]
-                or self.WILDCARD in (reg["shape"] + i_reg["shape"])
+                reg["shape"] == i_reg["shape"] or self.WILDCARD in (reg["shape"] + i_reg["shape"])
             ):
                 return True
             return False
@@ -695,8 +662,7 @@ class MachineModel(object):
             if (
                 (
                     "mask" in reg
-                    and reg["mask"].rstrip(string.digits).lower()
-                    == i_reg.get("mask")
+                    and reg["mask"].rstrip(string.digits).lower() == i_reg.get("mask")
                 )
                 or reg.get("mask") == self.WILDCARD
                 or i_reg.get("mask") == self.WILDCARD
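The cache hunks above name the pickle cache after a SHA-256 hash of the machine-model YAML contents and place it next to the YAML file. A minimal sketch of that naming scheme, mirroring the calls visible in the diff but with a hypothetical input path:

    # Sketch of the companion-cachefile naming used above: '.<name>_<hash>.pickle' next to the YAML.
    import hashlib
    from pathlib import Path


    def companion_cachefile(yaml_path):
        """Return the companion cache path for a machine-model YAML file (assumed layout)."""
        p = Path(yaml_path)
        hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
        return p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")

Because the hash covers the file contents, a stale cache file simply stops matching once the YAML changes, which is the invalidation strategy the diff relies on.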

View File

@@ -45,10 +45,7 @@ class ISASemantics(object):
     def assign_src_dst(self, instruction_form):
         """Update instruction form dictionary with source, destination and flag information."""
         # if the instruction form doesn't have operands or is None, there's nothing to do
-        if (
-            instruction_form["operands"] is None
-            or instruction_form["instruction"] is None
-        ):
+        if instruction_form["operands"] is None or instruction_form["instruction"] is None:
             instruction_form["semantic_operands"] = AttrDict(
                 {"source": [], "destination": [], "src_dst": []}
             )
@@ -97,20 +94,16 @@ class ISASemantics(object):
         if assign_default:
             # no irregular operand structure, apply default
             op_dict["source"] = self._get_regular_source_operands(instruction_form)
-            op_dict["destination"] = self._get_regular_destination_operands(
-                instruction_form
-            )
+            op_dict["destination"] = self._get_regular_destination_operands(instruction_form)
             op_dict["src_dst"] = []
         # post-process pre- and post-indexing for aarch64 memory operands
         if self._isa == "aarch64":
             for operand in [op for op in op_dict["source"] if "memory" in op]:
                 post_indexed = (
-                    "post_indexed" in operand["memory"]
-                    and operand["memory"]["post_indexed"]
+                    "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
                 )
                 pre_indexed = (
-                    "pre_indexed" in operand["memory"]
-                    and operand["memory"]["pre_indexed"]
+                    "pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
                 )
                 if post_indexed or pre_indexed:
                     op_dict["src_dst"].append(
@@ -124,12 +117,10 @@ class ISASemantics(object):
                     )
             for operand in [op for op in op_dict["destination"] if "memory" in op]:
                 post_indexed = (
-                    "post_indexed" in operand["memory"]
-                    and operand["memory"]["post_indexed"]
+                    "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
                 )
                 pre_indexed = (
-                    "pre_indexed" in operand["memory"]
-                    and operand["memory"]["pre_indexed"]
+                    "pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
                 )
                 if post_indexed or pre_indexed:
                     op_dict["src_dst"].append(
@@ -189,17 +180,14 @@ class ISASemantics(object):
                     base_name = o.memory.base.get("prefix", "") + o.memory.base.name
                     return {
                         base_name: {
-                            "name": o.memory.base.get("prefix", "")
-                            + o.memory.base.name,
+                            "name": o.memory.base.get("prefix", "") + o.memory.base.name,
                             "value": o.memory.post_indexed.value,
                         }
                     }
             return {}
 
         reg_operand_names = {}  # e.g., {'rax': 'op1'}
-        operand_state = (
-            {}
-        )  # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
+        operand_state = {}  # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
 
         for o in instruction_form.operands:
             if "pre_indexed" in o.get("memory", {}):
@@ -211,9 +199,7 @@ class ISASemantics(object):
                 )
                 base_name = o.memory.base.get("prefix", "") + o.memory.base.name
                 reg_operand_names = {base_name: "op1"}
-                operand_state = {
-                    "op1": {"name": base_name, "value": o.memory.offset.value}
-                }
+                operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
 
         if isa_data is not None and "operation" in isa_data:
             for i, o in enumerate(instruction_form.operands):
@@ -254,18 +240,14 @@ class ISASemantics(object):
             op_dict["src_dst"] = []
         # handle dependency breaking instructions
-        if (
-            "breaks_dependency_on_equal_operands" in isa_data
-            and operands[1:] == operands[:-1]
-        ):
+        if "breaks_dependency_on_equal_operands" in isa_data and operands[1:] == operands[:-1]:
             op_dict["destination"] += operands
 
         if "hidden_operands" in isa_data:
             op_dict["destination"] += [
                 AttrDict.convert_dict(
                     {
                         hop["class"]: {
-                            k: hop[k]
-                            for k in ["name", "class", "source", "destination"]
+                            k: hop[k] for k in ["name", "class", "source", "destination"]
                         }
                     }
                 )
@@ -351,9 +333,7 @@ class ISASemantics(object):
 
     def substitute_mem_address(self, operands):
         """Create memory wildcard for all memory operands"""
-        return [
-            self._create_reg_wildcard() if "memory" in op else op for op in operands
-        ]
+        return [self._create_reg_wildcard() if "memory" in op else op for op in operands]
 
     def _create_reg_wildcard(self):
         """Wildcard constructor"""

View File

@@ -54,9 +54,7 @@ class KernelDG(nx.DiGraph):
         dg = nx.DiGraph()
         for i, instruction_form in enumerate(kernel):
             dg.add_node(instruction_form["line_number"])
-            dg.nodes[instruction_form["line_number"]][
-                "instruction_form"
-            ] = instruction_form
+            dg.nodes[instruction_form["line_number"]]["instruction_form"] = instruction_form
             # add load as separate node if existent
             if (
                 INSTR_FLAGS.HAS_LD in instruction_form["flags"]
@@ -71,16 +69,12 @@ class KernelDG(nx.DiGraph):
                 dg.add_edge(
                     instruction_form["line_number"] + 0.1,
                     instruction_form["line_number"],
-                    latency=instruction_form["latency"]
-                    - instruction_form["latency_wo_load"],
+                    latency=instruction_form["latency"] - instruction_form["latency_wo_load"],
                 )
-            for dep, dep_flags in self.find_depending(
-                instruction_form, kernel[i + 1 :]
-            ):
+            for dep, dep_flags in self.find_depending(instruction_form, kernel[i + 1 :]):
                 edge_weight = (
                     instruction_form["latency"]
-                    if "mem_dep" in dep_flags
-                    or "latency_wo_load" not in instruction_form
+                    if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form
                     else instruction_form["latency_wo_load"]
                 )
                 if "storeload_dep" in dep_flags:
@@ -312,9 +306,7 @@ class KernelDG(nx.DiGraph):
                     # store to same location (presumed)
                     if self.is_memstore(dst.memory, instr_form, register_changes):
                         break
-            self._update_reg_changes(
-                instr_form, register_changes, only_postindexed=True
-            )
+            self._update_reg_changes(instr_form, register_changes, only_postindexed=True)
 
     def _update_reg_changes(self, iform, reg_state=None, only_postindexed=False):
         if self.arch_sem is None:
@@ -322,9 +314,7 @@ class KernelDG(nx.DiGraph):
             return {}
         if reg_state is None:
             reg_state = {}
-        for reg, change in self.arch_sem.get_reg_changes(
-            iform, only_postindexed
-        ).items():
+        for reg, change in self.arch_sem.get_reg_changes(iform, only_postindexed).items():
             if change is None or reg_state.get(reg, {}) is None:
                 reg_state[reg] = None
             else:
@@ -362,23 +352,15 @@ class KernelDG(nx.DiGraph):
             instruction_form.semantic_operands.src_dst,
         ):
             if "register" in src:
-                is_read = (
-                    self.parser.is_reg_dependend_of(register, src.register) or is_read
-                )
+                is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
             if "flag" in src:
-                is_read = (
-                    self.parser.is_flag_dependend_of(register, src.flag) or is_read
-                )
+                is_read = self.parser.is_flag_dependend_of(register, src.flag) or is_read
             if "memory" in src:
                 if src.memory.base is not None:
-                    is_read = (
-                        self.parser.is_reg_dependend_of(register, src.memory.base)
-                        or is_read
-                    )
+                    is_read = self.parser.is_reg_dependend_of(register, src.memory.base) or is_read
                 if src.memory.index is not None:
                     is_read = (
-                        self.parser.is_reg_dependend_of(register, src.memory.index)
-                        or is_read
+                        self.parser.is_reg_dependend_of(register, src.memory.index) or is_read
                     )
         # Check also if read in destination memory address
         for dst in chain(
@@ -387,14 +369,10 @@ class KernelDG(nx.DiGraph):
         ):
             if "memory" in dst:
                 if dst.memory.base is not None:
-                    is_read = (
-                        self.parser.is_reg_dependend_of(register, dst.memory.base)
-                        or is_read
-                    )
+                    is_read = self.parser.is_reg_dependend_of(register, dst.memory.base) or is_read
                 if dst.memory.index is not None:
                     is_read = (
-                        self.parser.is_reg_dependend_of(register, dst.memory.index)
-                        or is_read
+                        self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read
                     )
 
         return is_read
@@ -443,10 +421,7 @@ class KernelDG(nx.DiGraph):
                     if mem.scale != src.scale:
                         # scale factors do not match
                         continue
-                    if (
-                        mem.index.get("prefix", "") + mem.index["name"]
-                        != index_change["name"]
-                    ):
+                    if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
                         # index registers do not match
                         continue
                     addr_change += index_change["value"] * src.scale
@@ -468,19 +443,13 @@ class KernelDG(nx.DiGraph):
             instruction_form.semantic_operands.src_dst,
         ):
             if "register" in dst:
-                is_written = (
-                    self.parser.is_reg_dependend_of(register, dst.register)
-                    or is_written
-                )
+                is_written = self.parser.is_reg_dependend_of(register, dst.register) or is_written
             if "flag" in dst:
-                is_written = (
-                    self.parser.is_flag_dependend_of(register, dst.flag) or is_written
-                )
+                is_written = self.parser.is_flag_dependend_of(register, dst.flag) or is_written
             if "memory" in dst:
                 if "pre_indexed" in dst.memory or "post_indexed" in dst.memory:
                     is_written = (
-                        self.parser.is_reg_dependend_of(register, dst.memory.base)
-                        or is_written
+                        self.parser.is_reg_dependend_of(register, dst.memory.base) or is_written
                     )
         # Check also for possible pre- or post-indexing in memory addresses
         for src in chain(
@@ -490,8 +459,7 @@ class KernelDG(nx.DiGraph):
             if "memory" in src:
                 if "pre_indexed" in src.memory or "post_indexed" in src.memory:
                     is_written = (
-                        self.parser.is_reg_dependend_of(register, src.memory.base)
-                        or is_written
+                        self.parser.is_reg_dependend_of(register, src.memory.base) or is_written
                     )
 
         return is_written
@@ -522,9 +490,7 @@ class KernelDG(nx.DiGraph):
         lcd = self.get_loopcarried_dependencies()
         lcd_line_numbers = {}
         for dep in lcd:
-            lcd_line_numbers[dep] = [
-                x["line_number"] for x, lat in lcd[dep]["dependencies"]
-            ]
+            lcd_line_numbers[dep] = [x["line_number"] for x, lat in lcd[dep]["dependencies"]]
         # add color scheme
         graph.graph["node"] = {"colorscheme": "accent8"}
         graph.graph["edge"] = {"colorscheme": "accent8"}
@@ -535,9 +501,7 @@ class KernelDG(nx.DiGraph):
             max_line_number = max(lcd_line_numbers[dep])
             graph.add_edge(max_line_number, min_line_number)
             graph.edges[max_line_number, min_line_number]["latency"] = [
-                lat
-                for x, lat in lcd[dep]["dependencies"]
-                if x["line_number"] == max_line_number
+                lat for x, lat in lcd[dep]["dependencies"] if x["line_number"] == max_line_number
             ]
 
         # add label to edges
@@ -546,9 +510,7 @@ class KernelDG(nx.DiGraph):
 
         # add CP values to graph
         for n in cp:
-            graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n[
-                "latency_cp"
-            ]
+            graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n["latency_cp"]
 
         # color CP and LCD
         for n in graph.nodes:
@@ -568,8 +530,7 @@ class KernelDG(nx.DiGraph):
         for e in graph.edges:
             if (
                 graph.nodes[e[0]]["instruction_form"]["line_number"] in cp_line_numbers
-                and graph.nodes[e[1]]["instruction_form"]["line_number"]
-                in cp_line_numbers
+                and graph.nodes[e[1]]["instruction_form"]["line_number"] in cp_line_numbers
                 and e[0] < e[1]
             ):
                 bold_edge = True
@@ -581,8 +542,7 @@ class KernelDG(nx.DiGraph):
                 graph.edges[e]["penwidth"] = 3
             for dep in lcd_line_numbers:
                 if (
-                    graph.nodes[e[0]]["instruction_form"]["line_number"]
-                    in lcd_line_numbers[dep]
+                    graph.nodes[e[0]]["instruction_form"]["line_number"] in lcd_line_numbers[dep]
                     and graph.nodes[e[1]]["instruction_form"]["line_number"]
                     in lcd_line_numbers[dep]
                 ):
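The KernelDG hunks above keep each instruction form as a node attribute and the dependency latency as an edge attribute of a networkx DiGraph. A minimal sketch of that layout with made-up line numbers and latencies rather than a real kernel:

    # Sketch of the node/edge attribute layout used above (toy values, not a real kernel).
    import networkx as nx

    dg = nx.DiGraph()
    for line_number, latency in [(1, 4.0), (2, 1.0)]:
        dg.add_node(line_number)
        dg.nodes[line_number]["instruction_form"] = {"line_number": line_number, "latency": latency}

    # the instruction in line 1 feeds the one in line 2; the edge carries the producer's latency
    dg.add_edge(1, 2, latency=dg.nodes[1]["instruction_form"]["latency"])
    print(dg.edges[1, 2]["latency"])  # 4.0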

View File

@@ -133,11 +133,7 @@ def find_marked_section(
     index_end = -1
     for i, line in enumerate(lines):
         try:
-            if (
-                line.instruction is None
-                and comments is not None
-                and line.comment is not None
-            ):
+            if line.instruction is None and comments is not None and line.comment is not None:
                 if comments["start"] == line.comment:
                     index_start = i + 1
                 elif comments["end"] == line.comment: