mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-05 02:30:08 +01:00
fixed formatting with correct line length
This commit is contained in:
@@ -46,9 +46,7 @@ class ArchSemantics(ISASemantics):
|
|||||||
ports = list(uop[1])
|
ports = list(uop[1])
|
||||||
indices = [port_list.index(p) for p in ports]
|
indices = [port_list.index(p) for p in ports]
|
||||||
# check if port sum of used ports for uop are unbalanced
|
# check if port sum of used ports for uop are unbalanced
|
||||||
port_sums = self._to_list(
|
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
|
||||||
itemgetter(*indices)(self.get_throughput_sum(kernel))
|
|
||||||
)
|
|
||||||
instr_ports = self._to_list(
|
instr_ports = self._to_list(
|
||||||
itemgetter(*indices)(instruction_form["port_pressure"])
|
itemgetter(*indices)(instruction_form["port_pressure"])
|
||||||
)
|
)
|
||||||
@@ -67,9 +65,7 @@ class ArchSemantics(ISASemantics):
|
|||||||
differences[max_port_idx] -= INC
|
differences[max_port_idx] -= INC
|
||||||
differences[min_port_idx] += INC
|
differences[min_port_idx] += INC
|
||||||
# instr_ports = [round(p, 2) for p in instr_ports]
|
# instr_ports = [round(p, 2) for p in instr_ports]
|
||||||
self._itemsetter(*indices)(
|
self._itemsetter(*indices)(instruction_form["port_pressure"], *instr_ports)
|
||||||
instruction_form["port_pressure"], *instr_ports
|
|
||||||
)
|
|
||||||
# check if min port is zero
|
# check if min port is zero
|
||||||
if round(min(instr_ports), 2) <= 0:
|
if round(min(instr_ports), 2) <= 0:
|
||||||
# if port_pressure is not exactly 0.00, add the residual to
|
# if port_pressure is not exactly 0.00, add the residual to
|
||||||
@@ -87,15 +83,12 @@ class ArchSemantics(ISASemantics):
|
|||||||
zero_index = [
|
zero_index = [
|
||||||
p
|
p
|
||||||
for p in indices
|
for p in indices
|
||||||
if round(instruction_form["port_pressure"][p], 2)
|
if round(instruction_form["port_pressure"][p], 2) == 0
|
||||||
== 0
|
|
||||||
][0]
|
][0]
|
||||||
instruction_form["port_pressure"][zero_index] = 0.0
|
instruction_form["port_pressure"][zero_index] = 0.0
|
||||||
# Remove from further balancing
|
# Remove from further balancing
|
||||||
indices = [
|
indices = [
|
||||||
p
|
p for p in indices if instruction_form["port_pressure"][p] > 0
|
||||||
for p in indices
|
|
||||||
if instruction_form["port_pressure"][p] > 0
|
|
||||||
]
|
]
|
||||||
instr_ports = self._to_list(
|
instr_ports = self._to_list(
|
||||||
itemgetter(*indices)(instruction_form["port_pressure"])
|
itemgetter(*indices)(instruction_form["port_pressure"])
|
||||||
@@ -148,11 +141,9 @@ class ArchSemantics(ISASemantics):
|
|||||||
if INSTR_FLAGS.HIDDEN_LD not in load_instr["flags"]
|
if INSTR_FLAGS.HIDDEN_LD not in load_instr["flags"]
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
load = [
|
load = [instr for instr in kernel if instr["line_number"] == min_distance_load[1]][
|
||||||
instr
|
0
|
||||||
for instr in kernel
|
]
|
||||||
if instr["line_number"] == min_distance_load[1]
|
|
||||||
][0]
|
|
||||||
# Hide load
|
# Hide load
|
||||||
load["flags"] += [INSTR_FLAGS.HIDDEN_LD]
|
load["flags"] += [INSTR_FLAGS.HIDDEN_LD]
|
||||||
load["port_pressure"] = self._nullify_data_ports(load["port_pressure"])
|
load["port_pressure"] = self._nullify_data_ports(load["port_pressure"])
|
||||||
@@ -230,39 +221,27 @@ class ArchSemantics(ISASemantics):
|
|||||||
data_port_uops = self._machine_model.get_load_throughput(
|
data_port_uops = self._machine_model.get_load_throughput(
|
||||||
[
|
[
|
||||||
x["memory"]
|
x["memory"]
|
||||||
for x in instruction_form["semantic_operands"][
|
for x in instruction_form["semantic_operands"]["source"]
|
||||||
"source"
|
|
||||||
]
|
|
||||||
+ instruction_form["semantic_operands"]["src_dst"]
|
+ instruction_form["semantic_operands"]["src_dst"]
|
||||||
if "memory" in x
|
if "memory" in x
|
||||||
][0]
|
][0]
|
||||||
)
|
)
|
||||||
data_port_pressure = (
|
data_port_pressure = self._machine_model.average_port_pressure(
|
||||||
self._machine_model.average_port_pressure(
|
data_port_uops
|
||||||
data_port_uops
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
if "load_throughput_multiplier" in self._machine_model:
|
if "load_throughput_multiplier" in self._machine_model:
|
||||||
multiplier = self._machine_model[
|
multiplier = self._machine_model["load_throughput_multiplier"][
|
||||||
"load_throughput_multiplier"
|
reg_type
|
||||||
][reg_type]
|
|
||||||
data_port_pressure = [
|
|
||||||
pp * multiplier for pp in data_port_pressure
|
|
||||||
]
|
]
|
||||||
|
data_port_pressure = [pp * multiplier for pp in data_port_pressure]
|
||||||
if INSTR_FLAGS.HAS_ST in instruction_form["flags"]:
|
if INSTR_FLAGS.HAS_ST in instruction_form["flags"]:
|
||||||
# STORE performance data
|
# STORE performance data
|
||||||
destinations = (
|
destinations = (
|
||||||
instruction_form["semantic_operands"]["destination"]
|
instruction_form["semantic_operands"]["destination"]
|
||||||
+ instruction_form["semantic_operands"]["src_dst"]
|
+ instruction_form["semantic_operands"]["src_dst"]
|
||||||
)
|
)
|
||||||
st_data_port_uops = (
|
st_data_port_uops = self._machine_model.get_store_throughput(
|
||||||
self._machine_model.get_store_throughput(
|
[x["memory"] for x in destinations if "memory" in x][0]
|
||||||
[
|
|
||||||
x["memory"]
|
|
||||||
for x in destinations
|
|
||||||
if "memory" in x
|
|
||||||
][0]
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
# zero data port pressure and remove HAS_ST flag if
|
# zero data port pressure and remove HAS_ST flag if
|
||||||
# - no mem operand in dst &&
|
# - no mem operand in dst &&
|
||||||
@@ -271,16 +250,12 @@ class ArchSemantics(ISASemantics):
|
|||||||
if (
|
if (
|
||||||
self._isa == "aarch64"
|
self._isa == "aarch64"
|
||||||
and "memory"
|
and "memory"
|
||||||
not in instruction_form["semantic_operands"][
|
not in instruction_form["semantic_operands"]["destination"]
|
||||||
"destination"
|
|
||||||
]
|
|
||||||
and all(
|
and all(
|
||||||
[
|
[
|
||||||
"post_indexed" in op["memory"]
|
"post_indexed" in op["memory"]
|
||||||
or "pre_indexed" in op["memory"]
|
or "pre_indexed" in op["memory"]
|
||||||
for op in instruction_form["semantic_operands"][
|
for op in instruction_form["semantic_operands"]["src_dst"]
|
||||||
"src_dst"
|
|
||||||
]
|
|
||||||
if "memory" in op
|
if "memory" in op
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
@@ -289,21 +264,18 @@ class ArchSemantics(ISASemantics):
|
|||||||
instruction_form["flags"].remove(INSTR_FLAGS.HAS_ST)
|
instruction_form["flags"].remove(INSTR_FLAGS.HAS_ST)
|
||||||
|
|
||||||
# sum up all data ports in case for LOAD and STORE
|
# sum up all data ports in case for LOAD and STORE
|
||||||
st_data_port_pressure = (
|
st_data_port_pressure = self._machine_model.average_port_pressure(
|
||||||
self._machine_model.average_port_pressure(
|
st_data_port_uops
|
||||||
st_data_port_uops
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
if "store_throughput_multiplier" in self._machine_model:
|
if "store_throughput_multiplier" in self._machine_model:
|
||||||
multiplier = self._machine_model[
|
multiplier = self._machine_model["store_throughput_multiplier"][
|
||||||
"store_throughput_multiplier"
|
reg_type
|
||||||
][reg_type]
|
]
|
||||||
st_data_port_pressure = [
|
st_data_port_pressure = [
|
||||||
pp * multiplier for pp in st_data_port_pressure
|
pp * multiplier for pp in st_data_port_pressure
|
||||||
]
|
]
|
||||||
data_port_pressure = [
|
data_port_pressure = [
|
||||||
sum(x)
|
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
|
||||||
for x in zip(data_port_pressure, st_data_port_pressure)
|
|
||||||
]
|
]
|
||||||
data_port_uops += st_data_port_uops
|
data_port_uops += st_data_port_uops
|
||||||
throughput = max(
|
throughput = max(
|
||||||
@@ -355,9 +327,7 @@ class ArchSemantics(ISASemantics):
|
|||||||
throughput = 0.0
|
throughput = 0.0
|
||||||
latency = 0.0
|
latency = 0.0
|
||||||
latency_wo_load = latency
|
latency_wo_load = latency
|
||||||
instruction_form["port_pressure"] = [
|
instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
|
||||||
0.0 for i in range(port_number)
|
|
||||||
]
|
|
||||||
instruction_form["port_uops"] = []
|
instruction_form["port_uops"] = []
|
||||||
flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN]
|
flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN]
|
||||||
# flatten flag list
|
# flatten flag list
|
||||||
@@ -373,9 +343,7 @@ class ArchSemantics(ISASemantics):
|
|||||||
instruction_form["latency_cp"] = 0
|
instruction_form["latency_cp"] = 0
|
||||||
instruction_form["latency_lcd"] = 0
|
instruction_form["latency_lcd"] = 0
|
||||||
|
|
||||||
def _handle_instruction_found(
|
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
|
||||||
self, instruction_data, port_number, instruction_form, flags
|
|
||||||
):
|
|
||||||
"""Apply performance data to instruction if it was found in the archDB"""
|
"""Apply performance data to instruction if it was found in the archDB"""
|
||||||
throughput = instruction_data["throughput"]
|
throughput = instruction_data["throughput"]
|
||||||
port_pressure = self._machine_model.average_port_pressure(
|
port_pressure = self._machine_model.average_port_pressure(
|
||||||
@@ -457,9 +425,7 @@ class ArchSemantics(ISASemantics):
|
|||||||
"""Get the overall throughput sum separated by port of all instructions of a kernel."""
|
"""Get the overall throughput sum separated by port of all instructions of a kernel."""
|
||||||
# ignoring all lines with throughput == 0.0, because there won't be anything to sum up
|
# ignoring all lines with throughput == 0.0, because there won't be anything to sum up
|
||||||
# typically comment, label and non-instruction lines
|
# typically comment, label and non-instruction lines
|
||||||
port_pressures = [
|
port_pressures = [instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0]
|
||||||
instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0
|
|
||||||
]
|
|
||||||
# Essentially summing up each columns of port_pressures, where each column is one port
|
# Essentially summing up each columns of port_pressures, where each column is one port
|
||||||
# and each row is one line of the kernel
|
# and each row is one line of the kernel
|
||||||
# round is necessary to ensure termination of ArchsSemantics.assign_optimal_throughput
|
# round is necessary to ensure termination of ArchsSemantics.assign_optimal_throughput
|
||||||
|
|||||||
@@ -18,9 +18,7 @@ from ruamel.yaml.compat import StringIO
|
|||||||
|
|
||||||
class MachineModel(object):
|
class MachineModel(object):
|
||||||
WILDCARD = "*"
|
WILDCARD = "*"
|
||||||
INTERNAL_VERSION = (
|
INTERNAL_VERSION = 1 # increase whenever self._data format changes to invalidate cache!
|
||||||
1 # increase whenever self._data format changes to invalidate cache!
|
|
||||||
)
|
|
||||||
_runtime_cache = {}
|
_runtime_cache = {}
|
||||||
|
|
||||||
def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False):
|
def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False):
|
||||||
@@ -45,9 +43,7 @@ class MachineModel(object):
|
|||||||
"scale": s,
|
"scale": s,
|
||||||
"port_pressure": [],
|
"port_pressure": [],
|
||||||
}
|
}
|
||||||
for b, i, o, s in product(
|
for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
|
||||||
["gpr"], ["gpr", None], ["imd", None], [1, 8]
|
|
||||||
)
|
|
||||||
],
|
],
|
||||||
"load_throughput_default": [],
|
"load_throughput_default": [],
|
||||||
"store_throughput": [],
|
"store_throughput": [],
|
||||||
@@ -89,9 +85,7 @@ class MachineModel(object):
|
|||||||
self._data["instruction_forms"] = []
|
self._data["instruction_forms"] = []
|
||||||
# separate multi-alias instruction forms
|
# separate multi-alias instruction forms
|
||||||
for entry in [
|
for entry in [
|
||||||
x
|
x for x in self._data["instruction_forms"] if isinstance(x["name"], list)
|
||||||
for x in self._data["instruction_forms"]
|
|
||||||
if isinstance(x["name"], list)
|
|
||||||
]:
|
]:
|
||||||
for name in entry["name"]:
|
for name in entry["name"]:
|
||||||
new_entry = {"name": name}
|
new_entry = {"name": name}
|
||||||
@@ -139,9 +133,7 @@ class MachineModel(object):
|
|||||||
instruction_form
|
instruction_form
|
||||||
for instruction_form in name_matched_iforms
|
for instruction_form in name_matched_iforms
|
||||||
if self._match_operands(
|
if self._match_operands(
|
||||||
instruction_form["operands"]
|
instruction_form["operands"] if "operands" in instruction_form else [],
|
||||||
if "operands" in instruction_form
|
|
||||||
else [],
|
|
||||||
operands,
|
operands,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -223,19 +215,11 @@ class MachineModel(object):
|
|||||||
|
|
||||||
def get_load_latency(self, reg_type):
|
def get_load_latency(self, reg_type):
|
||||||
"""Return load latency for given register type."""
|
"""Return load latency for given register type."""
|
||||||
return (
|
return self._data["load_latency"][reg_type] if self._data["load_latency"][reg_type] else 0
|
||||||
self._data["load_latency"][reg_type]
|
|
||||||
if self._data["load_latency"][reg_type]
|
|
||||||
else 0
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_load_throughput(self, memory):
|
def get_load_throughput(self, memory):
|
||||||
"""Return load thorughput for given register type."""
|
"""Return load thorughput for given register type."""
|
||||||
ld_tp = [
|
ld_tp = [m for m in self._data["load_throughput"] if self._match_mem_entries(memory, m)]
|
||||||
m
|
|
||||||
for m in self._data["load_throughput"]
|
|
||||||
if self._match_mem_entries(memory, m)
|
|
||||||
]
|
|
||||||
if len(ld_tp) > 0:
|
if len(ld_tp) > 0:
|
||||||
return ld_tp[0]["port_pressure"].copy()
|
return ld_tp[0]["port_pressure"].copy()
|
||||||
return self._data["load_throughput_default"].copy()
|
return self._data["load_throughput_default"].copy()
|
||||||
@@ -247,11 +231,7 @@ class MachineModel(object):
|
|||||||
|
|
||||||
def get_store_throughput(self, memory):
|
def get_store_throughput(self, memory):
|
||||||
"""Return store throughput for given register type."""
|
"""Return store throughput for given register type."""
|
||||||
st_tp = [
|
st_tp = [m for m in self._data["store_throughput"] if self._match_mem_entries(memory, m)]
|
||||||
m
|
|
||||||
for m in self._data["store_throughput"]
|
|
||||||
if self._match_mem_entries(memory, m)
|
|
||||||
]
|
|
||||||
if len(st_tp) > 0:
|
if len(st_tp) > 0:
|
||||||
return st_tp[0]["port_pressure"].copy()
|
return st_tp[0]["port_pressure"].copy()
|
||||||
return self._data["store_throughput_default"].copy()
|
return self._data["store_throughput_default"].copy()
|
||||||
@@ -319,9 +299,7 @@ class MachineModel(object):
|
|||||||
formatted_instruction_forms = deepcopy(self._data["instruction_forms"])
|
formatted_instruction_forms = deepcopy(self._data["instruction_forms"])
|
||||||
for instruction_form in formatted_instruction_forms:
|
for instruction_form in formatted_instruction_forms:
|
||||||
if instruction_form["port_pressure"] is not None:
|
if instruction_form["port_pressure"] is not None:
|
||||||
cs = ruamel.yaml.comments.CommentedSeq(
|
cs = ruamel.yaml.comments.CommentedSeq(instruction_form["port_pressure"])
|
||||||
instruction_form["port_pressure"]
|
|
||||||
)
|
|
||||||
cs.fa.set_flow_style()
|
cs.fa.set_flow_style()
|
||||||
instruction_form["port_pressure"] = cs
|
instruction_form["port_pressure"] = cs
|
||||||
|
|
||||||
@@ -371,9 +349,7 @@ class MachineModel(object):
|
|||||||
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
|
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
|
||||||
|
|
||||||
# 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
|
# 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
|
||||||
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
|
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
|
||||||
".pickle"
|
|
||||||
)
|
|
||||||
if companion_cachefile.exists():
|
if companion_cachefile.exists():
|
||||||
# companion file (must be up-to-date, due to equal hash)
|
# companion file (must be up-to-date, due to equal hash)
|
||||||
with companion_cachefile.open("rb") as f:
|
with companion_cachefile.open("rb") as f:
|
||||||
@@ -382,9 +358,7 @@ class MachineModel(object):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
# 2. home cachefile: ~/.osaca/cache/<name>_<sha512hash>.pickle
|
# 2. home cachefile: ~/.osaca/cache/<name>_<sha512hash>.pickle
|
||||||
home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(
|
home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(".pickle")
|
||||||
".pickle"
|
|
||||||
)
|
|
||||||
if home_cachefile.exists():
|
if home_cachefile.exists():
|
||||||
# home file (must be up-to-date, due to equal hash)
|
# home file (must be up-to-date, due to equal hash)
|
||||||
with home_cachefile.open("rb") as f:
|
with home_cachefile.open("rb") as f:
|
||||||
@@ -403,9 +377,7 @@ class MachineModel(object):
|
|||||||
p = Path(filepath)
|
p = Path(filepath)
|
||||||
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
|
hexhash = hashlib.sha256(p.read_bytes()).hexdigest()
|
||||||
# 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
|
# 1. companion cachefile: same location, with '.<name>_<sha512hash>.pickle'
|
||||||
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(
|
companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle")
|
||||||
".pickle"
|
|
||||||
)
|
|
||||||
if os.access(str(companion_cachefile.parent), os.W_OK):
|
if os.access(str(companion_cachefile.parent), os.W_OK):
|
||||||
with companion_cachefile.open("wb") as f:
|
with companion_cachefile.open("wb") as f:
|
||||||
pickle.dump(self._data, f)
|
pickle.dump(self._data, f)
|
||||||
@@ -449,9 +421,7 @@ class MachineModel(object):
|
|||||||
operand_string += operand["prefix"]
|
operand_string += operand["prefix"]
|
||||||
operand_string += operand["shape"] if "shape" in operand else ""
|
operand_string += operand["shape"] if "shape" in operand else ""
|
||||||
elif "name" in operand:
|
elif "name" in operand:
|
||||||
operand_string += (
|
operand_string += "r" if operand["name"] == "gpr" else operand["name"][0]
|
||||||
"r" if operand["name"] == "gpr" else operand["name"][0]
|
|
||||||
)
|
|
||||||
elif opclass == "memory":
|
elif opclass == "memory":
|
||||||
# Memory
|
# Memory
|
||||||
operand_string += "m"
|
operand_string += "m"
|
||||||
@@ -614,9 +584,7 @@ class MachineModel(object):
|
|||||||
if "register" in operand:
|
if "register" in operand:
|
||||||
if i_operand["class"] != "register":
|
if i_operand["class"] != "register":
|
||||||
return False
|
return False
|
||||||
return self._is_x86_reg_type(
|
return self._is_x86_reg_type(i_operand, operand["register"], consider_masking=False)
|
||||||
i_operand, operand["register"], consider_masking=False
|
|
||||||
)
|
|
||||||
# memory
|
# memory
|
||||||
if "memory" in operand:
|
if "memory" in operand:
|
||||||
if i_operand["class"] != "memory":
|
if i_operand["class"] != "memory":
|
||||||
@@ -664,8 +632,7 @@ class MachineModel(object):
|
|||||||
return False
|
return False
|
||||||
if "shape" in reg:
|
if "shape" in reg:
|
||||||
if "shape" in i_reg and (
|
if "shape" in i_reg and (
|
||||||
reg["shape"] == i_reg["shape"]
|
reg["shape"] == i_reg["shape"] or self.WILDCARD in (reg["shape"] + i_reg["shape"])
|
||||||
or self.WILDCARD in (reg["shape"] + i_reg["shape"])
|
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
@@ -695,8 +662,7 @@ class MachineModel(object):
|
|||||||
if (
|
if (
|
||||||
(
|
(
|
||||||
"mask" in reg
|
"mask" in reg
|
||||||
and reg["mask"].rstrip(string.digits).lower()
|
and reg["mask"].rstrip(string.digits).lower() == i_reg.get("mask")
|
||||||
== i_reg.get("mask")
|
|
||||||
)
|
)
|
||||||
or reg.get("mask") == self.WILDCARD
|
or reg.get("mask") == self.WILDCARD
|
||||||
or i_reg.get("mask") == self.WILDCARD
|
or i_reg.get("mask") == self.WILDCARD
|
||||||
|
|||||||
@@ -45,10 +45,7 @@ class ISASemantics(object):
|
|||||||
def assign_src_dst(self, instruction_form):
|
def assign_src_dst(self, instruction_form):
|
||||||
"""Update instruction form dictionary with source, destination and flag information."""
|
"""Update instruction form dictionary with source, destination and flag information."""
|
||||||
# if the instruction form doesn't have operands or is None, there's nothing to do
|
# if the instruction form doesn't have operands or is None, there's nothing to do
|
||||||
if (
|
if instruction_form["operands"] is None or instruction_form["instruction"] is None:
|
||||||
instruction_form["operands"] is None
|
|
||||||
or instruction_form["instruction"] is None
|
|
||||||
):
|
|
||||||
instruction_form["semantic_operands"] = AttrDict(
|
instruction_form["semantic_operands"] = AttrDict(
|
||||||
{"source": [], "destination": [], "src_dst": []}
|
{"source": [], "destination": [], "src_dst": []}
|
||||||
)
|
)
|
||||||
@@ -97,20 +94,16 @@ class ISASemantics(object):
|
|||||||
if assign_default:
|
if assign_default:
|
||||||
# no irregular operand structure, apply default
|
# no irregular operand structure, apply default
|
||||||
op_dict["source"] = self._get_regular_source_operands(instruction_form)
|
op_dict["source"] = self._get_regular_source_operands(instruction_form)
|
||||||
op_dict["destination"] = self._get_regular_destination_operands(
|
op_dict["destination"] = self._get_regular_destination_operands(instruction_form)
|
||||||
instruction_form
|
|
||||||
)
|
|
||||||
op_dict["src_dst"] = []
|
op_dict["src_dst"] = []
|
||||||
# post-process pre- and post-indexing for aarch64 memory operands
|
# post-process pre- and post-indexing for aarch64 memory operands
|
||||||
if self._isa == "aarch64":
|
if self._isa == "aarch64":
|
||||||
for operand in [op for op in op_dict["source"] if "memory" in op]:
|
for operand in [op for op in op_dict["source"] if "memory" in op]:
|
||||||
post_indexed = (
|
post_indexed = (
|
||||||
"post_indexed" in operand["memory"]
|
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||||
and operand["memory"]["post_indexed"]
|
|
||||||
)
|
)
|
||||||
pre_indexed = (
|
pre_indexed = (
|
||||||
"pre_indexed" in operand["memory"]
|
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||||
and operand["memory"]["pre_indexed"]
|
|
||||||
)
|
)
|
||||||
if post_indexed or pre_indexed:
|
if post_indexed or pre_indexed:
|
||||||
op_dict["src_dst"].append(
|
op_dict["src_dst"].append(
|
||||||
@@ -124,12 +117,10 @@ class ISASemantics(object):
|
|||||||
)
|
)
|
||||||
for operand in [op for op in op_dict["destination"] if "memory" in op]:
|
for operand in [op for op in op_dict["destination"] if "memory" in op]:
|
||||||
post_indexed = (
|
post_indexed = (
|
||||||
"post_indexed" in operand["memory"]
|
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||||
and operand["memory"]["post_indexed"]
|
|
||||||
)
|
)
|
||||||
pre_indexed = (
|
pre_indexed = (
|
||||||
"pre_indexed" in operand["memory"]
|
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||||
and operand["memory"]["pre_indexed"]
|
|
||||||
)
|
)
|
||||||
if post_indexed or pre_indexed:
|
if post_indexed or pre_indexed:
|
||||||
op_dict["src_dst"].append(
|
op_dict["src_dst"].append(
|
||||||
@@ -189,17 +180,14 @@ class ISASemantics(object):
|
|||||||
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||||
return {
|
return {
|
||||||
base_name: {
|
base_name: {
|
||||||
"name": o.memory.base.get("prefix", "")
|
"name": o.memory.base.get("prefix", "") + o.memory.base.name,
|
||||||
+ o.memory.base.name,
|
|
||||||
"value": o.memory.post_indexed.value,
|
"value": o.memory.post_indexed.value,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
reg_operand_names = {} # e.g., {'rax': 'op1'}
|
reg_operand_names = {} # e.g., {'rax': 'op1'}
|
||||||
operand_state = (
|
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
|
||||||
{}
|
|
||||||
) # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
|
|
||||||
|
|
||||||
for o in instruction_form.operands:
|
for o in instruction_form.operands:
|
||||||
if "pre_indexed" in o.get("memory", {}):
|
if "pre_indexed" in o.get("memory", {}):
|
||||||
@@ -211,9 +199,7 @@ class ISASemantics(object):
|
|||||||
)
|
)
|
||||||
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||||
reg_operand_names = {base_name: "op1"}
|
reg_operand_names = {base_name: "op1"}
|
||||||
operand_state = {
|
operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
|
||||||
"op1": {"name": base_name, "value": o.memory.offset.value}
|
|
||||||
}
|
|
||||||
|
|
||||||
if isa_data is not None and "operation" in isa_data:
|
if isa_data is not None and "operation" in isa_data:
|
||||||
for i, o in enumerate(instruction_form.operands):
|
for i, o in enumerate(instruction_form.operands):
|
||||||
@@ -254,18 +240,14 @@ class ISASemantics(object):
|
|||||||
op_dict["src_dst"] = []
|
op_dict["src_dst"] = []
|
||||||
|
|
||||||
# handle dependency breaking instructions
|
# handle dependency breaking instructions
|
||||||
if (
|
if "breaks_dependency_on_equal_operands" in isa_data and operands[1:] == operands[:-1]:
|
||||||
"breaks_dependency_on_equal_operands" in isa_data
|
|
||||||
and operands[1:] == operands[:-1]
|
|
||||||
):
|
|
||||||
op_dict["destination"] += operands
|
op_dict["destination"] += operands
|
||||||
if "hidden_operands" in isa_data:
|
if "hidden_operands" in isa_data:
|
||||||
op_dict["destination"] += [
|
op_dict["destination"] += [
|
||||||
AttrDict.convert_dict(
|
AttrDict.convert_dict(
|
||||||
{
|
{
|
||||||
hop["class"]: {
|
hop["class"]: {
|
||||||
k: hop[k]
|
k: hop[k] for k in ["name", "class", "source", "destination"]
|
||||||
for k in ["name", "class", "source", "destination"]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -351,9 +333,7 @@ class ISASemantics(object):
|
|||||||
|
|
||||||
def substitute_mem_address(self, operands):
|
def substitute_mem_address(self, operands):
|
||||||
"""Create memory wildcard for all memory operands"""
|
"""Create memory wildcard for all memory operands"""
|
||||||
return [
|
return [self._create_reg_wildcard() if "memory" in op else op for op in operands]
|
||||||
self._create_reg_wildcard() if "memory" in op else op for op in operands
|
|
||||||
]
|
|
||||||
|
|
||||||
def _create_reg_wildcard(self):
|
def _create_reg_wildcard(self):
|
||||||
"""Wildcard constructor"""
|
"""Wildcard constructor"""
|
||||||
|
|||||||
@@ -54,9 +54,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
dg = nx.DiGraph()
|
dg = nx.DiGraph()
|
||||||
for i, instruction_form in enumerate(kernel):
|
for i, instruction_form in enumerate(kernel):
|
||||||
dg.add_node(instruction_form["line_number"])
|
dg.add_node(instruction_form["line_number"])
|
||||||
dg.nodes[instruction_form["line_number"]][
|
dg.nodes[instruction_form["line_number"]]["instruction_form"] = instruction_form
|
||||||
"instruction_form"
|
|
||||||
] = instruction_form
|
|
||||||
# add load as separate node if existent
|
# add load as separate node if existent
|
||||||
if (
|
if (
|
||||||
INSTR_FLAGS.HAS_LD in instruction_form["flags"]
|
INSTR_FLAGS.HAS_LD in instruction_form["flags"]
|
||||||
@@ -71,16 +69,12 @@ class KernelDG(nx.DiGraph):
|
|||||||
dg.add_edge(
|
dg.add_edge(
|
||||||
instruction_form["line_number"] + 0.1,
|
instruction_form["line_number"] + 0.1,
|
||||||
instruction_form["line_number"],
|
instruction_form["line_number"],
|
||||||
latency=instruction_form["latency"]
|
latency=instruction_form["latency"] - instruction_form["latency_wo_load"],
|
||||||
- instruction_form["latency_wo_load"],
|
|
||||||
)
|
)
|
||||||
for dep, dep_flags in self.find_depending(
|
for dep, dep_flags in self.find_depending(instruction_form, kernel[i + 1 :]):
|
||||||
instruction_form, kernel[i + 1 :]
|
|
||||||
):
|
|
||||||
edge_weight = (
|
edge_weight = (
|
||||||
instruction_form["latency"]
|
instruction_form["latency"]
|
||||||
if "mem_dep" in dep_flags
|
if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form
|
||||||
or "latency_wo_load" not in instruction_form
|
|
||||||
else instruction_form["latency_wo_load"]
|
else instruction_form["latency_wo_load"]
|
||||||
)
|
)
|
||||||
if "storeload_dep" in dep_flags:
|
if "storeload_dep" in dep_flags:
|
||||||
@@ -312,9 +306,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
# store to same location (presumed)
|
# store to same location (presumed)
|
||||||
if self.is_memstore(dst.memory, instr_form, register_changes):
|
if self.is_memstore(dst.memory, instr_form, register_changes):
|
||||||
break
|
break
|
||||||
self._update_reg_changes(
|
self._update_reg_changes(instr_form, register_changes, only_postindexed=True)
|
||||||
instr_form, register_changes, only_postindexed=True
|
|
||||||
)
|
|
||||||
|
|
||||||
def _update_reg_changes(self, iform, reg_state=None, only_postindexed=False):
|
def _update_reg_changes(self, iform, reg_state=None, only_postindexed=False):
|
||||||
if self.arch_sem is None:
|
if self.arch_sem is None:
|
||||||
@@ -322,9 +314,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
return {}
|
return {}
|
||||||
if reg_state is None:
|
if reg_state is None:
|
||||||
reg_state = {}
|
reg_state = {}
|
||||||
for reg, change in self.arch_sem.get_reg_changes(
|
for reg, change in self.arch_sem.get_reg_changes(iform, only_postindexed).items():
|
||||||
iform, only_postindexed
|
|
||||||
).items():
|
|
||||||
if change is None or reg_state.get(reg, {}) is None:
|
if change is None or reg_state.get(reg, {}) is None:
|
||||||
reg_state[reg] = None
|
reg_state[reg] = None
|
||||||
else:
|
else:
|
||||||
@@ -362,23 +352,15 @@ class KernelDG(nx.DiGraph):
|
|||||||
instruction_form.semantic_operands.src_dst,
|
instruction_form.semantic_operands.src_dst,
|
||||||
):
|
):
|
||||||
if "register" in src:
|
if "register" in src:
|
||||||
is_read = (
|
is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
|
||||||
self.parser.is_reg_dependend_of(register, src.register) or is_read
|
|
||||||
)
|
|
||||||
if "flag" in src:
|
if "flag" in src:
|
||||||
is_read = (
|
is_read = self.parser.is_flag_dependend_of(register, src.flag) or is_read
|
||||||
self.parser.is_flag_dependend_of(register, src.flag) or is_read
|
|
||||||
)
|
|
||||||
if "memory" in src:
|
if "memory" in src:
|
||||||
if src.memory.base is not None:
|
if src.memory.base is not None:
|
||||||
is_read = (
|
is_read = self.parser.is_reg_dependend_of(register, src.memory.base) or is_read
|
||||||
self.parser.is_reg_dependend_of(register, src.memory.base)
|
|
||||||
or is_read
|
|
||||||
)
|
|
||||||
if src.memory.index is not None:
|
if src.memory.index is not None:
|
||||||
is_read = (
|
is_read = (
|
||||||
self.parser.is_reg_dependend_of(register, src.memory.index)
|
self.parser.is_reg_dependend_of(register, src.memory.index) or is_read
|
||||||
or is_read
|
|
||||||
)
|
)
|
||||||
# Check also if read in destination memory address
|
# Check also if read in destination memory address
|
||||||
for dst in chain(
|
for dst in chain(
|
||||||
@@ -387,14 +369,10 @@ class KernelDG(nx.DiGraph):
|
|||||||
):
|
):
|
||||||
if "memory" in dst:
|
if "memory" in dst:
|
||||||
if dst.memory.base is not None:
|
if dst.memory.base is not None:
|
||||||
is_read = (
|
is_read = self.parser.is_reg_dependend_of(register, dst.memory.base) or is_read
|
||||||
self.parser.is_reg_dependend_of(register, dst.memory.base)
|
|
||||||
or is_read
|
|
||||||
)
|
|
||||||
if dst.memory.index is not None:
|
if dst.memory.index is not None:
|
||||||
is_read = (
|
is_read = (
|
||||||
self.parser.is_reg_dependend_of(register, dst.memory.index)
|
self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read
|
||||||
or is_read
|
|
||||||
)
|
)
|
||||||
return is_read
|
return is_read
|
||||||
|
|
||||||
@@ -443,10 +421,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
if mem.scale != src.scale:
|
if mem.scale != src.scale:
|
||||||
# scale factors do not match
|
# scale factors do not match
|
||||||
continue
|
continue
|
||||||
if (
|
if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
|
||||||
mem.index.get("prefix", "") + mem.index["name"]
|
|
||||||
!= index_change["name"]
|
|
||||||
):
|
|
||||||
# index registers do not match
|
# index registers do not match
|
||||||
continue
|
continue
|
||||||
addr_change += index_change["value"] * src.scale
|
addr_change += index_change["value"] * src.scale
|
||||||
@@ -468,19 +443,13 @@ class KernelDG(nx.DiGraph):
|
|||||||
instruction_form.semantic_operands.src_dst,
|
instruction_form.semantic_operands.src_dst,
|
||||||
):
|
):
|
||||||
if "register" in dst:
|
if "register" in dst:
|
||||||
is_written = (
|
is_written = self.parser.is_reg_dependend_of(register, dst.register) or is_written
|
||||||
self.parser.is_reg_dependend_of(register, dst.register)
|
|
||||||
or is_written
|
|
||||||
)
|
|
||||||
if "flag" in dst:
|
if "flag" in dst:
|
||||||
is_written = (
|
is_written = self.parser.is_flag_dependend_of(register, dst.flag) or is_written
|
||||||
self.parser.is_flag_dependend_of(register, dst.flag) or is_written
|
|
||||||
)
|
|
||||||
if "memory" in dst:
|
if "memory" in dst:
|
||||||
if "pre_indexed" in dst.memory or "post_indexed" in dst.memory:
|
if "pre_indexed" in dst.memory or "post_indexed" in dst.memory:
|
||||||
is_written = (
|
is_written = (
|
||||||
self.parser.is_reg_dependend_of(register, dst.memory.base)
|
self.parser.is_reg_dependend_of(register, dst.memory.base) or is_written
|
||||||
or is_written
|
|
||||||
)
|
)
|
||||||
# Check also for possible pre- or post-indexing in memory addresses
|
# Check also for possible pre- or post-indexing in memory addresses
|
||||||
for src in chain(
|
for src in chain(
|
||||||
@@ -490,8 +459,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
if "memory" in src:
|
if "memory" in src:
|
||||||
if "pre_indexed" in src.memory or "post_indexed" in src.memory:
|
if "pre_indexed" in src.memory or "post_indexed" in src.memory:
|
||||||
is_written = (
|
is_written = (
|
||||||
self.parser.is_reg_dependend_of(register, src.memory.base)
|
self.parser.is_reg_dependend_of(register, src.memory.base) or is_written
|
||||||
or is_written
|
|
||||||
)
|
)
|
||||||
return is_written
|
return is_written
|
||||||
|
|
||||||
@@ -522,9 +490,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
lcd = self.get_loopcarried_dependencies()
|
lcd = self.get_loopcarried_dependencies()
|
||||||
lcd_line_numbers = {}
|
lcd_line_numbers = {}
|
||||||
for dep in lcd:
|
for dep in lcd:
|
||||||
lcd_line_numbers[dep] = [
|
lcd_line_numbers[dep] = [x["line_number"] for x, lat in lcd[dep]["dependencies"]]
|
||||||
x["line_number"] for x, lat in lcd[dep]["dependencies"]
|
|
||||||
]
|
|
||||||
# add color scheme
|
# add color scheme
|
||||||
graph.graph["node"] = {"colorscheme": "accent8"}
|
graph.graph["node"] = {"colorscheme": "accent8"}
|
||||||
graph.graph["edge"] = {"colorscheme": "accent8"}
|
graph.graph["edge"] = {"colorscheme": "accent8"}
|
||||||
@@ -535,9 +501,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
max_line_number = max(lcd_line_numbers[dep])
|
max_line_number = max(lcd_line_numbers[dep])
|
||||||
graph.add_edge(max_line_number, min_line_number)
|
graph.add_edge(max_line_number, min_line_number)
|
||||||
graph.edges[max_line_number, min_line_number]["latency"] = [
|
graph.edges[max_line_number, min_line_number]["latency"] = [
|
||||||
lat
|
lat for x, lat in lcd[dep]["dependencies"] if x["line_number"] == max_line_number
|
||||||
for x, lat in lcd[dep]["dependencies"]
|
|
||||||
if x["line_number"] == max_line_number
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# add label to edges
|
# add label to edges
|
||||||
@@ -546,9 +510,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
|
|
||||||
# add CP values to graph
|
# add CP values to graph
|
||||||
for n in cp:
|
for n in cp:
|
||||||
graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n[
|
graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n["latency_cp"]
|
||||||
"latency_cp"
|
|
||||||
]
|
|
||||||
|
|
||||||
# color CP and LCD
|
# color CP and LCD
|
||||||
for n in graph.nodes:
|
for n in graph.nodes:
|
||||||
@@ -568,8 +530,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
for e in graph.edges:
|
for e in graph.edges:
|
||||||
if (
|
if (
|
||||||
graph.nodes[e[0]]["instruction_form"]["line_number"] in cp_line_numbers
|
graph.nodes[e[0]]["instruction_form"]["line_number"] in cp_line_numbers
|
||||||
and graph.nodes[e[1]]["instruction_form"]["line_number"]
|
and graph.nodes[e[1]]["instruction_form"]["line_number"] in cp_line_numbers
|
||||||
in cp_line_numbers
|
|
||||||
and e[0] < e[1]
|
and e[0] < e[1]
|
||||||
):
|
):
|
||||||
bold_edge = True
|
bold_edge = True
|
||||||
@@ -581,8 +542,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
graph.edges[e]["penwidth"] = 3
|
graph.edges[e]["penwidth"] = 3
|
||||||
for dep in lcd_line_numbers:
|
for dep in lcd_line_numbers:
|
||||||
if (
|
if (
|
||||||
graph.nodes[e[0]]["instruction_form"]["line_number"]
|
graph.nodes[e[0]]["instruction_form"]["line_number"] in lcd_line_numbers[dep]
|
||||||
in lcd_line_numbers[dep]
|
|
||||||
and graph.nodes[e[1]]["instruction_form"]["line_number"]
|
and graph.nodes[e[1]]["instruction_form"]["line_number"]
|
||||||
in lcd_line_numbers[dep]
|
in lcd_line_numbers[dep]
|
||||||
):
|
):
|
||||||
|
|||||||
@@ -133,11 +133,7 @@ def find_marked_section(
|
|||||||
index_end = -1
|
index_end = -1
|
||||||
for i, line in enumerate(lines):
|
for i, line in enumerate(lines):
|
||||||
try:
|
try:
|
||||||
if (
|
if line.instruction is None and comments is not None and line.comment is not None:
|
||||||
line.instruction is None
|
|
||||||
and comments is not None
|
|
||||||
and line.comment is not None
|
|
||||||
):
|
|
||||||
if comments["start"] == line.comment:
|
if comments["start"] == line.comment:
|
||||||
index_start = i + 1
|
index_start = i + 1
|
||||||
elif comments["end"] == line.comment:
|
elif comments["end"] == line.comment:
|
||||||
|
|||||||
Reference in New Issue
Block a user