Added 2 operand types and made changes for attribute usage

This commit is contained in:
stefandesouza
2023-08-20 21:01:44 +02:00
parent eb09cbde42
commit 0a32c77751
16 changed files with 326 additions and 153 deletions

134
osaca/semantics/arch_semantics.py Executable file → Normal file
View File

@@ -46,10 +46,10 @@ class ArchSemantics(ISASemantics):
for idx, instruction_form in enumerate(kernel[start:], start):
multiple_assignments = False
# if iform has multiple possible port assignments, check all in a DFS manner and take the best
if isinstance(instruction_form["port_uops"], dict):
if isinstance(instruction_form.port_uops, dict):
best_kernel = None
best_kernel_tp = sys.maxsize
for port_util_alt in list(instruction_form["port_uops"].values())[1:]:
for port_util_alt in list(instruction_form.port_uops.values())[1:]:
k_tmp = deepcopy(kernel)
k_tmp[idx]["port_uops"] = deepcopy(port_util_alt)
k_tmp[idx]["port_pressure"] = self._machine_model.average_port_pressure(
@@ -62,15 +62,15 @@ class ArchSemantics(ISASemantics):
best_kernel_tp = max(self.get_throughput_sum(best_kernel))
# check the first option in the main branch and compare against the best option later
multiple_assignments = True
kernel[idx]["port_uops"] = list(instruction_form["port_uops"].values())[0]
for uop in instruction_form["port_uops"]:
kernel[idx]["port_uops"] = list(instruction_form.port_uops.values())[0]
for uop in instruction_form.port_uops:
cycles = uop[0]
ports = list(uop[1])
indices = [port_list.index(p) for p in ports]
# check if port sum of used ports for uop are unbalanced
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form["port_pressure"])
itemgetter(*indices)(instruction_form.port_pressure)
)
if len(set(port_sums)) > 1:
# balance ports
@@ -87,7 +87,7 @@ class ArchSemantics(ISASemantics):
differences[max_port_idx] -= INC
differences[min_port_idx] += INC
# instr_ports = [round(p, 2) for p in instr_ports]
self._itemsetter(*indices)(instruction_form["port_pressure"], *instr_ports)
self._itemsetter(*indices)(instruction_form.port_pressure, *instr_ports)
# check if min port is zero
if round(min(instr_ports), 2) <= 0:
# if port_pressure is not exactly 0.00, add the residual to
@@ -100,21 +100,21 @@ class ArchSemantics(ISASemantics):
# delete it
del differences[instr_ports.index(min(instr_ports))]
self._itemsetter(*indices)(
instruction_form["port_pressure"], *instr_ports
instruction_form.port_pressure, *instr_ports
)
zero_index = [
p
for p in indices
if round(instruction_form["port_pressure"][p], 2) == 0
or instruction_form["port_pressure"][p] < 0.00
if round(instruction_form.port_pressure[p], 2) == 0
or instruction_form.port_pressure[p] < 0.00
][0]
instruction_form["port_pressure"][zero_index] = 0.0
instruction_form.port_pressure[zero_index] = 0.0
# Remove from further balancing
indices = [
p for p in indices if instruction_form["port_pressure"][p] > 0
p for p in indices if instruction_form.port_pressure[p] > 0
]
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form["port_pressure"])
itemgetter(*indices)(instruction_form.port_pressure)
)
# never remove more than the fixed utilization per uop and port, i.e.,
# cycles/len(ports)
@@ -124,7 +124,7 @@ class ArchSemantics(ISASemantics):
# pressure is not 0
del indices[differences.index(min(differences))]
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form["port_pressure"])
itemgetter(*indices)(instruction_form.port_pressure)
)
del differences[differences.index(min(differences))]
port_sums = self._to_list(
@@ -139,14 +139,14 @@ class ArchSemantics(ISASemantics):
def set_hidden_loads(self, kernel):
"""Hide loads behind stores if architecture supports hidden loads (deprecated)"""
loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr["flags"]]
stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr["flags"]]
loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr.flags]
stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr.flags]
# Filter instructions including load and store
load_ids = [instr["line_number"] for instr in loads]
store_ids = [instr["line_number"] for instr in stores]
load_ids = [instr.line_number for instr in loads]
store_ids = [instr.line_number for instr in stores]
shared_ldst = list(set(load_ids).intersection(set(store_ids)))
loads = [instr for instr in loads if instr["line_number"] not in shared_ldst]
stores = [instr for instr in stores if instr["line_number"] not in shared_ldst]
loads = [instr for instr in loads if instr.line_number not in shared_ldst]
stores = [instr for instr in stores if instr.line_number not in shared_ldst]
if len(stores) == 0 or len(loads) == 0:
# nothing to do
@@ -182,35 +182,35 @@ class ArchSemantics(ISASemantics):
"""Assign throughput and latency to an instruction form."""
flags = []
port_number = len(self._machine_model["ports"])
if instruction_form["instruction"] is None:
if instruction_form.instruction is None:
# No instruction (label, comment, ...) --> ignore
throughput = 0.0
latency = 0.0
latency_wo_load = latency
instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
instruction_form["port_uops"] = []
instruction_form.port_pressure = [0.0 for i in range(port_number)]
instruction_form.port_uops = []
else:
instruction_data = self._machine_model.get_instruction(
instruction_form["instruction"], instruction_form["operands"]
instruction_form.instruction, instruction_form.operands
)
if (
not instruction_data
and self._isa == "x86"
and instruction_form["instruction"][-1] in self.GAS_SUFFIXES
and instruction_form.instruction[-1] in self.GAS_SUFFIXES
):
# check for instruction without GAS suffix
instruction_data = self._machine_model.get_instruction(
instruction_form["instruction"][:-1], instruction_form["operands"]
instruction_form.instruction[:-1], instruction_form.operands
)
if (
instruction_data is None
and self._isa == "aarch64"
and "." in instruction_form["instruction"]
and "." in instruction_form.instruction
):
# Check for instruction without shape/cc suffix
suffix_start = instruction_form["instruction"].index(".")
suffix_start = instruction_form.instruction.index(".")
instruction_data = self._machine_model.get_instruction(
instruction_form["instruction"][:suffix_start], instruction_form["operands"]
instruction_form.instruction[:suffix_start], instruction_form.operands
)
if instruction_data:
# instruction form in DB
@@ -227,33 +227,33 @@ class ArchSemantics(ISASemantics):
assign_unknown = True
# check for equivalent register-operands DB entry if LD
if (
INSTR_FLAGS.HAS_LD in instruction_form["flags"]
or INSTR_FLAGS.HAS_ST in instruction_form["flags"]
INSTR_FLAGS.HAS_LD in instruction_form.flags
or INSTR_FLAGS.HAS_ST in instruction_form.flags
):
# dynamically combine LD/ST and reg form of instruction form
# substitute mem and look for reg-only variant
operands = self.substitute_mem_address(instruction_form["operands"])
operands = self.substitute_mem_address(instruction_form.operands)
instruction_data_reg = self._machine_model.get_instruction(
instruction_form["instruction"], operands
instruction_form.instruction, operands
)
if (
not instruction_data_reg
and self._isa == "x86"
and instruction_form["instruction"][-1] in self.GAS_SUFFIXES
and instruction_form.instruction[-1] in self.GAS_SUFFIXES
):
# check for instruction without GAS suffix
instruction_data_reg = self._machine_model.get_instruction(
instruction_form["instruction"][:-1], operands
instruction_form.instruction[:-1], operands
)
if (
instruction_data_reg is None
and self._isa == "aarch64"
and "." in instruction_form["instruction"]
and "." in instruction_form.instruction
):
# Check for instruction without shape/cc suffix
suffix_start = instruction_form["instruction"].index(".")
suffix_start = instruction_form.instruction.index(".")
instruction_data_reg = self._machine_model.get_instruction(
instruction_form["instruction"][:suffix_start], operands
instruction_form.instruction[:suffix_start], operands
)
if instruction_data_reg:
assign_unknown = False
@@ -265,13 +265,13 @@ class ArchSemantics(ISASemantics):
dummy_reg = {"class": "register", "name": reg_type}
data_port_pressure = [0.0 for _ in range(port_number)]
data_port_uops = []
if INSTR_FLAGS.HAS_LD in instruction_form["flags"]:
if INSTR_FLAGS.HAS_LD in instruction_form.flags:
# LOAD performance data
load_perf_data = self._machine_model.get_load_throughput(
[
x["memory"]
for x in instruction_form["semantic_operands"]["source"]
+ instruction_form["semantic_operands"]["src_dst"]
for x in instruction_form.semantic_operands["source"]
+ instruction_form.semantic_operands["src_dst"]
if "memory" in x
][0]
)
@@ -296,11 +296,11 @@ class ArchSemantics(ISASemantics):
reg_type
]
data_port_pressure = [pp * multiplier for pp in data_port_pressure]
if INSTR_FLAGS.HAS_ST in instruction_form["flags"]:
if INSTR_FLAGS.HAS_ST in instruction_form.flags:
# STORE performance data
destinations = (
instruction_form["semantic_operands"]["destination"]
+ instruction_form["semantic_operands"]["src_dst"]
instruction_form.semantic_operands["destination"]
+ instruction_form.semantic_operands["src_dst"]
)
store_perf_data = self._machine_model.get_store_throughput(
[x["memory"] for x in destinations if "memory" in x][0], dummy_reg
@@ -314,18 +314,18 @@ class ArchSemantics(ISASemantics):
if (
self._isa == "aarch64"
and "memory"
not in instruction_form["semantic_operands"]["destination"]
not in instruction_form.semantic_operands["destination"]
and all(
[
"post_indexed" in op["memory"]
or "pre_indexed" in op["memory"]
for op in instruction_form["semantic_operands"]["src_dst"]
for op in instruction_form.semantic_operands["src_dst"]
if "memory" in op
]
)
):
st_data_port_uops = []
instruction_form["flags"].remove(INSTR_FLAGS.HAS_ST)
instruction_form.flags.remove(INSTR_FLAGS.HAS_ST)
# sum up all data ports in case of LOAD and STORE
st_data_port_pressure = self._machine_model.average_port_pressure(
@@ -349,12 +349,12 @@ class ArchSemantics(ISASemantics):
# Add LD and ST latency
latency += (
self._machine_model.get_load_latency(reg_type)
if INSTR_FLAGS.HAS_LD in instruction_form["flags"]
if INSTR_FLAGS.HAS_LD in instruction_form.flags
else 0
)
latency += (
self._machine_model.get_store_latency(reg_type)
if INSTR_FLAGS.HAS_ST in instruction_form["flags"]
if INSTR_FLAGS.HAS_ST in instruction_form.flags
else 0
)
latency_wo_load = instruction_data_reg["latency"]
@@ -367,13 +367,13 @@ class ArchSemantics(ISASemantics):
# [
# 'post_indexed' in op['memory'] or
# 'pre_indexed' in op['memory']
# for op in instruction_form['operands']
# for op in instruction_form.operands
# if 'memory' in op
# ]
# )
# ):
# latency_wo_load = 1.0
instruction_form["port_pressure"] = [
instruction_form.port_pressure = [
sum(x)
for x in zip(
data_port_pressure,
@@ -382,7 +382,7 @@ class ArchSemantics(ISASemantics):
),
)
]
instruction_form["port_uops"] = list(
instruction_form.port_uops = list(
chain(instruction_data_reg["port_pressure"], data_port_uops)
)
@@ -391,21 +391,21 @@ class ArchSemantics(ISASemantics):
throughput = 0.0
latency = 0.0
latency_wo_load = latency
instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
instruction_form["port_uops"] = []
instruction_form.port_pressure = [0.0 for i in range(port_number)]
instruction_form.port_uops = []
flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN]
# remove duplicate flags
flags = list(set(flags))
if "flags" not in instruction_form:
instruction_form["flags"] = flags
if instruction_form.flags == []:
instruction_form.flags = flags
else:
instruction_form["flags"] += flags
instruction_form["throughput"] = throughput
instruction_form["latency"] = latency
instruction_form["latency_wo_load"] = latency_wo_load
instruction_form.flags += flags
instruction_form.throughput = throughput
instruction_form.latency = latency
instruction_form.latency_wo_load = latency_wo_load
# for later CP and loop-carried dependency analysis
instruction_form["latency_cp"] = 0
instruction_form["latency_lcd"] = 0
instruction_form.latency_cp = 0
instruction_form.latency_lcd = 0
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
"""Apply performance data to instruction if it was found in the archDB"""
@@ -413,11 +413,11 @@ class ArchSemantics(ISASemantics):
port_pressure = self._machine_model.average_port_pressure(
instruction_data["port_pressure"]
)
instruction_form["port_uops"] = instruction_data["port_pressure"]
instruction_form.port_uops = instruction_data["port_pressure"]
try:
assert isinstance(port_pressure, list)
assert len(port_pressure) == port_number
instruction_form["port_pressure"] = port_pressure
instruction_form.port_pressure = port_pressure
if sum(port_pressure) == 0 and throughput is not None:
# port pressure on all ports 0 --> not bound to a port
flags.append(INSTR_FLAGS.NOT_BOUND)
@@ -426,8 +426,8 @@ class ArchSemantics(ISASemantics):
"Port pressure could not be imported correctly from database. "
+ "Please check entry for:\n {}".format(instruction_form)
)
instruction_form["port_pressure"] = [0.0 for i in range(port_number)]
instruction_form["port_uops"] = []
instruction_form.port_pressure = [0.0 for i in range(port_number)]
instruction_form.port_uops = []
flags.append(INSTR_FLAGS.TP_UNKWN)
if throughput is None:
# assume 0 cy and mark as unknown
@@ -440,7 +440,7 @@ class ArchSemantics(ISASemantics):
latency = 0.0
latency_wo_load = latency
flags.append(INSTR_FLAGS.LT_UNKWN)
if INSTR_FLAGS.HAS_LD in instruction_form["flags"]:
if INSTR_FLAGS.HAS_LD in instruction_form.flags:
flags.append(INSTR_FLAGS.LD)
return throughput, port_pressure, latency, latency_wo_load
@@ -489,7 +489,7 @@ class ArchSemantics(ISASemantics):
"""Get the overall throughput sum separated by port of all instructions of a kernel."""
# ignoring all lines with throughput == 0.0, because there won't be anything to sum up
# typically comment, label and non-instruction lines
port_pressures = [instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0]
port_pressures = [instr.port_pressure for instr in kernel if instr.throughput != 0.0]
# Essentially summing up each column of port_pressures, where each column is one port
# and each row is one line of the kernel
# round is necessary to ensure termination of ArchSemantics.assign_optimal_throughput