From 314feb410426d6385fc85aeafe731d1aec1ce5d3 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Mon, 4 Oct 2021 14:33:28 +0200 Subject: [PATCH] black formatting --- osaca/semantics/arch_semantics.py | 86 +++++++++++++++++++++---------- osaca/semantics/hw_model.py | 64 +++++++++++++++++------ osaca/semantics/kernel_dg.py | 84 ++++++++++++++++++++++-------- osaca/semantics/marker_utils.py | 6 ++- 4 files changed, 176 insertions(+), 64 deletions(-) diff --git a/osaca/semantics/arch_semantics.py b/osaca/semantics/arch_semantics.py index 103c71f..5260ca9 100755 --- a/osaca/semantics/arch_semantics.py +++ b/osaca/semantics/arch_semantics.py @@ -46,7 +46,9 @@ class ArchSemantics(ISASemantics): ports = list(uop[1]) indices = [port_list.index(p) for p in ports] # check if port sum of used ports for uop are unbalanced - port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel))) + port_sums = self._to_list( + itemgetter(*indices)(self.get_throughput_sum(kernel)) + ) instr_ports = self._to_list( itemgetter(*indices)(instruction_form["port_pressure"]) ) @@ -65,7 +67,9 @@ class ArchSemantics(ISASemantics): differences[max_port_idx] -= INC differences[min_port_idx] += INC # instr_ports = [round(p, 2) for p in instr_ports] - self._itemsetter(*indices)(instruction_form["port_pressure"], *instr_ports) + self._itemsetter(*indices)( + instruction_form["port_pressure"], *instr_ports + ) # check if min port is zero if round(min(instr_ports), 2) <= 0: # if port_pressure is not exactly 0.00, add the residual to @@ -83,12 +87,15 @@ class ArchSemantics(ISASemantics): zero_index = [ p for p in indices - if round(instruction_form["port_pressure"][p], 2) == 0 + if round(instruction_form["port_pressure"][p], 2) + == 0 ][0] instruction_form["port_pressure"][zero_index] = 0.0 # Remove from further balancing indices = [ - p for p in indices if instruction_form["port_pressure"][p] > 0 + p + for p in indices + if instruction_form["port_pressure"][p] > 0 ] instr_ports = self._to_list( itemgetter(*indices)(instruction_form["port_pressure"]) @@ -141,9 +148,11 @@ class ArchSemantics(ISASemantics): if INSTR_FLAGS.HIDDEN_LD not in load_instr["flags"] ] ) - load = [instr for instr in kernel if instr["line_number"] == min_distance_load[1]][ - 0 - ] + load = [ + instr + for instr in kernel + if instr["line_number"] == min_distance_load[1] + ][0] # Hide load load["flags"] += [INSTR_FLAGS.HIDDEN_LD] load["port_pressure"] = self._nullify_data_ports(load["port_pressure"]) @@ -221,27 +230,39 @@ class ArchSemantics(ISASemantics): data_port_uops = self._machine_model.get_load_throughput( [ x["memory"] - for x in instruction_form["semantic_operands"]["source"] + for x in instruction_form["semantic_operands"][ + "source" + ] + instruction_form["semantic_operands"]["src_dst"] if "memory" in x ][0] ) - data_port_pressure = self._machine_model.average_port_pressure( - data_port_uops + data_port_pressure = ( + self._machine_model.average_port_pressure( + data_port_uops + ) ) if "load_throughput_multiplier" in self._machine_model: - multiplier = self._machine_model["load_throughput_multiplier"][ - reg_type + multiplier = self._machine_model[ + "load_throughput_multiplier" + ][reg_type] + data_port_pressure = [ + pp * multiplier for pp in data_port_pressure ] - data_port_pressure = [pp * multiplier for pp in data_port_pressure] if INSTR_FLAGS.HAS_ST in instruction_form["flags"]: # STORE performance data destinations = ( instruction_form["semantic_operands"]["destination"] + instruction_form["semantic_operands"]["src_dst"] ) - st_data_port_uops = self._machine_model.get_store_throughput( - [x["memory"] for x in destinations if "memory" in x][0] + st_data_port_uops = ( + self._machine_model.get_store_throughput( + [ + x["memory"] + for x in destinations + if "memory" in x + ][0] + ) ) # zero data port pressure and remove HAS_ST flag if # - no mem operand in dst && @@ -250,12 +271,16 @@ class ArchSemantics(ISASemantics): if ( self._isa == "aarch64" and "memory" - not in instruction_form["semantic_operands"]["destination"] + not in instruction_form["semantic_operands"][ + "destination" + ] and all( [ "post_indexed" in op["memory"] or "pre_indexed" in op["memory"] - for op in instruction_form["semantic_operands"]["src_dst"] + for op in instruction_form["semantic_operands"][ + "src_dst" + ] if "memory" in op ] ) @@ -264,18 +289,21 @@ class ArchSemantics(ISASemantics): instruction_form["flags"].remove(INSTR_FLAGS.HAS_ST) # sum up all data ports in case for LOAD and STORE - st_data_port_pressure = self._machine_model.average_port_pressure( - st_data_port_uops + st_data_port_pressure = ( + self._machine_model.average_port_pressure( + st_data_port_uops + ) ) if "store_throughput_multiplier" in self._machine_model: - multiplier = self._machine_model["store_throughput_multiplier"][ - reg_type - ] + multiplier = self._machine_model[ + "store_throughput_multiplier" + ][reg_type] st_data_port_pressure = [ pp * multiplier for pp in st_data_port_pressure ] data_port_pressure = [ - sum(x) for x in zip(data_port_pressure, st_data_port_pressure) + sum(x) + for x in zip(data_port_pressure, st_data_port_pressure) ] data_port_uops += st_data_port_uops throughput = max( @@ -327,7 +355,9 @@ class ArchSemantics(ISASemantics): throughput = 0.0 latency = 0.0 latency_wo_load = latency - instruction_form["port_pressure"] = [0.0 for i in range(port_number)] + instruction_form["port_pressure"] = [ + 0.0 for i in range(port_number) + ] instruction_form["port_uops"] = [] flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN] # flatten flag list @@ -343,7 +373,9 @@ class ArchSemantics(ISASemantics): instruction_form["latency_cp"] = 0 instruction_form["latency_lcd"] = 0 - def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags): + def _handle_instruction_found( + self, instruction_data, port_number, instruction_form, flags + ): """Apply performance data to instruction if it was found in the archDB""" throughput = instruction_data["throughput"] port_pressure = self._machine_model.average_port_pressure( @@ -425,7 +457,9 @@ class ArchSemantics(ISASemantics): """Get the overall throughput sum separated by port of all instructions of a kernel.""" # ignoring all lines with throughput == 0.0, because there won't be anything to sum up # typically comment, label and non-instruction lines - port_pressures = [instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0] + port_pressures = [ + instr["port_pressure"] for instr in kernel if instr["throughput"] != 0.0 + ] # Essentially summing up each columns of port_pressures, where each column is one port # and each row is one line of the kernel # round is necessary to ensure termination of ArchsSemantics.assign_optimal_throughput diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index 948c2de..5ec21fe 100755 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -18,7 +18,9 @@ from ruamel.yaml.compat import StringIO class MachineModel(object): WILDCARD = "*" - INTERNAL_VERSION = 1 # increase whenever self._data format changes to invalidate cache! + INTERNAL_VERSION = ( + 1 # increase whenever self._data format changes to invalidate cache! + ) _runtime_cache = {} def __init__(self, arch=None, path_to_yaml=None, isa=None, lazy=False): @@ -43,7 +45,9 @@ class MachineModel(object): "scale": s, "port_pressure": [], } - for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8]) + for b, i, o, s in product( + ["gpr"], ["gpr", None], ["imd", None], [1, 8] + ) ], "load_throughput_default": [], "store_throughput": [], @@ -85,7 +89,9 @@ class MachineModel(object): self._data["instruction_forms"] = [] # separate multi-alias instruction forms for entry in [ - x for x in self._data["instruction_forms"] if isinstance(x["name"], list) + x + for x in self._data["instruction_forms"] + if isinstance(x["name"], list) ]: for name in entry["name"]: new_entry = {"name": name} @@ -133,7 +139,9 @@ class MachineModel(object): instruction_form for instruction_form in name_matched_iforms if self._match_operands( - instruction_form["operands"] if "operands" in instruction_form else [], + instruction_form["operands"] + if "operands" in instruction_form + else [], operands, ) ) @@ -215,11 +223,19 @@ class MachineModel(object): def get_load_latency(self, reg_type): """Return load latency for given register type.""" - return self._data["load_latency"][reg_type] if self._data["load_latency"][reg_type] else 0 + return ( + self._data["load_latency"][reg_type] + if self._data["load_latency"][reg_type] + else 0 + ) def get_load_throughput(self, memory): """Return load thorughput for given register type.""" - ld_tp = [m for m in self._data["load_throughput"] if self._match_mem_entries(memory, m)] + ld_tp = [ + m + for m in self._data["load_throughput"] + if self._match_mem_entries(memory, m) + ] if len(ld_tp) > 0: return ld_tp[0]["port_pressure"].copy() return self._data["load_throughput_default"].copy() @@ -231,7 +247,11 @@ class MachineModel(object): def get_store_throughput(self, memory): """Return store throughput for given register type.""" - st_tp = [m for m in self._data["store_throughput"] if self._match_mem_entries(memory, m)] + st_tp = [ + m + for m in self._data["store_throughput"] + if self._match_mem_entries(memory, m) + ] if len(st_tp) > 0: return st_tp[0]["port_pressure"].copy() return self._data["store_throughput_default"].copy() @@ -299,7 +319,9 @@ class MachineModel(object): formatted_instruction_forms = deepcopy(self._data["instruction_forms"]) for instruction_form in formatted_instruction_forms: if instruction_form["port_pressure"] is not None: - cs = ruamel.yaml.comments.CommentedSeq(instruction_form["port_pressure"]) + cs = ruamel.yaml.comments.CommentedSeq( + instruction_form["port_pressure"] + ) cs.fa.set_flow_style() instruction_form["port_pressure"] = cs @@ -349,7 +371,9 @@ class MachineModel(object): hexhash = hashlib.sha256(p.read_bytes()).hexdigest() # 1. companion cachefile: same location, with '._.pickle' - companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle") + companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix( + ".pickle" + ) if companion_cachefile.exists(): # companion file (must be up-to-date, due to equal hash) with companion_cachefile.open("rb") as f: @@ -358,7 +382,9 @@ class MachineModel(object): return data # 2. home cachefile: ~/.osaca/cache/_.pickle - home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix(".pickle") + home_cachefile = (Path(utils.CACHE_DIR) / (p.stem + "_" + hexhash)).with_suffix( + ".pickle" + ) if home_cachefile.exists(): # home file (must be up-to-date, due to equal hash) with home_cachefile.open("rb") as f: @@ -377,7 +403,9 @@ class MachineModel(object): p = Path(filepath) hexhash = hashlib.sha256(p.read_bytes()).hexdigest() # 1. companion cachefile: same location, with '._.pickle' - companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix(".pickle") + companion_cachefile = p.with_name("." + p.stem + "_" + hexhash).with_suffix( + ".pickle" + ) if os.access(str(companion_cachefile.parent), os.W_OK): with companion_cachefile.open("wb") as f: pickle.dump(self._data, f) @@ -421,7 +449,9 @@ class MachineModel(object): operand_string += operand["prefix"] operand_string += operand["shape"] if "shape" in operand else "" elif "name" in operand: - operand_string += "r" if operand["name"] == "gpr" else operand["name"][0] + operand_string += ( + "r" if operand["name"] == "gpr" else operand["name"][0] + ) elif opclass == "memory": # Memory operand_string += "m" @@ -584,7 +614,9 @@ class MachineModel(object): if "register" in operand: if i_operand["class"] != "register": return False - return self._is_x86_reg_type(i_operand, operand["register"], consider_masking=False) + return self._is_x86_reg_type( + i_operand, operand["register"], consider_masking=False + ) # memory if "memory" in operand: if i_operand["class"] != "memory": @@ -632,7 +664,8 @@ class MachineModel(object): return False if "shape" in reg: if "shape" in i_reg and ( - reg["shape"] == i_reg["shape"] or self.WILDCARD in (reg["shape"] + i_reg["shape"]) + reg["shape"] == i_reg["shape"] + or self.WILDCARD in (reg["shape"] + i_reg["shape"]) ): return True return False @@ -662,7 +695,8 @@ class MachineModel(object): if ( ( "mask" in reg - and reg["mask"].rstrip(string.digits).lower() == i_reg.get("mask") + and reg["mask"].rstrip(string.digits).lower() + == i_reg.get("mask") ) or reg.get("mask") == self.WILDCARD or i_reg.get("mask") == self.WILDCARD diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py index e95034e..0f90c02 100755 --- a/osaca/semantics/kernel_dg.py +++ b/osaca/semantics/kernel_dg.py @@ -54,7 +54,9 @@ class KernelDG(nx.DiGraph): dg = nx.DiGraph() for i, instruction_form in enumerate(kernel): dg.add_node(instruction_form["line_number"]) - dg.nodes[instruction_form["line_number"]]["instruction_form"] = instruction_form + dg.nodes[instruction_form["line_number"]][ + "instruction_form" + ] = instruction_form # add load as separate node if existent if ( INSTR_FLAGS.HAS_LD in instruction_form["flags"] @@ -69,12 +71,16 @@ class KernelDG(nx.DiGraph): dg.add_edge( instruction_form["line_number"] + 0.1, instruction_form["line_number"], - latency=instruction_form["latency"] - instruction_form["latency_wo_load"], + latency=instruction_form["latency"] + - instruction_form["latency_wo_load"], ) - for dep, dep_flags in self.find_depending(instruction_form, kernel[i + 1 :]): + for dep, dep_flags in self.find_depending( + instruction_form, kernel[i + 1 :] + ): edge_weight = ( instruction_form["latency"] - if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form + if "mem_dep" in dep_flags + or "latency_wo_load" not in instruction_form else instruction_form["latency_wo_load"] ) if "storeload_dep" in dep_flags: @@ -306,7 +312,9 @@ class KernelDG(nx.DiGraph): # store to same location (presumed) if self.is_memstore(dst.memory, instr_form, register_changes): break - self._update_reg_changes(instr_form, register_changes, only_postindexed=True) + self._update_reg_changes( + instr_form, register_changes, only_postindexed=True + ) def _update_reg_changes(self, iform, reg_state=None, only_postindexed=False): if self.arch_sem is None: @@ -314,7 +322,9 @@ class KernelDG(nx.DiGraph): return {} if reg_state is None: reg_state = {} - for reg, change in self.arch_sem.get_reg_changes(iform, only_postindexed).items(): + for reg, change in self.arch_sem.get_reg_changes( + iform, only_postindexed + ).items(): if change is None or reg_state.get(reg, {}) is None: reg_state[reg] = None else: @@ -352,15 +362,23 @@ class KernelDG(nx.DiGraph): instruction_form.semantic_operands.src_dst, ): if "register" in src: - is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read + is_read = ( + self.parser.is_reg_dependend_of(register, src.register) or is_read + ) if "flag" in src: - is_read = self.parser.is_flag_dependend_of(register, src.flag) or is_read + is_read = ( + self.parser.is_flag_dependend_of(register, src.flag) or is_read + ) if "memory" in src: if src.memory.base is not None: - is_read = self.parser.is_reg_dependend_of(register, src.memory.base) or is_read + is_read = ( + self.parser.is_reg_dependend_of(register, src.memory.base) + or is_read + ) if src.memory.index is not None: is_read = ( - self.parser.is_reg_dependend_of(register, src.memory.index) or is_read + self.parser.is_reg_dependend_of(register, src.memory.index) + or is_read ) # Check also if read in destination memory address for dst in chain( @@ -369,10 +387,14 @@ class KernelDG(nx.DiGraph): ): if "memory" in dst: if dst.memory.base is not None: - is_read = self.parser.is_reg_dependend_of(register, dst.memory.base) or is_read + is_read = ( + self.parser.is_reg_dependend_of(register, dst.memory.base) + or is_read + ) if dst.memory.index is not None: is_read = ( - self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read + self.parser.is_reg_dependend_of(register, dst.memory.index) + or is_read ) return is_read @@ -421,7 +443,10 @@ class KernelDG(nx.DiGraph): if mem.scale != src.scale: # scale factors do not match continue - if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]: + if ( + mem.index.get("prefix", "") + mem.index["name"] + != index_change["name"] + ): # index registers do not match continue addr_change += index_change["value"] * src.scale @@ -443,13 +468,19 @@ class KernelDG(nx.DiGraph): instruction_form.semantic_operands.src_dst, ): if "register" in dst: - is_written = self.parser.is_reg_dependend_of(register, dst.register) or is_written + is_written = ( + self.parser.is_reg_dependend_of(register, dst.register) + or is_written + ) if "flag" in dst: - is_written = self.parser.is_flag_dependend_of(register, dst.flag) or is_written + is_written = ( + self.parser.is_flag_dependend_of(register, dst.flag) or is_written + ) if "memory" in dst: if "pre_indexed" in dst.memory or "post_indexed" in dst.memory: is_written = ( - self.parser.is_reg_dependend_of(register, dst.memory.base) or is_written + self.parser.is_reg_dependend_of(register, dst.memory.base) + or is_written ) # Check also for possible pre- or post-indexing in memory addresses for src in chain( @@ -459,7 +490,8 @@ class KernelDG(nx.DiGraph): if "memory" in src: if "pre_indexed" in src.memory or "post_indexed" in src.memory: is_written = ( - self.parser.is_reg_dependend_of(register, src.memory.base) or is_written + self.parser.is_reg_dependend_of(register, src.memory.base) + or is_written ) return is_written @@ -490,7 +522,9 @@ class KernelDG(nx.DiGraph): lcd = self.get_loopcarried_dependencies() lcd_line_numbers = {} for dep in lcd: - lcd_line_numbers[dep] = [x["line_number"] for x, lat in lcd[dep]["dependencies"]] + lcd_line_numbers[dep] = [ + x["line_number"] for x, lat in lcd[dep]["dependencies"] + ] # add color scheme graph.graph["node"] = {"colorscheme": "accent8"} graph.graph["edge"] = {"colorscheme": "accent8"} @@ -501,7 +535,9 @@ class KernelDG(nx.DiGraph): max_line_number = max(lcd_line_numbers[dep]) graph.add_edge(max_line_number, min_line_number) graph.edges[max_line_number, min_line_number]["latency"] = [ - lat for x, lat in lcd[dep]["dependencies"] if x["line_number"] == max_line_number + lat + for x, lat in lcd[dep]["dependencies"] + if x["line_number"] == max_line_number ] # add label to edges @@ -510,7 +546,9 @@ class KernelDG(nx.DiGraph): # add CP values to graph for n in cp: - graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n["latency_cp"] + graph.nodes[n["line_number"]]["instruction_form"]["latency_cp"] = n[ + "latency_cp" + ] # color CP and LCD for n in graph.nodes: @@ -530,7 +568,8 @@ class KernelDG(nx.DiGraph): for e in graph.edges: if ( graph.nodes[e[0]]["instruction_form"]["line_number"] in cp_line_numbers - and graph.nodes[e[1]]["instruction_form"]["line_number"] in cp_line_numbers + and graph.nodes[e[1]]["instruction_form"]["line_number"] + in cp_line_numbers and e[0] < e[1] ): bold_edge = True @@ -542,7 +581,8 @@ class KernelDG(nx.DiGraph): graph.edges[e]["penwidth"] = 3 for dep in lcd_line_numbers: if ( - graph.nodes[e[0]]["instruction_form"]["line_number"] in lcd_line_numbers[dep] + graph.nodes[e[0]]["instruction_form"]["line_number"] + in lcd_line_numbers[dep] and graph.nodes[e[1]]["instruction_form"]["line_number"] in lcd_line_numbers[dep] ): diff --git a/osaca/semantics/marker_utils.py b/osaca/semantics/marker_utils.py index 708f9f1..af2cf18 100755 --- a/osaca/semantics/marker_utils.py +++ b/osaca/semantics/marker_utils.py @@ -133,7 +133,11 @@ def find_marked_section( index_end = -1 for i, line in enumerate(lines): try: - if line.instruction is None and comments is not None and line.comment is not None: + if ( + line.instruction is None + and comments is not None + and line.comment is not None + ): if comments["start"] == line.comment: index_start = i + 1 elif comments["end"] == line.comment: