# Patch summary (unified git diff touching 8 files).
#
# osaca/frontend.py
#   The loop-carried-dependency report iterates the dict keys in sorted order
#   and prints the first number of each "-"-separated key as the line number.
# osaca/parser/parser_AArch64.py
#   The condition-code operand is no longer wrapped in pp.Group; the alternation
#   itself carries the results name "condition" (the inner name "code" is gone),
#   "AL" is added, and the element is exposed as self.condition.
# osaca/semantics/hw_model.py
#   Condition operands are matched against the model entry's "ccode" value;
#   an entry whose "ccode" equals self.WILDCARD matches.
# osaca/semantics/isa_semantics.py
#   Removes one blank line.
# osaca/semantics/kernel_dg.py
#   Adds a flag_dependencies parameter (default False) and passes it on to
#   create_DG, check_for_loopcarried_dep and find_depending.
#   Reads of a pre-/post-indexed register now yield a dependency flagged
#   "p_indexed"; such edges get the weight model.get("p_index_latency", 1).
#   The target instruction form is stored on the target node.
#   Loop-carried dependencies are keyed by the "-"-joined line numbers of the
#   whole path instead of the first line number only.
# tests/test_files/kernel_aarch64_deps.s
#   New 14-line kernel used by the dependency tests.
# tests/test_parser_AArch64.py, tests/test_semantics.py
#   New and updated expectations for the changes above.
#
# NOTE(review): frontend.py sorts string keys, so the order is lexicographic
#   ("10-..." sorts before "6-..."), not by line number -- confirm that this is
#   the intended report order.
# NOTE(review): in hw_model.py the conditional expression binds looser than
#   "==", i.e. it reads "(a == b.upper()) if isinstance(...) else ccode"; for a
#   non-str ccode the right-hand side of "and" is the ccode value itself.
#   i_operand["ccode"] is also subscripted before the class check -- presumably
#   every entry reaching this branch carries "ccode"; verify against the DB.
# NOTE(review): "csel, x9, x1, x9, eq" in the new .s file has a comma directly
#   after the mnemonic; it looks like a typo, but the expected dependency paths
#   in test_semantics.py may rely on how it parses -- confirm before changing.
# NOTE(review): ":type kerne: list" in the create_DG docstring is a pre-existing
#   typo that sits in a context line; it can only be fixed in the source file.
# NOTE(review): indentation and blank context lines were restored from the
#   Python structure and the hunk line counts; whitespace inside string
#   literals and in the .s file is kept as found -- TODO confirm against the
#   original commit.
#
diff --git a/osaca/frontend.py b/osaca/frontend.py
index 9a1fa2f..19814c3 100755
--- a/osaca/frontend.py
+++ b/osaca/frontend.py
@@ -144,9 +144,9 @@ class Frontend(object):
             + "-----------------------------------------\n"
         )
         # TODO find a way to overcome padding for different tab-lengths
-        for dep in dep_dict:
+        for dep in sorted(dep_dict.keys()):
             s += "{:4d} {} {:4.1f} {} {:36}{} {}\n".format(
-                dep,
+                int(dep.split("-")[0]),
                 separator,
                 dep_dict[dep]["latency"],
                 separator,
diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py
index 337f7d1..fec3261 100755
--- a/osaca/parser/parser_AArch64.py
+++ b/osaca/parser/parser_AArch64.py
@@ -196,27 +196,27 @@ class ParserAArch64(BaseParser):
                 "policy"
             )
         ).setResultsName("prfop")
-        # Condition codes
-        condition = pp.Group(
-            (
-                pp.CaselessLiteral("EQ")
-                ^ pp.CaselessLiteral("NE")
-                ^ pp.CaselessLiteral("CS")
-                ^ pp.CaselessLiteral("HS")
-                ^ pp.CaselessLiteral("CC")
-                ^ pp.CaselessLiteral("LO")
-                ^ pp.CaselessLiteral("HI")
-                ^ pp.CaselessLiteral("LS")
-                ^ pp.CaselessLiteral("GE")
-                ^ pp.CaselessLiteral("LT")
-                ^ pp.CaselessLiteral("GT")
-                ^ pp.CaselessLiteral("LE")
-                ^ pp.CaselessLiteral("MI")
-                ^ pp.CaselessLiteral("PL")
-                ^ pp.CaselessLiteral("VS")
-                ^ pp.CaselessLiteral("VC")
-            ).setResultsName("code")
+        # Condition codes, based on http://tiny.cc/armcc
+        condition = (
+            pp.CaselessLiteral("EQ")  # z set
+            ^ pp.CaselessLiteral("NE")  # z clear
+            ^ pp.CaselessLiteral("CS")  # c set
+            ^ pp.CaselessLiteral("HS")  # c set
+            ^ pp.CaselessLiteral("CC")  # c clear
+            ^ pp.CaselessLiteral("LO")  # c clear
+            ^ pp.CaselessLiteral("MI")  # n set
+            ^ pp.CaselessLiteral("PL")  # n clear
+            ^ pp.CaselessLiteral("VS")  # v set
+            ^ pp.CaselessLiteral("VC")  # v clear
+            ^ pp.CaselessLiteral("HI")  # c set and z clear
+            ^ pp.CaselessLiteral("LS")  # c clear or z set
+            ^ pp.CaselessLiteral("GE")  # n and v the same
+            ^ pp.CaselessLiteral("LT")  # n and v different
+            ^ pp.CaselessLiteral("GT")  # z clear, and n and v the same
+            ^ pp.CaselessLiteral("LE")  # z set, or n and v different
+            ^ pp.CaselessLiteral("AL")  # any
         ).setResultsName("condition")
+        self.condition = condition
         # Combine to instruction form
         operand_first = pp.Group(
             register ^ (prefetch_op | immediate) ^ memory ^ arith_immediate ^ identifier
diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py
index 88468ff..dbbfdf3 100755
--- a/osaca/semantics/hw_model.py
+++ b/osaca/semantics/hw_model.py
@@ -581,8 +581,15 @@ class MachineModel(object):
         # prefetch option
         if "prfop" in operand:
             return i_operand["class"] == "prfop"
+        # condition
         if "condition" in operand:
-            return i_operand["class"] == "condition"
+            if i_operand["ccode"] == self.WILDCARD:
+                return True
+            return i_operand["class"] == "condition" and (
+                operand.get("condition", None) == i_operand.get("ccode", None).upper()
+                if isinstance(i_operand.get("ccode", None), str)
+                else i_operand.get("ccode", None)
+            )
         # no match
         return False
 
diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py
index fca5955..6d523a7 100755
--- a/osaca/semantics/isa_semantics.py
+++ b/osaca/semantics/isa_semantics.py
@@ -127,7 +127,6 @@ class ISASemantics(object):
                         }
                     )
                 )
-
         # store operand list in dict and reassign operand key/value pair
         instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
         # assign LD/ST flags
diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py
index 8424177..a21beac 100755
--- a/osaca/semantics/kernel_dg.py
+++ b/osaca/semantics/kernel_dg.py
@@ -22,14 +22,17 @@ class KernelDG(nx.DiGraph):
         hw_model: MachineModel,
         semantics: ArchSemantics,
         timeout=10,
+        flag_dependencies=False,
     ):
         self.timed_out = False
         self.kernel = parsed_kernel
         self.parser = parser
         self.model = hw_model
         self.arch_sem = semantics
-        self.dg = self.create_DG(self.kernel)
-        self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel, timeout)
+        self.dg = self.create_DG(self.kernel, flag_dependencies)
+        self.loopcarried_deps = self.check_for_loopcarried_dep(
+            self.kernel, timeout, flag_dependencies
+        )
 
     def _extend_path(self, dst_list, kernel, dg, offset):
         for instr in kernel:
@@ -40,12 +43,15 @@ class KernelDG(nx.DiGraph):
             dst_list.extend(tmp_list)
         # print('Thread [{}-{}] done'.format(kernel[0]['line_number'], kernel[-1]['line_number']))
 
-    def create_DG(self, kernel):
+    def create_DG(self, kernel, flag_dependencies=False):
         """
         Create directed graph from given kernel
 
         :param kernel: Parsed asm kernel with assigned semantic information
         :type kerne: list
+        :param flag_dependencies: indicating if dependencies of flags should be considered,
+                                  defaults to `False`
+        :type flag_dependencies: boolean, optional
         :returns: :class:`~nx.DiGraph` -- directed graph object
         """
         # 1. go through kernel instruction forms and add them as node attribute
@@ -71,23 +77,28 @@ class KernelDG(nx.DiGraph):
                     instruction_form["line_number"],
                     latency=instruction_form["latency"] - instruction_form["latency_wo_load"],
                 )
-            for dep, dep_flags in self.find_depending(instruction_form, kernel[i + 1 :]):
+            for dep, dep_flags in self.find_depending(
+                instruction_form, kernel[i + 1 :], flag_dependencies
+            ):
                 edge_weight = (
                     instruction_form["latency"]
                     if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form
                     else instruction_form["latency_wo_load"]
                 )
-                if "storeload_dep" in dep_flags:
+                if "storeload_dep" in dep_flags and self.model is not None:
                     edge_weight += self.model.get("store_to_load_forward_latency", 0)
+                if "p_indexed" in dep_flags and self.model is not None:
+                    edge_weight = self.model.get("p_index_latency", 1)
                 dg.add_edge(
                     instruction_form["line_number"],
                     dep["line_number"],
                     latency=edge_weight,
                 )
+                dg.nodes[dep["line_number"]]["instruction_form"] = dep
         return dg
 
-    def check_for_loopcarried_dep(self, kernel, timeout=10):
+    def check_for_loopcarried_dep(self, kernel, timeout=10, flag_dependencies=False):
         """
         Try to find loop-carried dependencies in given kernel.
 
@@ -106,7 +117,7 @@ class KernelDG(nx.DiGraph):
             temp_iform["line_number"] += offset
             tmp_kernel.append(temp_iform)
         # get dependency graph
-        dg = self.create_DG(tmp_kernel)
+        dg = self.create_DG(tmp_kernel, flag_dependencies)
 
         # build cyclic loop-carried dependencies
         loopcarried_deps = []
@@ -191,7 +202,8 @@ class KernelDG(nx.DiGraph):
         # map lcd back to nodes
         loopcarried_deps_dict = {}
         for lat_sum, involved_lines in loopcarried_deps:
-            loopcarried_deps_dict[involved_lines[0][0]] = {
+            dict_key = "-".join([str(il[0]) for il in involved_lines])
+            loopcarried_deps_dict[dict_key] = {
                 "root": self._get_node_by_lineno(involved_lines[0][0]),
                 "dependencies": [
                     (self._get_node_by_lineno(ln), lat) for ln, lat in involved_lines
@@ -272,10 +284,11 @@ class KernelDG(nx.DiGraph):
                 # print(" TO", instr_form.line, register_changes)
                 if "register" in dst:
                     # read of register
-                    if self.is_read(dst.register, instr_form) and not (
-                        dst.get("pre_indexed", False) or dst.get("post_indexed", False)
-                    ):
-                        yield instr_form, []
+                    if self.is_read(dst.register, instr_form):
+                        if dst.get("pre_indexed", False) or dst.get("post_indexed", False):
+                            yield instr_form, ["p_indexed"]
+                        else:
+                            yield instr_form, []
                     # write to register -> abort
                     if self.is_written(dst.register, instr_form):
                         break
diff --git a/tests/test_files/kernel_aarch64_deps.s b/tests/test_files/kernel_aarch64_deps.s
new file mode 100644
index 0000000..ee4848f
--- /dev/null
+++ b/tests/test_files/kernel_aarch64_deps.s
@@ -0,0 +1,14 @@
+// OSACA-BEGIN
+.LBB0_32:
+    ldp q4, q5, [x9, #-32]
+    ldp q6, q7, [x9], #64
+    add x9, x9, x9
+    add x10, x9, #64 // =64
+    fmul v4.2d, v4.2d, v6.2d
+    fmul v5.2d, v4.2d, v7.2d
+    adds x10, x10, x10
+    csel, x9, x1, x9, eq
+    stp q14, q15, [x9, #-32]!
+    stp q14, q15, [x9], #64
+    b.ne .LBB0_32
+// OSACA-END
diff --git a/tests/test_parser_AArch64.py b/tests/test_parser_AArch64.py
index fdcf7f1..ba11504 100755
--- a/tests/test_parser_AArch64.py
+++ b/tests/test_parser_AArch64.py
@@ -73,6 +73,14 @@ class TestParserAArch64(unittest.TestCase):
             "IACA START",
         )
 
+    def test_condition_parser(self):
+        self.assertEqual(self._get_condition(self.parser, "EQ"), "EQ")
+        self.assertEqual(self._get_condition(self.parser, "ne"), "NE")
+        self.assertEqual(self._get_condition(self.parser, "Lt"), "LT")
+        self.assertEqual(self._get_condition(self.parser, "Gt"), "GT")
+        with self.assertRaises(ParseException):
+            self._get_condition(self.parser, "LOcondition")
+
     def test_parse_instruction(self):
         instr1 = "\t\tvcvt.F32.S32 w1, w2\t\t\t//12.27"
         instr2 = "b.lo ..B1.4 \t"
@@ -81,6 +89,7 @@ class TestParserAArch64(unittest.TestCase):
         instr5 = "ldr x0, [x0, #:got_lo12:q2c]"
         instr6 = "adrp x0, :got:visited"
         instr7 = "fadd v17.2d, v16.2d, v1.2d"
+        instr8 = "ccmp x0, x1, #4, cc"
 
         parsed_1 = self.parser.parse_instruction(instr1)
         parsed_2 = self.parser.parse_instruction(instr2)
@@ -89,6 +98,7 @@ class TestParserAArch64(unittest.TestCase):
         parsed_5 = self.parser.parse_instruction(instr5)
         parsed_6 = self.parser.parse_instruction(instr6)
         parsed_7 = self.parser.parse_instruction(instr7)
+        parsed_8 = self.parser.parse_instruction(instr8)
 
         self.assertEqual(parsed_1.instruction, "vcvt.F32.S32")
         self.assertEqual(parsed_1.operands[0].register.name, "1")
@@ -142,6 +152,11 @@ class TestParserAArch64(unittest.TestCase):
         self.assertEqual(parsed_7.operands[0].register.shape, "d")
         self.assertEqual(self.parser.get_full_reg_name(parsed_7.operands[2].register), "v1.2d")
 
+        self.assertEqual(parsed_8.instruction, "ccmp")
+        self.assertEqual(parsed_8.operands[0].register.name, "0")
+        self.assertEqual(parsed_8.operands[0].register.prefix, "x")
+        self.assertEqual(parsed_8.operands[3].condition, "CC")
+
     def test_parse_line(self):
         line_comment = "// -- Begin main"
         line_label = ".LBB0_1: // =>This Inner Loop Header: Depth=1"
@@ -151,6 +166,7 @@ class TestParserAArch64(unittest.TestCase):
         line_preindexed = "stp x29, x30, [sp, #-16]!"
         line_postindexed = "ldp q2, q3, [x11], #64"
         line_5_operands = "fcmla z26.d, p0/m, z29.d, z21.d, #90"
+        line_conditions = "ccmn x11, #1, #3, eq"
 
         instruction_form_1 = {
             "instruction": None,
@@ -281,6 +297,20 @@ class TestParserAArch64(unittest.TestCase):
             "line": "fcmla z26.d, p0/m, z29.d, z21.d, #90",
             "line_number": 8,
         }
+        instruction_form_9 = {
+            "instruction": "ccmn",
+            "operands": [
+                {"register": {"prefix": "x", "name": "11"}},
+                {"immediate": {"value": 1, "type": "int"}},
+                {"immediate": {"value": 3, "type": "int"}},
+                {"condition": "EQ"}
+            ],
+            "directive": None,
+            "comment": None,
+            "label": None,
+            "line": "ccmn x11, #1, #3, eq",
+            "line_number": 9,
+        }
 
         parsed_1 = self.parser.parse_line(line_comment, 1)
         parsed_2 = self.parser.parse_line(line_label, 2)
@@ -290,6 +320,7 @@ class TestParserAArch64(unittest.TestCase):
         parsed_6 = self.parser.parse_line(line_preindexed, 6)
         parsed_7 = self.parser.parse_line(line_postindexed, 7)
         parsed_8 = self.parser.parse_line(line_5_operands, 8)
+        parsed_9 = self.parser.parse_line(line_conditions, 9)
 
         self.assertEqual(parsed_1, instruction_form_1)
         self.assertEqual(parsed_2, instruction_form_2)
@@ -299,6 +330,7 @@ class TestParserAArch64(unittest.TestCase):
         self.assertEqual(parsed_6, instruction_form_6)
         self.assertEqual(parsed_7, instruction_form_7)
         self.assertEqual(parsed_8, instruction_form_8)
+        self.assertEqual(parsed_9, instruction_form_9)
 
     def test_parse_file(self):
         parsed = self.parser.parse_file(self.triad_code)
@@ -425,6 +457,11 @@ class TestParserAArch64(unittest.TestCase):
             parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict())
         ).directive
 
+    def _get_condition(self, parser, condition):
+        return AttrDict.convert_dict(
+            parser.process_operand(parser.condition.parseString(condition, parseAll=True).asDict())
+        ).condition
+
     @staticmethod
     def _find_file(name):
         testdir = os.path.dirname(__file__)
diff --git a/tests/test_semantics.py b/tests/test_semantics.py
index 327f173..f044a00 100755
--- a/tests/test_semantics.py
+++ b/tests/test_semantics.py
@@ -43,6 +43,8 @@ class TestSemanticTools(unittest.TestCase):
             cls.code_AArch64 = f.read()
         with open(cls._find_file("kernel_aarch64_sve.s")) as f:
             cls.code_AArch64_SVE = f.read()
+        with open(cls._find_file("kernel_aarch64_deps.s")) as f:
+            cls.code_AArch64_deps = f.read()
         cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
         cls.kernel_x86_memdep = reduce_to_section(
             cls.parser_x86.parse_file(cls.code_x86_memdep), "x86"
@@ -59,6 +61,9 @@ class TestSemanticTools(unittest.TestCase):
         cls.kernel_aarch64_SVE = reduce_to_section(
             cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64"
         )
+        cls.kernel_aarch64_deps = reduce_to_section(
+            cls.parser_AArch64.parse_file(cls.code_AArch64_deps), "aarch64"
+        )
 
         # set up machine models
         cls.machine_model_csx = MachineModel(
@@ -104,6 +109,9 @@ class TestSemanticTools(unittest.TestCase):
         for i in range(len(cls.kernel_aarch64_SVE)):
             cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
             cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
+        for i in range(len(cls.kernel_aarch64_deps)):
+            cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_deps[i])
+            cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_deps[i])
 
     ###########
     # Tests
@@ -365,7 +373,7 @@ class TestSemanticTools(unittest.TestCase):
         self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
         self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
         self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=4)), {9, 10})
-        self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {7, 8})
+        self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {6, 7, 8})
         self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=6)), {9, 10})
         self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=7)), 13)
         self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=8)), 14)
@@ -434,40 +442,76 @@ class TestSemanticTools(unittest.TestCase):
             self.semantics_tx2,
         )
         lc_deps = dg.get_loopcarried_dependencies()
-        self.assertEqual(len(lc_deps), 2)
+        self.assertEqual(len(lc_deps), 4)
         # based on line 6
-        self.assertEqual(lc_deps[6]["latency"], 28.0)
+        dep_path = "6-10-11-12-13-14"
+        self.assertEqual(lc_deps[dep_path]["latency"], 29.0)
         self.assertEqual(
-            [(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
-            [(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
+            [
+                (iform.line_number, lat)
+                for iform, lat in lc_deps[dep_path]["dependencies"]
+            ],
+            [(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 1.0)],
+        )
+        dg = KernelDG(
+            self.kernel_aarch64_deps,
+            self.parser_AArch64,
+            self.machine_model_a64fx,
+            self.semantics_a64fx,
+            flag_dependencies=True,
+        )
+        lc_deps = dg.get_loopcarried_dependencies()
+        self.assertEqual(len(lc_deps), 2)
+        # based on line 4
+        dep_path = "4-5-6-9-10-11-12"
+        self.assertEqual(lc_deps[dep_path]["latency"], 7.0)
+        self.assertEqual(
+            [(iform.line_number, lat) for iform, lat in lc_deps[dep_path]["dependencies"]],
+            [(4, 1.0), (5, 1.0), (6, 1.0), (9, 1.0), (10, 1.0), (11, 1.0), (12, 1.0)],
+        )
+        dg = KernelDG(
+            self.kernel_aarch64_deps,
+            self.parser_AArch64,
+            self.machine_model_a64fx,
+            self.semantics_a64fx,
+            flag_dependencies=False,
+        )
+        lc_deps = dg.get_loopcarried_dependencies()
+        self.assertEqual(len(lc_deps), 1)
+        # based on line 4
+        dep_path = "4-5-10-11-12"
+        self.assertEqual(lc_deps[dep_path]["latency"], 5.0)
+        self.assertEqual(
+            [(iform.line_number, lat) for iform, lat in lc_deps[dep_path]["dependencies"]],
+            [(4, 1.0), (5, 1.0), (10, 1.0), (11, 1.0), (12, 1.0)],
         )
 
     def test_loop_carried_dependency_x86(self):
-        lcd_id = 8
-        lcd_id2 = 5
+        lcd_id = "8"
+        lcd_id2 = "5"
         dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx)
         lc_deps = dg.get_loopcarried_dependencies()
         self.assertEqual(len(lc_deps), 2)
         # ID 8
         self.assertEqual(
-            lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[lcd_id]["instruction_form"]
+            lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"]
         )
         self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1)
         self.assertEqual(
             lc_deps[lcd_id]["dependencies"][0][0],
-            dg.dg.nodes(data=True)[lcd_id]["instruction_form"],
+            dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"],
         )
         # w/ flag dependencies: ID 9 w/ len=2
         # w/o flag dependencies: ID 5 w/ len=1
         # TODO discuss
         self.assertEqual(
             lc_deps[lcd_id2]["root"],
-            dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
+            dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"],
         )
         self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
         self.assertEqual(
             lc_deps[lcd_id2]["dependencies"][0][0],
-            dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
+            dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"],
         )
 
     def test_timeout_during_loop_carried_dependency(self):