From bcecabd9115a4227794a400fd73dc1f85fffcb6b Mon Sep 17 00:00:00 2001 From: JanLJL Date: Fri, 7 Mar 2025 11:49:14 +0100 Subject: [PATCH] added support for ! registers and []! mem addresses in Arm --- osaca/parser/parser_AArch64.py | 11 ++++--- osaca/semantics/isa_semantics.py | 22 +++++++++++++- tests/test_files/mops_aarch64.s | 8 +++++ tests/test_parser_AArch64.py | 9 ++++++ tests/test_semantics.py | 52 ++++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 tests/test_files/mops_aarch64.s diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py index 85a8ad0..f19db97 100644 --- a/osaca/parser/parser_AArch64.py +++ b/osaca/parser/parser_AArch64.py @@ -124,16 +124,18 @@ class ParserAArch64(BaseParser): + pp.Optional(immediate).setResultsName("shift") ).setResultsName(self.immediate_id) # Register: - # scalar: [XWBHSDQ][0-9]{1,2} | vector: [VZ][0-9]{1,2}(\.[12468]{1,2}[BHSD])? + # scalar: [XWBHSDQ][0-9]{1,2}! | vector: [VZ][0-9]{1,2}(\.[12468]{1,2}[BHSD])? # | predicate: P[0-9]{1,2}(/[ZM])? 
# ignore vector len control ZCR_EL[123] for now # define SP, ZR register aliases as regex, due to pyparsing does not support # proper lookahead alias_r31_sp = pp.Regex("(?P<prefix>[a-zA-Z])?(?P<name>(sp|SP))") alias_r31_zr = pp.Regex("(?P<prefix>[a-zA-Z])?(?P<name>(zr|ZR))") - scalar = pp.Word("xwbhsdqXWBHSDQ", exact=1).setResultsName("prefix") + pp.Word( - pp.nums - ).setResultsName("name") + scalar = ( + pp.Word("xwbhsdqXWBHSDQ", exact=1).setResultsName("prefix") + + pp.Word(pp.nums).setResultsName("name") + + pp.Optional(pp.Literal("!")).setResultsName("pre_indexed") + ) index = pp.Literal("[") + pp.Word(pp.nums).setResultsName("index") + pp.Literal("]") vector = ( pp.oneOf("v z", caseless=True).setResultsName("prefix") @@ -417,6 +419,7 @@ class ParserAArch64(BaseParser): lanes=operand["lanes"] if "lanes" in operand else None, index=operand["index"] if "index" in operand else None, predication=operand["predication"].lower() if "predication" in operand else None, + pre_indexed=True if "pre_indexed" in operand else False, ) def process_memory_address(self, memory_address): diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py index a84602d..6bf0e44 100644 --- a/osaca/semantics/isa_semantics.py +++ b/osaca/semantics/isa_semantics.py @@ -116,6 +116,22 @@ class ISASemantics(object): op_dict["source"] = self._get_regular_source_operands(instruction_form) op_dict["destination"] = self._get_regular_destination_operands(instruction_form) op_dict["src_dst"] = [] + # handle Xd! 
registers in aarch64 + if any( + [ + isinstance(op, RegisterOperand) and op.pre_indexed + for op in instruction_form.operands + ] + ): + src_dst_regs = [ + op + for op in instruction_form.operands + if (isinstance(op, RegisterOperand) and op.pre_indexed) + ] + for reg in src_dst_regs: + if reg in op_dict["source"]: + op_dict["source"].remove(reg) + op_dict["src_dst"].append(reg) # post-process pre- and post-indexing for aarch64 memory operands if self._isa == "aarch64": for operand in [op for op in op_dict["source"] if isinstance(op, MemoryOperand)]: @@ -221,7 +237,11 @@ class ISASemantics(object): base_name = (o.base.prefix if o.base.prefix is not None else "") + o.base.name reg_operand_names = {base_name: "op1"} - operand_state = {"op1": {"name": base_name, "value": o.offset.value}} + if o.offset: + operand_state = {"op1": {"name": base_name, "value": o.offset.value}} + else: + # no offset (e.g., with Armv9 memops) -> base is updated + operand_state = {"op1": None} if isa_data is not None and isa_data.operation is not None: for i, o in enumerate(instruction_form.operands): diff --git a/tests/test_files/mops_aarch64.s b/tests/test_files/mops_aarch64.s new file mode 100644 index 0000000..d48c159 --- /dev/null +++ b/tests/test_files/mops_aarch64.s @@ -0,0 +1,8 @@ +memprolog [x3]!, [x1]!, x2! +//ALT1 ldp x3,x1, [sp, #40] +memmain [x3]!, [x1]!, x2! +//ALT1 ldp x3,x1, [sp, #40] +memepilog [x3]!, [x1]!, x2! 
+//ALT1 ldp x3,x1, [sp, #40] +//ALT1 memprolog [x3]!, x2!, x1 +add x2, x2, #123 diff --git a/tests/test_parser_AArch64.py b/tests/test_parser_AArch64.py index c895c99..167dcfb 100755 --- a/tests/test_parser_AArch64.py +++ b/tests/test_parser_AArch64.py @@ -23,6 +23,9 @@ class TestParserAArch64(unittest.TestCase): self.parser = ParserAArch64() with open(self._find_file("triad_arm_iaca.s")) as f: self.triad_code = f.read() + with open(self._find_file("mops_aarch64.s")) as f: + self.mops_1_code = f.read() + self.mops_2_code = self.mops_1_code.replace("//ALT1 ", "") ################## # Test @@ -173,6 +176,12 @@ class TestParserAArch64(unittest.TestCase): self.assertEqual(parsed_9.operands[0].prefix, "x") self.assertEqual(parsed_9.operands[3].ccode, "CC") + def test_mops(self): + parsed_1 = self.parser.parse_file(self.mops_1_code) + parsed_2 = self.parser.parse_file(self.mops_2_code) + self.assertEqual(len(parsed_1), 8) + self.assertEqual(len(parsed_2), 8) + def test_parse_line(self): line_comment = "// -- Begin main" line_label = ".LBB0_1: // =>This Inner Loop Header: Depth=1" diff --git a/tests/test_semantics.py b/tests/test_semantics.py index cc8b1b6..f9073eb 100755 --- a/tests/test_semantics.py +++ b/tests/test_semantics.py @@ -48,6 +48,10 @@ class TestSemanticTools(unittest.TestCase): cls.code_AArch64_SVE = f.read() with open(cls._find_file("kernel_aarch64_deps.s")) as f: cls.code_AArch64_deps = f.read() + with open(cls._find_file("mops_aarch64.s")) as f: + cls.mops_1_code = f.read() + cls.mops_2_code = cls.mops_1_code.replace("//ALT1 ", "") + cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86") cls.kernel_x86_memdep = reduce_to_section( cls.parser_x86.parse_file(cls.code_x86_memdep), "x86" @@ -415,6 +419,54 @@ class TestSemanticTools(unittest.TestCase): ) # TODO check for correct analysis + def test_mops_deps_AArch64(self): + self.kernel_mops_1 = reduce_to_section( + self.parser_AArch64.parse_file(self.mops_1_code), "aarch64" + ) + 
self.kernel_mops_2 = reduce_to_section( + self.parser_AArch64.parse_file(self.mops_2_code), "aarch64" + ) + for i in range(len(self.kernel_mops_1)): + self.semantics_a64fx.assign_src_dst(self.kernel_mops_1[i]) + for i in range(len(self.kernel_mops_2)): + self.semantics_a64fx.assign_src_dst(self.kernel_mops_2[i]) + + mops_dest = MemoryOperand( + offset=None, + base=RegisterOperand(prefix="x", name="3"), + index=None, + scale=1, + pre_indexed=True, + ) + mops_src = MemoryOperand( + offset=None, + base=RegisterOperand(prefix="x", name="1"), + index=None, + scale=1, + pre_indexed=True, + ) + mops_n = RegisterOperand(prefix="x", name="2", pre_indexed=True) + mops_x1 = RegisterOperand(prefix="x", name="1") + for instruction_form in self.kernel_mops_1[:-1]: + with self.subTest(instruction_form=instruction_form): + if not instruction_form.line.startswith("//"): + self.assertTrue(mops_dest in instruction_form.semantic_operands["destination"]) + self.assertTrue(mops_src in instruction_form.semantic_operands["source"]) + self.assertTrue(mops_n in instruction_form.semantic_operands["src_dst"]) + self.assertTrue( + mops_dest.base in instruction_form.semantic_operands["src_dst"] + ) + self.assertTrue(mops_src.base in instruction_form.semantic_operands["src_dst"]) + for instruction_form in self.kernel_mops_2[-2:-1]: + with self.subTest(instruction_form=instruction_form): + if not instruction_form.line.startswith("//"): + self.assertTrue(mops_dest in instruction_form.semantic_operands["destination"]) + self.assertTrue(mops_x1 in instruction_form.semantic_operands["source"]) + self.assertTrue(mops_n in instruction_form.semantic_operands["src_dst"]) + self.assertTrue( + mops_dest.base in instruction_form.semantic_operands["src_dst"] + ) + def test_hidden_load(self): machine_model_hld = MachineModel( path_to_yaml=self._find_file("hidden_load_machine_model.yml")