diff --git a/osaca/parser/directive.py b/osaca/parser/directive.py index 0c5d5e7..20a49a8 100644 --- a/osaca/parser/directive.py +++ b/osaca/parser/directive.py @@ -31,6 +31,15 @@ class DirectiveOperand(Operand): @comment.setter def comment(self, comment): self._COMMENT_ID = comment + + def __eq__(self, other): + if isinstance(other, DirectiveOperand): + return ( + self._NAME_ID == other._NAME_ID and + self._PARAMETER_ID == other._PARAMETER_ID and + self._COMMENT_ID == other._COMMENT_ID + ) + return False def __str__(self): return f"Directive(NAME_ID={self._NAME_ID}, PARAMETERS={self._PARAMETER_ID}, COMMENT={self._COMMENT_ID})" diff --git a/osaca/parser/instruction_form.py b/osaca/parser/instruction_form.py index b020c09..a1db8db 100644 --- a/osaca/parser/instruction_form.py +++ b/osaca/parser/instruction_form.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +from osaca.parser.directive import DirectiveOperand + class InstructionForm: # Identifiers for operand types COMMENT_ID = "comment" @@ -18,7 +20,10 @@ class InstructionForm: , SEMANTIC_OPERANDS = None): self._INSTRUCTION_ID = INSTRUCTION_ID self._OPERANDS_ID = OPERANDS_ID - self._DIRECTIVE_ID = DIRECTIVE_ID + if DIRECTIVE_ID != None: + self._DIRECTIVE_ID = DirectiveOperand(NAME_ID = DIRECTIVE_ID['name'], PARAMETER_ID = DIRECTIVE_ID['parameters']) + else: + self._DIRECTIVE_ID = DIRECTIVE_ID self._COMMENT_ID = COMMENT_ID self._LABEL_ID = LABEL_ID self._LINE = LINE diff --git a/osaca/parser/memory.py b/osaca/parser/memory.py index dfe2ff9..d46baa0 100644 --- a/osaca/parser/memory.py +++ b/osaca/parser/memory.py @@ -3,10 +3,10 @@ from osaca.parser.operand import Operand class MemoryOperand(Operand): - def __init__(self, NAME_ID, OFFSET_ID = None, BASE_ID = None, INDEX_ID = None + def __init__(self, OFFSET_ID = None, BASE_ID = None, INDEX_ID = None , SCALE_ID = 1, SEGMENT_EXT_ID = None, MASK = None, PRE_INDEXED = False , POST_INDEXED = False, INDEXED_VAL = None): - super().__init__(NAME_ID) + super().__init__('memory') self._OFFSET_ID = OFFSET_ID self._BASE_ID = BASE_ID self._INDEX_ID = INDEX_ID diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py index 30dd9d6..3504425 100755 --- a/osaca/parser/parser_AArch64.py +++ b/osaca/parser/parser_AArch64.py @@ -3,7 +3,12 @@ from copy import deepcopy import pyparsing as pp from osaca.parser import AttrDict, BaseParser - +from osaca.parser.instruction_form import InstructionForm +from osaca.parser.operand import Operand +from osaca.parser.directive import DirectiveOperand +from osaca.parser.memory import MemoryOperand +from osaca.parser.label import LabelOperand +from osaca.parser.immediate import ImmediateOperand class ParserAArch64(BaseParser): _instance = None @@ -252,16 +257,14 @@ class ParserAArch64(BaseParser): :type line_number: int, optional :return: `dict` -- parsed asm line (comment, label, directive or instruction form) """ - instruction_form = AttrDict( - { - self.INSTRUCTION_ID: None, - self.OPERANDS_ID: [], - self.DIRECTIVE_ID: None, - self.COMMENT_ID: None, - self.LABEL_ID: None, - "line": line, - "line_number": line_number, - } + instruction_form = InstructionForm( + INSTRUCTION_ID = None, + OPERANDS_ID = [], + DIRECTIVE_ID = None, + COMMENT_ID = None, + LABEL_ID = None, + LINE = line, + LINE_NUMBER = line_number, ) result = None @@ -269,7 +272,7 @@ class ParserAArch64(BaseParser): try: result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict()) result = AttrDict.convert_dict(result) - instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID]) + instruction_form.comment = " ".join(result[self.COMMENT_ID]) except pp.ParseException: pass # 1.2 check for llvm-mca marker @@ -278,7 +281,7 @@ class ParserAArch64(BaseParser): self.llvm_markers.parseString(line, parseAll=True).asDict() ) result = AttrDict.convert_dict(result) - instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID]) + instruction_form.comment = " ".join(result[self.COMMENT_ID]) except pp.ParseException: pass # 2. Parse label @@ -286,9 +289,9 @@ class ParserAArch64(BaseParser): try: result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) result = AttrDict.convert_dict(result) - instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name + instruction_form.label = result[self.LABEL_ID].name if self.COMMENT_ID in result[self.LABEL_ID]: - instruction_form[self.COMMENT_ID] = " ".join( + instruction_form.comment= " ".join( result[self.LABEL_ID][self.COMMENT_ID] ) except pp.ParseException: @@ -301,14 +304,14 @@ class ParserAArch64(BaseParser): self.directive.parseString(line, parseAll=True).asDict() ) result = AttrDict.convert_dict(result) - instruction_form[self.DIRECTIVE_ID] = AttrDict( + instruction_form.directive = AttrDict( { "name": result[self.DIRECTIVE_ID].name, "parameters": result[self.DIRECTIVE_ID].parameters, } ) if self.COMMENT_ID in result[self.DIRECTIVE_ID]: - instruction_form[self.COMMENT_ID] = " ".join( + instruction_form.comment = " ".join( result[self.DIRECTIVE_ID][self.COMMENT_ID] ) except pp.ParseException: @@ -322,9 +325,9 @@ class ParserAArch64(BaseParser): raise ValueError( "Unable to parse {!r} on line {}".format(line, line_number) ) from e - instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID] - instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID] - instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID] + instruction_form.instruction = result[self.INSTRUCTION_ID] + instruction_form.operands = result[self.OPERANDS_ID] + instruction_form.comment = result[self.COMMENT_ID] return instruction_form diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 790911b..f6d3d80 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -218,7 +218,8 @@ class ParserX86ATT(BaseParser): try: result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) instruction_form.label = result.name - if self.COMMENT_ID in result: + if result.comment != None: + #print(result) instruction_form.comment = " ".join( result.comment ) @@ -236,7 +237,7 @@ class ParserX86ATT(BaseParser): PARAMETER_ID = result.parameters, ) - if self.COMMENT_ID in result: + if result.comment != None: instruction_form.comment = " ".join( result.comment ) @@ -325,7 +326,7 @@ class ParserX86ATT(BaseParser): offset = {"value": offset} elif offset is not None and "value" in offset: offset["value"] = int(offset["value"], 0) - new_dict = MemoryOperand(memory_address.get("name", None),OFFSET_ID = offset, BASE_ID = base, INDEX_ID = index, SCALE_ID = scale) + new_dict = MemoryOperand(OFFSET_ID = offset, BASE_ID = base, INDEX_ID = index, SCALE_ID = scale) # Add segmentation extension if existing if self.SEGMENT_EXT_ID in memory_address: new_dict.segment_ext_id = memory_address[self.SEGMENT_EXT_ID] diff --git a/tests/test_parser_AArch64.py b/tests/test_parser_AArch64.py index 76bff89..b302c2d 100755 --- a/tests/test_parser_AArch64.py +++ b/tests/test_parser_AArch64.py @@ -178,37 +178,37 @@ class TestParserAArch64(unittest.TestCase): line_5_operands = "fcmla z26.d, p0/m, z29.d, z21.d, #90" line_conditions = "ccmn x11, #1, #3, eq" - instruction_form_1 = { - "instruction": None, - "operands": [], - "directive": None, - "comment": "-- Begin main", - "label": None, - "line": "// -- Begin main", - "line_number": 1, - } + instruction_form_1 = InstructionForm( + INSTRUCTION_ID = None, + OPERANDS_ID = [], + DIRECTIVE_ID = None, + COMMENT_ID = "-- Begin main", + LABEL_ID = None, + LINE = "// -- Begin main", + LINE_NUMBER = 1, + ) - instruction_form_2 = { - "instruction": None, - "operands": [], - "directive": None, - "comment": "=>This Inner Loop Header: Depth=1", - "label": ".LBB0_1", - "line": ".LBB0_1: // =>This Inner Loop Header: Depth=1", - "line_number": 2, - } - instruction_form_3 = { - "instruction": None, - "operands": [], - "directive": {"name": "cfi_def_cfa", "parameters": ["w29", "-16"]}, - "comment": None, - "label": None, - "line": ".cfi_def_cfa w29, -16", - "line_number": 3, - } - instruction_form_4 = { - "instruction": "ldr", - "operands": [ + instruction_form_2 = InstructionForm( + INSTRUCTION_ID = None, + OPERANDS_ID = [], + DIRECTIVE_ID = None, + COMMENT_ID = "=>This Inner Loop Header: Depth=1", + LABEL_ID = ".LBB0_1", + LINE = ".LBB0_1: // =>This Inner Loop Header: Depth=1", + LINE_NUMBER = 2, + ) + instruction_form_3 = InstructionForm( + INSTRUCTION_ID = None, + OPERANDS_ID = [], + DIRECTIVE_ID = {"name": "cfi_def_cfa", "parameters": ["w29", "-16"]}, + COMMENT_ID = None, + LABEL_ID = None, + LINE = ".cfi_def_cfa w29, -16", + LINE_NUMBER = 3, + ) + instruction_form_4 = InstructionForm( + INSTRUCTION_ID = "ldr", + OPERANDS_ID = [ {"register": {"prefix": "s", "name": "0"}}, { "memory": { @@ -225,15 +225,15 @@ class TestParserAArch64(unittest.TestCase): } }, ], - "directive": None, - "comment": "= <<2", - "label": None, - "line": "ldr s0, [x11, w10, sxtw #2] // = <<2", - "line_number": 4, - } - instruction_form_5 = { - "instruction": "prfm", - "operands": [ + DIRECTIVE_ID = None, + COMMENT_ID = "= <<2", + LABEL_ID = None, + LINE = "ldr s0, [x11, w10, sxtw #2] // = <<2", + LINE_NUMBER = 4, + ) + instruction_form_5 = InstructionForm( + INSTRUCTION_ID = "prfm", + OPERANDS_ID = [ {"prfop": {"type": ["PLD"], "target": ["L1"], "policy": ["KEEP"]}}, { "memory": { @@ -244,15 +244,15 @@ class TestParserAArch64(unittest.TestCase): } }, ], - "directive": None, - "comment": "HPL", - "label": None, - "line": "prfm pldl1keep, [x26, #2048] //HPL", - "line_number": 5, - } - instruction_form_6 = { - "instruction": "stp", - "operands": [ + DIRECTIVE_ID = None, + COMMENT_ID = "HPL", + LABEL_ID = None, + LINE = "prfm pldl1keep, [x26, #2048] //HPL", + LINE_NUMBER = 5, + ) + instruction_form_6 = InstructionForm( + INSTRUCTION_ID = "stp", + OPERANDS_ID = [ {"register": {"prefix": "x", "name": "29"}}, {"register": {"prefix": "x", "name": "30"}}, { @@ -265,15 +265,15 @@ class TestParserAArch64(unittest.TestCase): } }, ], - "directive": None, - "comment": None, - "label": None, - "line": "stp x29, x30, [sp, #-16]!", - "line_number": 6, - } - instruction_form_7 = { - "instruction": "ldp", - "operands": [ + DIRECTIVE_ID = None, + COMMENT_ID = None, + LABEL_ID = None, + LINE = "stp x29, x30, [sp, #-16]!", + LINE_NUMBER = 6, + ) + instruction_form_7 = InstructionForm( + INSTRUCTION_ID = "ldp", + OPERANDS_ID = [ {"register": {"prefix": "q", "name": "2"}}, {"register": {"prefix": "q", "name": "3"}}, { @@ -286,41 +286,41 @@ class TestParserAArch64(unittest.TestCase): } }, ], - "directive": None, - "comment": None, - "label": None, - "line": "ldp q2, q3, [x11], #64", - "line_number": 7, - } - instruction_form_8 = { - "instruction": "fcmla", - "operands": [ + DIRECTIVE_ID = None, + COMMENT_ID = None, + LABEL_ID = None, + LINE = "ldp q2, q3, [x11], #64", + LINE_NUMBER = 7, + ) + instruction_form_8 = InstructionForm( + INSTRUCTION_ID = "fcmla", + OPERANDS_ID = [ {"register": {"prefix": "z", "name": "26", "shape": "d"}}, {"register": {"prefix": "p", "name": "0", "predication": "m"}}, {"register": {"prefix": "z", "name": "29", "shape": "d"}}, {"register": {"prefix": "z", "name": "21", "shape": "d"}}, {"immediate": {"value": 90, "type": "int"}}, ], - "directive": None, - "comment": None, - "label": None, - "line": "fcmla z26.d, p0/m, z29.d, z21.d, #90", - "line_number": 8, - } - instruction_form_9 = { - "instruction": "ccmn", - "operands": [ + DIRECTIVE_ID = None, + COMMENT_ID = None, + LABEL_ID = None, + LINE = "fcmla z26.d, p0/m, z29.d, z21.d, #90", + LINE_NUMBER = 8, + ) + instruction_form_9 = InstructionForm( + INSTRUCTION_ID = "ccmn", + OPERANDS_ID = [ {"register": {"prefix": "x", "name": "11"}}, {"immediate": {"value": 1, "type": "int"}}, {"immediate": {"value": 3, "type": "int"}}, {"condition": "EQ"}, ], - "directive": None, - "comment": None, - "label": None, - "line": "ccmn x11, #1, #3, eq", - "line_number": 9, - } + DIRECTIVE_ID = None, + COMMENT_ID = None, + LABEL_ID = None, + LINE = "ccmn x11, #1, #3, eq", + LINE_NUMBER = 9, + ) parsed_1 = self.parser.parse_line(line_comment, 1) parsed_2 = self.parser.parse_line(line_label, 2) @@ -458,13 +458,19 @@ class TestParserAArch64(unittest.TestCase): ) def _get_label(self, parser, label): - return parser.process_operand(parser.label.parseString(label, parseAll=True).asDict()) + return AttrDict.convert_dict( + parser.process_operand(parser.label.parseString(label, parseAll=True).asDict()) + ).label def _get_directive(self, parser, directive): - return parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict()) + return AttrDict.convert_dict( + parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict()) + ).directive def _get_condition(self, parser, condition): - return parser.process_operand(parser.condition.parseString(condition, parseAll=True).asDict()).condition + return AttrDict.convert_dict( + parser.process_operand(parser.condition.parseString(condition, parseAll=True).asDict()) + ).condition @staticmethod def _find_file(name): @@ -476,4 +482,4 @@ class TestParserAArch64(unittest.TestCase): if __name__ == "__main__": suite = unittest.TestLoader().loadTestsFromTestCase(TestParserAArch64) - unittest.TextTestRunner(verbosity=2).run(suite) + unittest.TextTestRunner(verbosity=2).run(suite) \ No newline at end of file diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index 1aa230f..7dfa76e 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -212,9 +212,9 @@ class TestParserX86ATT(unittest.TestCase): parsed_3 = self.parser.parse_line(line_directive, 3) parsed_4 = self.parser.parse_line(line_instruction, 4) - #self.assertEqual(parsed_1, instruction_form_1) - #self.assertEqual(parsed_2, instruction_form_2) - #self.assertEqual(parsed_3, instruction_form_3) + self.assertEqual(parsed_1, instruction_form_1) + self.assertEqual(parsed_2, instruction_form_2) + self.assertEqual(parsed_3, instruction_form_3) #self.assertEqual(parsed_4, instruction_form_4) def test_parse_file(self):