From 4a382193a5d177547a1e5c7d3b79235e6a6068d5 Mon Sep 17 00:00:00 2001 From: stefandesouza Date: Sun, 20 Aug 2023 12:10:07 +0200 Subject: [PATCH] Separate operand files with inheritance, str and repr classes --- osaca/parser/directive.py | 19 ++--- osaca/parser/immediate.py | 13 ++- osaca/parser/instruction_form copy.py | 111 -------------------------- osaca/parser/label.py | 17 ++-- osaca/parser/memory.py | 38 +++++++-- osaca/parser/operand.py | 87 +++----------------- osaca/parser/parser_x86att.py | 24 +++--- tests/test_parser_AArch64.py | 4 +- tests/test_parser_x86att.py | 32 ++++---- 9 files changed, 98 insertions(+), 247 deletions(-) delete mode 100644 osaca/parser/instruction_form copy.py diff --git a/osaca/parser/directive.py b/osaca/parser/directive.py index b98707c..0c5d5e7 100644 --- a/osaca/parser/directive.py +++ b/osaca/parser/directive.py @@ -4,15 +4,10 @@ from osaca.parser.operand import Operand class DirectiveOperand(Operand): def __init__(self, NAME_ID = None, PARAMETER_ID = None, COMMENT_ID = None): - super().__init__() - self._NAME_ID = NAME_ID + super().__init__(NAME_ID) self._PARAMETER_ID = PARAMETER_ID self._COMMENT_ID = COMMENT_ID - @property - def name(self): - return self._NAME_ID - @property def parameters(self): return self._PARAMETER_ID @@ -29,14 +24,16 @@ class DirectiveOperand(Operand): raise StopIteration return self._COMMENT_ID.pop(0) - @name.setter - def name(self, name): - self._NAME_ID = name - @parameters.setter def parameters(self, parameters): self._PARAMETER_ID = parameters @comment.setter def comment(self, comment): - self._COMMENT_ID = comment \ No newline at end of file + self._COMMENT_ID = comment + + def __str__(self): + return f"Directive(NAME_ID={self._NAME_ID}, PARAMETERS={self._PARAMETER_ID}, COMMENT={self._COMMENT_ID})" + + def __repr__(self): + return f"DirectiveOperand(NAME_ID={self._NAME_ID}, PARAMETERS={self._PARAMETER_ID}, COMMENT={self._COMMENT_ID})" \ No newline at end of file diff --git a/osaca/parser/immediate.py b/osaca/parser/immediate.py index 0b970b6..e6ffe82 100644 --- a/osaca/parser/immediate.py +++ b/osaca/parser/immediate.py @@ -5,7 +5,7 @@ from osaca.parser.operand import Operand class ImmediateOperand(Operand): def __init__(self, IDENTIFIER_ID = None, TYPE_ID = None, VALUE_ID = None, SHIFT_ID = None , ): - super().__init__() + super().__init__(str(VALUE_ID)) self._IDENTIFIER_ID = IDENTIFIER_ID self._TYPE_ID = TYPE_ID self._VALUE_ID = VALUE_ID @@ -43,3 +43,14 @@ class ImmediateOperand(Operand): def index(self, shift): self._SHIFT_ID = shift + def __str__(self): + return ( + f"ImmediateOperand(IDENTIFIER_ID={self._IDENTIFIER_ID}, TYPE_ID={self._TYPE_ID}, " + f"VALUE_ID={self._VALUE_ID}, SHIFT_ID={self._SHIFT_ID})" + ) + + def __repr__(self): + return ( + f"ImmediateOperand(IDENTIFIER_ID={self._IDENTIFIER_ID}, TYPE_ID={self._TYPE_ID}, " + f"VALUE_ID={self._VALUE_ID}, SHIFT_ID={self._SHIFT_ID})" + ) \ No newline at end of file diff --git a/osaca/parser/instruction_form copy.py b/osaca/parser/instruction_form copy.py deleted file mode 100644 index b020c09..0000000 --- a/osaca/parser/instruction_form copy.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python3 - -class InstructionForm: - # Identifiers for operand types - COMMENT_ID = "comment" - DIRECTIVE_ID = "directive" - IMMEDIATE_ID = "immediate" - LABEL_ID = "label" - IDENTIFIER_ID = "identifier" - MEMORY_ID = "memory" - REGISTER_ID = "register" - SEGMENT_EXT_ID = "segment_extension" - INSTRUCTION_ID = "instruction" - OPERANDS_ID = "operands" - - def __init__(self, INSTRUCTION_ID = None, OPERANDS_ID = [], DIRECTIVE_ID = None - , COMMENT_ID = None, LABEL_ID = None, LINE = None, LINE_NUMBER = None - , SEMANTIC_OPERANDS = None): - self._INSTRUCTION_ID = INSTRUCTION_ID - self._OPERANDS_ID = OPERANDS_ID - self._DIRECTIVE_ID = DIRECTIVE_ID - self._COMMENT_ID = COMMENT_ID - self._LABEL_ID = LABEL_ID - self._LINE = LINE - self._LINE_NUMBER = LINE_NUMBER - self._SEMANTIC_OPERANDS = SEMANTIC_OPERANDS - - @property - def semantic_operands(self): - return self._SEMANTIC_OPERANDS - - @property - def instruction(self): - return self._INSTRUCTION_ID - - @property - def label(self): - return self._LABEL_ID - - @property - def comment(self): - return self._COMMENT_ID - - @property - def directive(self): - return self._DIRECTIVE_ID - - @property - def line_number(self): - return self._LINE_NUMBER - - @property - def line(self): - return self._LINE - - @property - def operands(self): - return self._OPERANDS_ID - - @semantic_operands.setter - def semantic_operands(self, semantic_operands): - self._SEMANTIC_OPERANDS = semantic_operands - - @directive.setter - def directive(self, directive): - self._DIRECTIVE_ID = directive - - @line_number.setter - def line_number(self, line_number): - self._LINE_NUMBER = line_number - - @line.setter - def line(self, line): - self._LINE = line - - @operands.setter - def operands(self, operands): - self._OPERANDS_ID = operands - - @instruction.setter - def instruction(self, instruction): - self._INSTRUCTION_ID = instruction - - @label.setter - def label(self, label): - self._LABEL_ID = label - - @comment.setter - def comment(self, comment): - self._COMMENT_ID =comment - - def __repr__(self): - return f"InstructionForm(INSTRUCTION_ID={self._INSTRUCTION_ID}, OPERANDS_ID={self._OPERANDS_ID}, DIRECTIVE_ID={self._DIRECTIVE_ID}, COMMENT_ID={self._COMMENT_ID}, LABEL_ID={self._LABEL_ID}, LINE={self._LINE}, LINE_NUMBER={self._LINE_NUMBER}, SEMANTIC_OPERANDS={self._SEMANTIC_OPERANDS})" - - def __str__(self): - return f"Instruction: {self._INSTRUCTION_ID}\nOperands: {self._OPERANDS_ID}\nDirective: {self._DIRECTIVE_ID}\nComment: {self._COMMENT_ID}\nLabel: {self._LABEL_ID}\nLine: {self._LINE}\nLine Number: {self._LINE_NUMBER}\nSemantic Operands: {self._SEMANTIC_OPERANDS}" - - def __eq__(self, other): - if isinstance(other, InstructionForm): - return ( - self._INSTRUCTION_ID == other._INSTRUCTION_ID and - self._OPERANDS_ID == other._OPERANDS_ID and - self._DIRECTIVE_ID == other._DIRECTIVE_ID and - self._COMMENT_ID == other._COMMENT_ID and - self._LABEL_ID == other._LABEL_ID and - self._LINE == other._LINE and - self._LINE_NUMBER == other._LINE_NUMBER and - self._SEMANTIC_OPERANDS == other._SEMANTIC_OPERANDS - ) - return False - \ No newline at end of file diff --git a/osaca/parser/label.py b/osaca/parser/label.py index dfbbf9f..134a240 100644 --- a/osaca/parser/label.py +++ b/osaca/parser/label.py @@ -4,17 +4,8 @@ from osaca.parser.operand import Operand class LabelOperand(Operand): def __init__(self, NAME_ID = None, COMMENT_ID = None): - super().__init__() - self._NAME_ID = NAME_ID + super().__init__(NAME_ID) self._COMMENT_ID = COMMENT_ID - - @property - def name(self): - return self._NAME_ID - - @name.setter - def name(self, name): - self._NAME_ID = name @property def comment(self): @@ -31,4 +22,10 @@ class LabelOperand(Operand): if not self._COMMENT_ID: raise StopIteration return self._COMMENT_ID.pop(0) + + def __str__(self): + return f"LabelOperand(NAME_ID={self._NAME_ID}, COMMENT={self._COMMENT_ID})" + + def __repr__(self): + return f"LabelOperand(NAME_ID={self._NAME_ID}, COMMENT={self._COMMENT_ID})" \ No newline at end of file diff --git a/osaca/parser/memory.py b/osaca/parser/memory.py index ee50f13..dfe2ff9 100644 --- a/osaca/parser/memory.py +++ b/osaca/parser/memory.py @@ -3,10 +3,10 @@ from osaca.parser.operand import Operand class MemoryOperand(Operand): - def __init__(self, OFFSET_ID = None, BASE_ID = None, INDEX_ID = None - , SCALE_ID = None, SEGMENT_EXT_ID = None, MASK = None, PRE_INDEXED = False - , POST_INDEXED = False, IMMEDIATE_ID = None): - super().__init__() + def __init__(self, NAME_ID, OFFSET_ID = None, BASE_ID = None, INDEX_ID = None + , SCALE_ID = 1, SEGMENT_EXT_ID = None, MASK = None, PRE_INDEXED = False + , POST_INDEXED = False, INDEXED_VAL = None): + super().__init__(NAME_ID) self._OFFSET_ID = OFFSET_ID self._BASE_ID = BASE_ID self._INDEX_ID = INDEX_ID @@ -15,7 +15,7 @@ class MemoryOperand(Operand): self._MASK = MASK self._PRE_INDEXED = PRE_INDEXED self._POST_INDEXED = POST_INDEXED - self._IMMEDIATE_ID = IMMEDIATE_ID + self._INDEXED_VAL = INDEXED_VAL @property def offset(self): @@ -53,6 +53,10 @@ class MemoryOperand(Operand): def post_indexed(self): return self._POST_INDEXED + @property + def indexed_val(self): + return self._INDEXED_VAL + @segment_ext_id.setter def segment_ext_id(self, segment): self._SEGMENT_EXT_ID= segment @@ -83,4 +87,26 @@ class MemoryOperand(Operand): @post_indexed.setter def post_indexed(self, post_indexed): - self._POST_INDEXED = post_indexed \ No newline at end of file + self._POST_INDEXED = post_indexed + + @indexed_val.setter + def indexed_val(self, value): + self._INDEXED_VAL = value + + def __str__(self): + return ( + f"MemoryOperand(NAME_ID={self._NAME_ID}, OFFSET_ID={self._OFFSET_ID}, " + f"BASE_ID={self._BASE_ID}, INDEX_ID={self._INDEX_ID}, SCALE_ID={self._SCALE_ID}, " + f"SEGMENT_EXT_ID={self._SEGMENT_EXT_ID}, MASK={self._MASK}, " + f"PRE_INDEXED={self._PRE_INDEXED}, POST_INDEXED={self._POST_INDEXED}, " + f"INDEXED_VAL={self._INDEXED_VAL})" + ) + + def __repr__(self): + return ( + f"MemoryOperand(NAME_ID={self._NAME_ID}, OFFSET_ID={self._OFFSET_ID}, " + f"BASE_ID={self._BASE_ID}, INDEX_ID={self._INDEX_ID}, SCALE_ID={self._SCALE_ID}, " + f"SEGMENT_EXT_ID={self._SEGMENT_EXT_ID}, MASK={self._MASK}, " + f"PRE_INDEXED={self._PRE_INDEXED}, POST_INDEXED={self._POST_INDEXED}, " + f"INDEXED_VAL={self._INDEXED_VAL})" + ) \ No newline at end of file diff --git a/osaca/parser/operand.py b/osaca/parser/operand.py index 0af91bb..028886f 100644 --- a/osaca/parser/operand.py +++ b/osaca/parser/operand.py @@ -1,88 +1,19 @@ #!/usr/bin/env python3 class Operand: - def __init__(self, MEMORY_ID = None, IMMEDIATE_ID = None, DIRECTIVE_ID = None, LABEL_ID = None - , COMMENT_ID = None, REGISTER_ID = None, IDENTIFIER_ID = None, CONDITION_ID = None): - self._MEMORY_ID = MEMORY_ID - self._IMMEDIATE_ID = IMMEDIATE_ID - self._DIRECTIVE_ID = DIRECTIVE_ID - self._LABEL_ID = LABEL_ID - self._COMMENT_ID = COMMENT_ID - self._REGISTER_ID = REGISTER_ID - self._IDENTIFIER_ID = IDENTIFIER_ID - self._CONDITION_ID = CONDITION_ID + def __init__(self, NAME_ID): + self._NAME_ID = NAME_ID @property - def memory(self): - return self._MEMORY_ID + def name(self): + return self._NAME_ID - @property - def condition(self): - return self._CONDITION_ID - - @property - def immediate(self): - return self._IMMEDIATE_ID - - @property - def directive(self): - return self._DIRECTIVE_ID - - @property - def label(self): - return self._LABEL_ID - - @property - def comment(self): - return self._COMMENT_ID - - @property - def register(self): - return self._REGISTER_ID - - @property - def identifier(self): - return self._IDENTIFIER_ID - - def copyFrom(self, operand_dict): - #self._COMMENT_ID = operand_dict["comment"] if "comment" in operand_dict else None - for key, value in operand_dict.items(): - setattr(self, key, value) - - @memory.setter - def memory(self, memory): - self._MEMORY_ID = memory - - @immediate.setter - def immediate(self, immediate): - self._IMMEDIATE_ID = immediate - - @directive.setter - def directive(self, directive): - self._DIRECTIVE_ID = directive - - @label.setter - def label(self, label): - self._LABEL_ID = label - - @comment.setter - def comment(self, comment): - self._COMMENT_ID = comment - - @register.setter - def register(self, register): - self._REGISTER_ID = register - - @identifier.setter - def identifier(self, identifier): - self._IDENTIFIER_ID = identifier - - @condition.setter - def condition(self, condition): - self._CONDITION_ID = condition + @name.setter + def name(self, name): + self._NAME_ID = name def __repr__(self): - return f"Operand(MEMORY_ID={self._MEMORY_ID}, IMMEDIATE_ID={self._IMMEDIATE_ID}, DIRECTIVE_ID={self._DIRECTIVE_ID}, LABEL_ID={self._LABEL_ID}, COMMENT_ID={self._COMMENT_ID}), REGISTER_ID={self._REGISTER_ID}, IDENTIFIER_ID={self._IDENTIFIER_ID})" + return f"Operand(NAME_ID={self._NAME_ID}" def __str__(self): - return f"Memory: {self._MEMORY_ID}\nImmediate: {self._IMMEDIATE_ID}\nDirective: {self._DIRECTIVE_ID}\nLabel: {self._LABEL_ID}\nComment: {self._COMMENT_ID}\nRegister: {self._REGISTER_ID}\nIdentifier: {self._IDENTIFIER_ID}" \ No newline at end of file + return f"Name: {self._NAME_ID}" \ No newline at end of file diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 2face35..790911b 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -217,10 +217,10 @@ class ParserX86ATT(BaseParser): if result is None: try: result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) - instruction_form.label = result.label.name - if self.COMMENT_ID in result.label: + instruction_form.label = result.name + if self.COMMENT_ID in result: instruction_form.comment = " ".join( - result.label.comment + result.comment ) except pp.ParseException: pass @@ -232,13 +232,13 @@ class ParserX86ATT(BaseParser): self.directive.parseString(line, parseAll=True).asDict() ) instruction_form.directive = DirectiveOperand( - NAME_ID = result.directive.name, - PARAMETER_ID = result.directive.parameters, + NAME_ID = result.name, + PARAMETER_ID = result.parameters, ) - if self.COMMENT_ID in result.directive: + if self.COMMENT_ID in result: instruction_form.comment = " ".join( - result.directive.comment + result.comment ) except pp.ParseException: pass @@ -309,7 +309,7 @@ class ParserX86ATT(BaseParser): directive_new.parameters = directive["parameters"] if "comment" in directive: directive_new.comment = directive["comment"] - return Operand(DIRECTIVE_ID = directive_new) + return directive_new def process_memory_address(self, memory_address): """Post-process memory address operand""" @@ -325,18 +325,18 @@ class ParserX86ATT(BaseParser): offset = {"value": offset} elif offset is not None and "value" in offset: offset["value"] = int(offset["value"], 0) - new_dict = MemoryOperand(OFFSET_ID = offset, BASE_ID = base, INDEX_ID = index, SCALE_ID = scale) + new_dict = MemoryOperand(memory_address.get("name", None),OFFSET_ID = offset, BASE_ID = base, INDEX_ID = index, SCALE_ID = scale) # Add segmentation extension if existing if self.SEGMENT_EXT_ID in memory_address: new_dict.segment_ext_id = memory_address[self.SEGMENT_EXT_ID] - return Operand(MEMORY_ID = new_dict) + return new_dict def process_label(self, label): """Post-process label asm line""" # remove duplicated 'name' level due to identifier label["name"] = label["name"][0]["name"] new_label = LabelOperand(NAME_ID = label["name"], COMMENT_ID = label["comment"] if "comment" in label else None) - return Operand(LABEL_ID = new_label) + return new_label def process_immediate(self, immediate): """Post-process immediate operand""" @@ -345,7 +345,7 @@ class ParserX86ATT(BaseParser): return immediate # otherwise just make sure the immediate is a decimal immediate["value"] = int(immediate["value"], 0) - return Operand(IMMEDIATE_ID = immediate) + return immediate def get_full_reg_name(self, register): """Return one register name string including all attributes""" diff --git a/tests/test_parser_AArch64.py b/tests/test_parser_AArch64.py index 1a508bd..76bff89 100755 --- a/tests/test_parser_AArch64.py +++ b/tests/test_parser_AArch64.py @@ -458,10 +458,10 @@ class TestParserAArch64(unittest.TestCase): ) def _get_label(self, parser, label): - return parser.process_operand(parser.label.parseString(label, parseAll=True).asDict()).label + return parser.process_operand(parser.label.parseString(label, parseAll=True).asDict()) def _get_directive(self, parser, directive): - return parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict()).directive + return parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict()) def _get_condition(self, parser, condition): return parser.process_operand(parser.condition.parseString(condition, parseAll=True).asDict()).condition diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index 9f5dcf2..1aa230f 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -123,33 +123,33 @@ class TestParserX86ATT(unittest.TestCase): self.assertIsNone(parsed_2.comment) self.assertEqual(parsed_3.instruction, "movl") - self.assertEqual(parsed_3.operands[0].immediate.value, 222) + self.assertEqual(parsed_3.operands[0].value, 222) self.assertEqual(parsed_3.operands[1].register.name, "ebx") self.assertEqual(parsed_3.comment, "IACA END") self.assertEqual(parsed_4.instruction, "vmovss") - self.assertEqual(parsed_4.operands[1].memory.offset.value, -4) - self.assertEqual(parsed_4.operands[1].memory.base.name, "rsp") - self.assertEqual(parsed_4.operands[1].memory.index.name, "rax") - self.assertEqual(parsed_4.operands[1].memory.scale, 8) + self.assertEqual(parsed_4.operands[1].offset.value, -4) + self.assertEqual(parsed_4.operands[1].base.name, "rsp") + self.assertEqual(parsed_4.operands[1].index.name, "rax") + self.assertEqual(parsed_4.operands[1].scale, 8) self.assertEqual(parsed_4.operands[0].register.name, "xmm4") self.assertEqual(parsed_4.comment, "12.9") self.assertEqual(parsed_5.instruction, "mov") - self.assertEqual(parsed_5.operands[1].memory.offset.identifier.name, "var") - self.assertIsNone(parsed_5.operands[1].memory.base) - self.assertIsNone(parsed_5.operands[1].memory.index) - self.assertEqual(parsed_5.operands[1].memory.scale, 1) + self.assertEqual(parsed_5.operands[1].offset.identifier.name, "var") + self.assertIsNone(parsed_5.operands[1].base) + self.assertIsNone(parsed_5.operands[1].index) + self.assertEqual(parsed_5.operands[1].scale, 1) self.assertEqual(parsed_5.operands[0].register.name, "ebx") self.assertEqual(parsed_6.instruction, "lea") - self.assertIsNone(parsed_6.operands[0].memory.offset) - self.assertIsNone(parsed_6.operands[0].memory.base) - self.assertEqual(parsed_6.operands[0].memory.index.name, "rax") - self.assertEqual(parsed_6.operands[0].memory.scale, 8) + self.assertIsNone(parsed_6.operands[0].offset) + self.assertIsNone(parsed_6.operands[0].base) + self.assertEqual(parsed_6.operands[0].index.name, "rax") + self.assertEqual(parsed_6.operands[0].scale, 8) self.assertEqual(parsed_6.operands[1].register.name, "rbx") - self.assertEqual(parsed_7.operands[0].immediate.value, 0x1) + self.assertEqual(parsed_7.operands[0].value, 0x1) self.assertEqual(parsed_7.operands[1].register.name, "xmm0") self.assertEqual(parsed_7.operands[2].register.name, "ymm1") self.assertEqual(parsed_7.operands[3].register.name, "ymm1") @@ -310,10 +310,10 @@ class TestParserX86ATT(unittest.TestCase): ) def _get_label(self, parser, label): - return parser.process_operand(parser.label.parseString(label, parseAll=True).asDict()).label + return parser.process_operand(parser.label.parseString(label, parseAll=True).asDict()) def _get_directive(self, parser, directive): - return parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict()).directive + return parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict()) @staticmethod def _find_file(name):