From 7ad3438af543c9d6991cac6fed9450ab6adb1500 Mon Sep 17 00:00:00 2001 From: stefandesouza Date: Sat, 24 Feb 2024 14:15:25 +0100 Subject: [PATCH] Removed comments from operands --- osaca/parser/directive.py | 21 +--- osaca/parser/label.py | 21 +--- osaca/parser/parser_AArch64.py | 100 +++++++++--------- osaca/parser/parser_x86att.py | 86 +++++++--------- tests/test_parser_AArch64.py | 182 ++++++++++++++++----------------- tests/test_parser_x86att.py | 68 ++++++------ 6 files changed, 213 insertions(+), 265 deletions(-) diff --git a/osaca/parser/directive.py b/osaca/parser/directive.py index e68be74..286e7d8 100644 --- a/osaca/parser/directive.py +++ b/osaca/parser/directive.py @@ -4,48 +4,31 @@ from osaca.parser.operand import Operand class DirectiveOperand(Operand): - def __init__(self, name=None, parameter_id=None, comment_id=None): + def __init__(self, name=None, parameter_id=None): super().__init__(name) self._parameter_id = parameter_id - self._comment_id = comment_id @property def parameters(self): return self._parameter_id - @property - def comment(self): - return self._comment_id - - def __iter__(self): - return self - - def __next__(self): - if not self._comment_id: - raise StopIteration - return self._comment_id.pop(0) - @parameters.setter def parameters(self, parameters): self._parameter_id = parameters - @comment.setter - def comment(self, comment): - self._comment_id = comment def __eq__(self, other): if isinstance(other, DirectiveOperand): return ( self._name == other._name and self._parameter_id == other._parameter_id - and self._comment_id == other._comment_id ) elif isinstance(other, dict): return self._name == other["name"] and self._parameter_id == other["parameters"] return False def __str__(self): - return f"Directive(name={self._name}, parameters={self._parameter_id}, comment={self._comment_id})" + return f"Directive(name={self._name}, parameters={self._parameter_id})" def __repr__(self): return self.__str__() diff --git a/osaca/parser/label.py b/osaca/parser/label.py index bfc9716..4cafc57 100644 --- a/osaca/parser/label.py +++ b/osaca/parser/label.py @@ -4,28 +4,11 @@ from osaca.parser.operand import Operand class LabelOperand(Operand): - def __init__(self, name=None, comment_id=None): + def __init__(self, name=None): super().__init__(name) - self._comment_id = comment_id - - @property - def comment(self): - return self._comment_id - - @comment.setter - def comment(self, comment): - self._comment_id = comment - - def __iter__(self): - return self - - def __next__(self): - if not self._comment_id: - raise StopIteration - return self._comment_id.pop(0) def __str__(self): - return f"Label(name={self._name}, comment={self._comment_id})" + return f"Label(name={self._name}" def __repr__(self): return self.__str__() diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py index 35cc85d..087f639 100644 --- a/osaca/parser/parser_AArch64.py +++ b/osaca/parser/parser_AArch64.py @@ -3,7 +3,7 @@ from copy import deepcopy import pyparsing as pp from osaca.parser import BaseParser -from osaca.parser.instruction_form import instructionForm +from osaca.parser.instruction_form import InstructionForm from osaca.parser.operand import Operand from osaca.parser.directive import DirectiveOperand from osaca.parser.memory import MemoryOperand @@ -12,6 +12,7 @@ from osaca.parser.register import RegisterOperand from osaca.parser.identifier import IdentifierOperand from osaca.parser.immediate import ImmediateOperand from osaca.parser.condition import ConditionOperand +from osaca.parser.flag import FlagOperand class ParserAArch64(BaseParser): @@ -51,7 +52,7 @@ class ParserAArch64(BaseParser): pp.Suppress(pp.Literal("+")) + (hex_number | decimal_number).setResultsName("offset") ) - ).setResultsName(self.IDENTIFIER_ID) + ).setResultsName(self.identifier_id) # Label self.label = pp.Group( identifier.setResultsName("name") + pp.Literal(":") + pp.Optional(self.comment) @@ -105,7 +106,7 @@ class ParserAArch64(BaseParser): pp.Optional(pp.Literal(symbol_immediate)) + (hex_number ^ decimal_number ^ float_ ^ double_) | (pp.Optional(pp.Literal(symbol_immediate)) + identifier) - ).setResultsName(self.IMMEDIATE_ID) + ).setResultsName(self.immediate_id) shift_op = ( pp.CaselessLiteral("lsl") ^ pp.CaselessLiteral("lsr") @@ -121,7 +122,7 @@ class ParserAArch64(BaseParser): + pp.Suppress(pp.Literal(",")) + shift_op.setResultsName("shift_op") + pp.Optional(immediate).setResultsName("shift") - ).setResultsName(self.IMMEDIATE_ID) + ).setResultsName(self.immediate_id) # Register: # scalar: [XWBHSDQ][0-9]{1,2} | vector: [VZ][0-9]{1,2}(\.[12468]{1,2}[BHSD])? # | predicate: P[0-9]{1,2}(/[ZM])? @@ -179,7 +180,7 @@ class ParserAArch64(BaseParser): + shift_op.setResultsName("shift_op") + pp.Optional(immediate).setResultsName("shift") ) - ).setResultsName(self.REGISTER_ID) + ).setResultsName(self.register_id) self.register = register # Memory register_index = register.setResultsName("index") + pp.Optional( @@ -195,7 +196,7 @@ class ParserAArch64(BaseParser): pp.Literal("!").setResultsName("pre_indexed") | (pp.Suppress(pp.Literal(",")) + immediate.setResultsName("post_indexed")) ) - ).setResultsName(self.MEMORY_ID) + ).setResultsName(self.memory_id) prefetch_op = pp.Group( pp.Group(pp.CaselessLiteral("PLD") ^ pp.CaselessLiteral("PST")).setResultsName("type") + pp.Group( @@ -261,8 +262,8 @@ class ParserAArch64(BaseParser): :type line_number: int, optional :return: `dict` -- parsed asm line (comment, label, directive or instruction form) """ - instruction_form = instructionForm( - instruction_id=None, + instruction_form = InstructionForm( + mnemonic=None, operands_id=[], directive_id=None, comment_id=None, @@ -289,24 +290,26 @@ class ParserAArch64(BaseParser): # 2. Parse label if result is None: try: + # returns tuple with label operand and comment, if any result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) - instruction_form.label = result.name - if result.comment is not None: - instruction_form.comment = " ".join(result.comment) + instruction_form.label = result[0].name + if result[1] is not None: + instruction_form.comment = " ".join(result[1]) except pp.ParseException: pass # 3. Parse directive if result is None: try: + # returns directive with label operand and comment, if any result = self.process_operand( self.directive.parseString(line, parseAll=True).asDict() ) instruction_form.directive = DirectiveOperand( - name=result.name, parameter_id=result.parameters + name=result[0].name, parameter_id=result[0].parameters ) - if result.comment is not None: - instruction_form.comment = " ".join(result.comment) + if result[1] is not None: + instruction_form.comment = " ".join(result[1]) except pp.ParseException: pass @@ -318,7 +321,7 @@ class ParserAArch64(BaseParser): raise ValueError( "Unable to parse {!r} on line {}".format(line, line_number) ) from e - instruction_form.instruction = result.instruction + instruction_form.mnemonic = result.mnemonic instruction_form.operands = result.operands instruction_form.comment = result.comment return instruction_form @@ -353,8 +356,8 @@ class ParserAArch64(BaseParser): if "operand5" in result: operand = self.process_operand(result["operand5"]) operands.extend(operand) if isinstance(operand, list) else operands.append(operand) - return_dict = instructionForm( - instruction_id=result["mnemonic"], + return_dict = InstructionForm( + mnemonic=result["mnemonic"], operands_id=operands, comment_id=" ".join(result[self.comment_id]) if self.comment_id in result else None, ) @@ -363,41 +366,40 @@ class ParserAArch64(BaseParser): def process_operand(self, operand): """Post-process operand""" # structure memory addresses - if self.MEMORY_ID in operand: - return self.process_memory_address(operand[self.MEMORY_ID]) + if self.memory_id in operand: + return self.process_memory_address(operand[self.memory_id]) # structure register lists - if self.REGISTER_ID in operand and ( - "list" in operand[self.REGISTER_ID] or "range" in operand[self.REGISTER_ID] + if self.register_id in operand and ( + "list" in operand[self.register_id] or "range" in operand[self.register_id] ): # resolve ranges and lists - return self.resolve_range_list(self.process_register_list(operand[self.REGISTER_ID])) - if self.REGISTER_ID in operand and operand[self.REGISTER_ID]["name"].lower() == "sp": - return self.process_sp_register(operand[self.REGISTER_ID]) + return self.resolve_range_list(self.process_register_list(operand[self.register_id])) + if self.register_id in operand and operand[self.register_id]["name"].lower() == "sp": + return self.process_sp_register(operand[self.register_id]) # add value attribute to floating point immediates without exponent - if self.IMMEDIATE_ID in operand: - return self.process_immediate(operand[self.IMMEDIATE_ID]) + if self.immediate_id in operand: + return self.process_immediate(operand[self.immediate_id]) if self.label_id in operand: return self.process_label(operand[self.label_id]) - if self.IDENTIFIER_ID in operand: - return self.process_identifier(operand[self.IDENTIFIER_ID]) - if self.REGISTER_ID in operand: - return self.process_register_operand(operand[self.REGISTER_ID]) + if self.identifier_id in operand: + return self.process_identifier(operand[self.identifier_id]) + if self.register_id in operand: + return self.process_register_operand(operand[self.register_id]) if self.directive_id in operand: return self.process_directive_operand(operand[self.directive_id]) - if self.CONDITION_ID in operand: - return self.process_condition(operand[self.CONDITION_ID]) + if self.condition_id in operand: + return self.process_condition(operand[self.condition_id]) return operand def process_directive_operand(self, operand): return DirectiveOperand( name=operand["name"], parameter_id=operand["parameters"], - comment_id=operand["comment"] if "comment" in operand else None, - ) + ), operand["comment"] if "comment" in operand else None def process_register_operand(self, operand): return RegisterOperand( - prefix_id=operand["prefix"], + prefix=operand["prefix"], name=operand["name"], shape=operand["shape"] if "shape" in operand else None, lanes=operand["lanes"] if "lanes" in operand else None, @@ -434,15 +436,15 @@ class ParserAArch64(BaseParser): if index is not None: index = RegisterOperand( name=index["name"], - prefix_id=index["prefix"] if "prefix" in index else None, + prefix=index["prefix"] if "prefix" in index else None, shift=index["shift"] if "shift" in index else None, shift_op=index["shift_op"] if "shift_op" in index else None, ) new_dict = MemoryOperand( - offset_ID=offset, - base_id=RegisterOperand(name=base["name"], prefix_id=base["prefix"]), - index_id=index, - scale_id=scale, + offset=offset, + base=RegisterOperand(name=base["name"], prefix=base["prefix"]), + index=index, + scale=scale, ) if "pre_indexed" in memory_address: new_dict.pre_indexed = True @@ -455,7 +457,7 @@ class ParserAArch64(BaseParser): def process_sp_register(self, register): """Post-process stack pointer register""" - return RegisterOperand(prefix_id="x", name="sp") + return RegisterOperand(prefix="x", name="sp") def process_condition(self, condition): return ConditionOperand(ccode=condition.upper()) @@ -510,7 +512,7 @@ class ParserAArch64(BaseParser): rlist.append(self.list_element.parseString(r, parseAll=True).asDict()) index = register_list.get("index", None) new_dict = {dict_name: rlist, "index": index} - return {self.REGISTER_ID: new_dict} + return {self.register_id: new_dict} def process_immediate(self, immediate): """Post-process immediate operand""" @@ -555,10 +557,7 @@ class ParserAArch64(BaseParser): def process_label(self, label): """Post-process label asm line""" # remove duplicated 'name' level due to identifier - return LabelOperand( - name=label["name"]["name"], - comment_id=label["comment"] if self.comment_id in label else None, - ) + return LabelOperand(name=label["name"]["name"]), label["comment"] if self.comment_id in label else None def process_identifier(self, identifier): """Post-process identifier operand""" @@ -622,14 +621,7 @@ class ParserAArch64(BaseParser): """Check if ``flag_a`` is dependent on ``flag_b``""" # we assume flags are independent of each other, e.g., CF can be read while ZF gets written # TODO validate this assumption - if isinstance(flag_a, Operand): - return flag_a.name == flag_b["name"] - else: - return flag_a["name"] == flag_b["name"] - - if flag_a.name == flag_b["name"]: - return True - return False + return flag_a.name == flag_b.name def is_reg_dependend_of(self, reg_a, reg_b): """Check if ``reg_a`` is dependent on ``reg_b``""" diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 7ca2dec..c3f0f0b 100644 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -6,7 +6,7 @@ import re import pyparsing as pp from osaca.parser import BaseParser -from osaca.parser.instruction_form import instructionForm +from osaca.parser.instruction_form import InstructionForm from osaca.parser.operand import Operand from osaca.parser.directive import DirectiveOperand from osaca.parser.memory import MemoryOperand @@ -14,6 +14,7 @@ from osaca.parser.label import LabelOperand from osaca.parser.register import RegisterOperand from osaca.parser.identifier import IdentifierOperand from osaca.parser.immediate import ImmediateOperand +from osaca.parser.flag import FlagOperand class ParserX86ATT(BaseParser): @@ -89,16 +90,16 @@ class ParserX86ATT(BaseParser): + pp.Suppress(pp.Literal("}")) ) ) - ).setResultsName(self.REGISTER_ID) + ).setResultsName(self.register_id) # Immediate: pp.Regex('^\$(-?[0-9]+)|(0x[0-9a-fA-F]+),?') symbol_immediate = "$" immediate = pp.Group( pp.Literal(symbol_immediate) + (hex_number | decimal_number | identifier) - ).setResultsName(self.IMMEDIATE_ID) + ).setResultsName(self.immediate_id) # Memory preparations offset = pp.Group(hex_number | decimal_number | identifier).setResultsName( - self.IMMEDIATE_ID + self.immediate_id ) scale = pp.Word("1248", exact=1) # Segment register extension @@ -120,7 +121,7 @@ class ParserX86ATT(BaseParser): pp.Optional(pp.Suppress(pp.Literal("*"))) + self.register.setResultsName("base") + pp.Literal(":") - + segment_extension.setResultsName(self.segment_ext_id) + + segment_extension.setResultsName(self.segment_ext) ) # Memory: offset | seg:seg_ext | offset(base, index, scale){mask} memory_abs = pp.Suppress(pp.Literal("*")) + (offset | self.register).setResultsName( @@ -147,7 +148,7 @@ class ParserX86ATT(BaseParser): | memory_abs | memory_segmentation | (hex_number | pp.Word(pp.nums)).setResultsName("offset") - ).setResultsName(self.MEMORY_ID) + ).setResultsName(self.memory_id) # Directive # parameter can be any quoted string or sequence of characters besides '#' (for comments) @@ -207,7 +208,7 @@ class ParserX86ATT(BaseParser): :type line_number: int, optional :return: ``dict`` -- parsed asm line (comment, label, directive or instruction form) """ - instruction_form = instructionForm(line=line, line_number=line_number) + instruction_form = InstructionForm(line=line, line_number=line_number) result = None # 1. Parse comment @@ -220,26 +221,28 @@ class ParserX86ATT(BaseParser): # 2. Parse label if result is None: try: + # returns tuple with label operand and comment, if any result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) - instruction_form.label = result.name - if result.comment is not None: - instruction_form.comment = " ".join(result.comment) + instruction_form.label = result[0].name + if result[1] is not None: + instruction_form.comment = " ".join(result[1]) except pp.ParseException: pass # 3. Parse directive if result is None: try: + # returns tuple with directive operand and comment, if any result = self.process_operand( self.directive.parseString(line, parseAll=True).asDict() ) instruction_form.directive = DirectiveOperand( - name=result.name, - parameter_id=result.parameters, + name=result[0].name, + parameter_id=result[0].parameters, ) - if result.comment is not None: - instruction_form.comment = " ".join(result.comment) + if result[1] is not None: + instruction_form.comment = " ".join(result[1]) except pp.ParseException: pass @@ -251,7 +254,7 @@ class ParserX86ATT(BaseParser): raise ValueError( "Could not parse instruction on line {}: {!r}".format(line_number, line) ) - instruction_form.instruction = result.instruction + instruction_form.mnemonic = result.mnemonic instruction_form.operands = result.operands instruction_form.comment = result.comment return instruction_form @@ -278,8 +281,8 @@ class ParserX86ATT(BaseParser): # Check fourth operand if "operand4" in result: operands.append(self.process_operand(result["operand4"])) - return_dict = instructionForm( - instruction_id=result["mnemonic"].split(",")[0], + return_dict = InstructionForm( + mnemonic=result["mnemonic"].split(",")[0], operands_id=operands, comment_id=" ".join(result[self.comment_id]) if self.comment_id in result else None, ) @@ -289,23 +292,23 @@ class ParserX86ATT(BaseParser): def process_operand(self, operand): """Post-process operand""" # For the moment, only used to structure memory addresses - if self.MEMORY_ID in operand: - return self.process_memory_address(operand[self.MEMORY_ID]) - if self.IMMEDIATE_ID in operand: - return self.process_immediate(operand[self.IMMEDIATE_ID]) + if self.memory_id in operand: + return self.process_memory_address(operand[self.memory_id]) + if self.immediate_id in operand: + return self.process_immediate(operand[self.immediate_id]) if self.label_id in operand: return self.process_label(operand[self.label_id]) if self.directive_id in operand: return self.process_directive(operand[self.directive_id]) - if self.REGISTER_ID in operand: - return self.process_register(operand[self.REGISTER_ID]) - if self.IDENTIFIER_ID in operand: - return self.process_identifier(operand[self.IDENTIFIER_ID]) + if self.register_id in operand: + return self.process_register(operand[self.register_id]) + if self.identifier_id in operand: + return self.process_identifier(operand[self.identifier_id]) return operand def process_register(self, operand): return RegisterOperand( - prefix_id=operand["prefix"] if "prefix" in operand else None, + prefix=operand["prefix"] if "prefix" in operand else None, name=operand["name"], shape=operand["shape"] if "shape" in operand else None, lanes=operand["lanes"] if "lanes" in operand else None, @@ -314,12 +317,8 @@ class ParserX86ATT(BaseParser): ) def process_directive(self, directive): - directive_new = DirectiveOperand(name=directive["name"], parameter_id=[]) - if "parameters" in directive: - directive_new.parameters = directive["parameters"] - if "comment" in directive: - directive_new.comment = directive["comment"] - return directive_new + directive_new = DirectiveOperand(name=directive["name"], parameter_id=directive["parameters"] if "parameters" in directive else []) + return directive_new, directive["comment"] if "comment" in directive else None def process_memory_address(self, memory_address): """Post-process memory address operand""" @@ -339,29 +338,27 @@ class ParserX86ATT(BaseParser): offset = ImmediateOperand(value_id=int(offset["value"], 0)) if base is not None: baseOp = RegisterOperand( - name=base["name"], prefix_id=base["prefix"] if "prefix" in base else None + name=base["name"], prefix=base["prefix"] if "prefix" in base else None ) if index is not None: indexOp = RegisterOperand( - name=index["name"], prefix_id=index["prefix"] if "prefix" in index else None + name=index["name"], prefix=index["prefix"] if "prefix" in index else None ) if isinstance(offset, dict) and "identifier" in offset: offset = IdentifierOperand(name=offset["identifier"]["name"]) new_dict = MemoryOperand( - offset_ID=offset, base_id=baseOp, index_id=indexOp, scale_id=scale + offset=offset, base=baseOp, index=indexOp, scale=scale ) # Add segmentation extension if existing - if self.segment_ext_id in memory_address: - new_dict.segment_ext_id = memory_address[self.segment_ext_id] + if self.segment_ext in memory_address: + new_dict.segment_ext = memory_address[self.segment_ext] return new_dict def process_label(self, label): """Post-process label asm line""" # remove duplicated 'name' level due to identifier label["name"] = label["name"][0]["name"] - return LabelOperand( - name=label["name"], comment_id=label["comment"] if "comment" in label else None - ) + return LabelOperand(name=label["name"]), label["comment"] if "comment" in label else None def process_immediate(self, immediate): """Post-process immediate operand""" @@ -369,7 +366,6 @@ class ParserX86ATT(BaseParser): # actually an identifier, change declaration return self.process_identifier(immediate["identifier"]) # otherwise just make sure the immediate is a decimal - # immediate["value"] = int(immediate["value"], 0) new_immediate = ImmediateOperand(value_id=int(immediate["value"], 0)) return new_immediate @@ -398,13 +394,7 @@ class ParserX86ATT(BaseParser): """Check if ``flag_a`` is dependent on ``flag_b``""" # we assume flags are independent of each other, e.g., CF can be read while ZF gets written # TODO validate this assumption - if isinstance(flag_b, Operand): - return flag_a.name == flag_b.name - else: - return flag_a.name == flag_b["name"] - if flag_a.name == flag_b.name: - return True - return False + return flag_a.name == flag_b.name def is_reg_dependend_of(self, reg_a, reg_b): """Check if ``reg_a`` is dependent on ``reg_b``""" diff --git a/tests/test_parser_AArch64.py b/tests/test_parser_AArch64.py index 95f45ca..b1e0350 100755 --- a/tests/test_parser_AArch64.py +++ b/tests/test_parser_AArch64.py @@ -8,7 +8,7 @@ import unittest from pyparsing import ParseException -from osaca.parser import ParserAArch64, instructionForm +from osaca.parser import ParserAArch64, InstructionForm from osaca.parser.directive import DirectiveOperand from osaca.parser.memory import MemoryOperand from osaca.parser.register import RegisterOperand @@ -38,40 +38,40 @@ class TestParserAArch64(unittest.TestCase): ) def test_label_parser(self): - self.assertEqual(self._get_label(self.parser, "main:").name, "main") - self.assertEqual(self._get_label(self.parser, "..B1.10:").name, "..B1.10") - self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3") - self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1") + self.assertEqual(self._get_label(self.parser, "main:")[0].name, "main") + self.assertEqual(self._get_label(self.parser, "..B1.10:")[0].name, "..B1.10") + self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:")[0].name, ".2.3_2_pack.3") + self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1")[0].name, ".L1") self.assertEqual( - " ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment), + " ".join(self._get_label(self.parser, ".L1:\t\t\t//label1")[1]), "label1", ) with self.assertRaises(ParseException): self._get_label(self.parser, "\t.cfi_startproc") def test_directive_parser(self): - self.assertEqual(self._get_directive(self.parser, "\t.text").name, "text") - self.assertEqual(len(self._get_directive(self.parser, "\t.text").parameters), 0) - self.assertEqual(self._get_directive(self.parser, "\t.align\t16,0x90").name, "align") - self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2) + self.assertEqual(self._get_directive(self.parser, "\t.text")[0].name, "text") + self.assertEqual(len(self._get_directive(self.parser, "\t.text")[0].parameters), 0) + self.assertEqual(self._get_directive(self.parser, "\t.align\t16,0x90")[0].name, "align") + self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90")[0].parameters), 2) self.assertEqual( - self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90" + self._get_directive(self.parser, "\t.align\t16,0x90")[0].parameters[1], "0x90" ) self.assertEqual( - self._get_directive(self.parser, " .byte 100,103,144 //IACA START").name, + self._get_directive(self.parser, " .byte 100,103,144 //IACA START")[0].name, "byte", ) self.assertEqual( self._get_directive( self.parser, " .byte 100,103,144 //IACA START" - ).parameters[2], + )[0].parameters[2], "144", ) self.assertEqual( " ".join( self._get_directive( self.parser, " .byte 100,103,144 //IACA START" - ).comment + )[1] ), "IACA START", ) @@ -105,25 +105,25 @@ class TestParserAArch64(unittest.TestCase): parsed_8 = self.parser.parse_instruction(instr8) parsed_9 = self.parser.parse_instruction(instr9) - self.assertEqual(parsed_1.instruction, "vcvt.F32.S32") + self.assertEqual(parsed_1.mnemonic, "vcvt.F32.S32") self.assertEqual(parsed_1.operands[0].name, "1") self.assertEqual(parsed_1.operands[0].prefix, "w") self.assertEqual(parsed_1.operands[1].name, "2") self.assertEqual(parsed_1.operands[1].prefix, "w") self.assertEqual(parsed_1.comment, "12.27") - self.assertEqual(parsed_2.instruction, "b.lo") + self.assertEqual(parsed_2.mnemonic, "b.lo") self.assertEqual(parsed_2.operands[0].name, "..B1.4") self.assertEqual(len(parsed_2.operands), 1) self.assertIsNone(parsed_2.comment) - self.assertEqual(parsed_3.instruction, "mov") + self.assertEqual(parsed_3.mnemonic, "mov") self.assertEqual(parsed_3.operands[0].name, "2") self.assertEqual(parsed_3.operands[0].prefix, "x") self.assertEqual(parsed_3.operands[1].value, int("0x222", 0)) self.assertEqual(parsed_3.comment, "NOT IACA END") - self.assertEqual(parsed_4.instruction, "str") + self.assertEqual(parsed_4.mnemonic, "str") self.assertIsNone(parsed_4.operands[1].offset) self.assertEqual(parsed_4.operands[1].base.name, "sp") self.assertEqual(parsed_4.operands[1].base.prefix, "x") @@ -134,7 +134,7 @@ class TestParserAArch64(unittest.TestCase): self.assertEqual(parsed_4.operands[0].prefix, "x") self.assertEqual(parsed_4.comment, "12.9") - self.assertEqual(parsed_5.instruction, "ldr") + self.assertEqual(parsed_5.mnemonic, "ldr") self.assertEqual(parsed_5.operands[0].name, "0") self.assertEqual(parsed_5.operands[0].prefix, "x") self.assertEqual(parsed_5.operands[1].offset.name, "q2c") @@ -144,20 +144,20 @@ class TestParserAArch64(unittest.TestCase): self.assertIsNone(parsed_5.operands[1].index) self.assertEqual(parsed_5.operands[1].scale, 1) - self.assertEqual(parsed_6.instruction, "adrp") + self.assertEqual(parsed_6.mnemonic, "adrp") self.assertEqual(parsed_6.operands[0].name, "0") self.assertEqual(parsed_6.operands[0].prefix, "x") self.assertEqual(parsed_6.operands[1].relocation, ":got:") self.assertEqual(parsed_6.operands[1].name, "visited") - self.assertEqual(parsed_7.instruction, "fadd") + self.assertEqual(parsed_7.mnemonic, "fadd") self.assertEqual(parsed_7.operands[0].name, "17") self.assertEqual(parsed_7.operands[0].prefix, "v") self.assertEqual(parsed_7.operands[0].lanes, "2") self.assertEqual(parsed_7.operands[0].shape, "d") self.assertEqual(self.parser.get_full_reg_name(parsed_7.operands[2]), "v1.2d") - self.assertEqual(parsed_8.instruction, "mov.d") + self.assertEqual(parsed_8.mnemonic, "mov.d") self.assertEqual(parsed_8.operands[0].name, "0") self.assertEqual(parsed_8.operands[0].prefix, "x") self.assertEqual(parsed_8.operands[1].name, "16") @@ -165,7 +165,7 @@ class TestParserAArch64(unittest.TestCase): self.assertEqual(parsed_8.operands[1].index, "1") self.assertEqual(self.parser.get_full_reg_name(parsed_8.operands[1]), "v16.d[1]") - self.assertEqual(parsed_9.instruction, "ccmp") + self.assertEqual(parsed_9.mnemonic, "ccmp") self.assertEqual(parsed_9.operands[0].name, "0") self.assertEqual(parsed_9.operands[0].prefix, "x") self.assertEqual(parsed_9.operands[3].ccode, "CC") @@ -181,8 +181,8 @@ class TestParserAArch64(unittest.TestCase): line_5_operands = "fcmla z26.d, p0/m, z29.d, z21.d, #90" line_conditions = "ccmn x11, #1, #3, eq" - instruction_form_1 = instructionForm( - instruction_id=None, + instruction_form_1 = InstructionForm( + mnemonic=None, operands_id=[], directive_id=None, comment_id="-- Begin main", @@ -191,8 +191,8 @@ class TestParserAArch64(unittest.TestCase): line_number=1, ) - instruction_form_2 = instructionForm( - instruction_id=None, + instruction_form_2 = InstructionForm( + mnemonic=None, operands_id=[], directive_id=None, comment_id="=>This Inner Loop Header: Depth=1", @@ -200,8 +200,8 @@ class TestParserAArch64(unittest.TestCase): line=".LBB0_1: // =>This Inner Loop Header: Depth=1", line_number=2, ) - instruction_form_3 = instructionForm( - instruction_id=None, + instruction_form_3 = InstructionForm( + mnemonic=None, operands_id=[], directive_id=DirectiveOperand(name="cfi_def_cfa", parameter_id=["w29", "-16"]), comment_id=None, @@ -209,17 +209,17 @@ class TestParserAArch64(unittest.TestCase): line=".cfi_def_cfa w29, -16", line_number=3, ) - instruction_form_4 = instructionForm( - instruction_id="ldr", + instruction_form_4 = InstructionForm( + mnemonic="ldr", operands_id=[ - RegisterOperand(prefix_id="s", name="0"), + RegisterOperand(prefix="s", name="0"), MemoryOperand( - offset_ID=None, - base_id=RegisterOperand(prefix_id="x", name="11"), - index_id=RegisterOperand( - prefix_id="w", name="10", shift_op="sxtw", shift=[{"value": "2"}] + offset=None, + base=RegisterOperand(prefix="x", name="11"), + index=RegisterOperand( + prefix="w", name="10", shift_op="sxtw", shift=[{"value": "2"}] ), - scale_id=4, + scale=4, ), ], directive_id=None, @@ -228,15 +228,15 @@ class TestParserAArch64(unittest.TestCase): line="ldr s0, [x11, w10, sxtw #2] // = <<2", line_number=4, ) - instruction_form_5 = instructionForm( - instruction_id="prfm", + instruction_form_5 = InstructionForm( + mnemonic="prfm", operands_id=[ {"prfop": {"type": ["PLD"], "target": ["L1"], "policy": ["KEEP"]}}, MemoryOperand( - offset_ID=ImmediateOperand(value_id=2048), - base_id=RegisterOperand(prefix_id="x", name="26"), - index_id=None, - scale_id=1, + offset=ImmediateOperand(value_id=2048), + base=RegisterOperand(prefix="x", name="26"), + index=None, + scale=1, ), ], directive_id=None, @@ -245,16 +245,16 @@ class TestParserAArch64(unittest.TestCase): line="prfm pldl1keep, [x26, #2048] //HPL", line_number=5, ) - instruction_form_6 = instructionForm( - instruction_id="stp", + instruction_form_6 = InstructionForm( + mnemonic="stp", operands_id=[ - RegisterOperand(prefix_id="x", name="29"), - RegisterOperand(prefix_id="x", name="30"), + RegisterOperand(prefix="x", name="29"), + RegisterOperand(prefix="x", name="30"), MemoryOperand( - offset_ID=ImmediateOperand(value_id=-16), - base_id=RegisterOperand(name="sp", prefix_id="x"), - index_id=None, - scale_id=1, + offset=ImmediateOperand(value_id=-16), + base=RegisterOperand(name="sp", prefix="x"), + index=None, + scale=1, pre_indexed=True, ), ], @@ -264,16 +264,16 @@ class TestParserAArch64(unittest.TestCase): line="stp x29, x30, [sp, #-16]!", line_number=6, ) - instruction_form_7 = instructionForm( - instruction_id="ldp", + instruction_form_7 = InstructionForm( + mnemonic="ldp", operands_id=[ - RegisterOperand(prefix_id="q", name="2"), - RegisterOperand(prefix_id="q", name="3"), + RegisterOperand(prefix="q", name="2"), + RegisterOperand(prefix="q", name="3"), MemoryOperand( - offset_ID=None, - base_id=RegisterOperand(name="11", prefix_id="x"), - index_id=None, - scale_id=1, + offset=None, + base=RegisterOperand(name="11", prefix="x"), + index=None, + scale=1, post_indexed={"value": 64}, ), ], @@ -283,13 +283,13 @@ class TestParserAArch64(unittest.TestCase): line="ldp q2, q3, [x11], #64", line_number=7, ) - instruction_form_8 = instructionForm( - instruction_id="fcmla", + instruction_form_8 = InstructionForm( + mnemonic="fcmla", operands_id=[ - RegisterOperand(prefix_id="z", name="26", shape="d"), - RegisterOperand(prefix_id="p", name="0", predication="m"), - RegisterOperand(prefix_id="z", name="29", shape="d"), - RegisterOperand(prefix_id="z", name="21", shape="d"), + RegisterOperand(prefix="z", name="26", shape="d"), + RegisterOperand(prefix="p", name="0", predication="m"), + RegisterOperand(prefix="z", name="29", shape="d"), + RegisterOperand(prefix="z", name="21", shape="d"), ImmediateOperand(value_id=90, type_id="int"), ], directive_id=None, @@ -298,10 +298,10 @@ class TestParserAArch64(unittest.TestCase): line="fcmla z26.d, p0/m, z29.d, z21.d, #90", line_number=8, ) - instruction_form_9 = instructionForm( - instruction_id="ccmn", + instruction_form_9 = InstructionForm( + mnemonic="ccmn", operands_id=[ - RegisterOperand(prefix_id="x", name="11"), + RegisterOperand(prefix="x", name="11"), ImmediateOperand(value_id=1, type_id="int"), ImmediateOperand(value_id=3, type_id="int"), {"condition": "EQ"}, @@ -376,17 +376,17 @@ class TestParserAArch64(unittest.TestCase): instr_list_with_index = "ld4 {v0.S, v1.S, v2.S, v3.S}[2]" instr_range_single = "dummy { z1.d }" reg_list = [ - RegisterOperand(prefix_id="x", name="5"), - RegisterOperand(prefix_id="x", name="6"), - RegisterOperand(prefix_id="x", name="7"), + RegisterOperand(prefix="x", name="5"), + RegisterOperand(prefix="x", name="6"), + RegisterOperand(prefix="x", name="7"), ] reg_list_idx = [ - RegisterOperand(prefix_id="v", name="0", shape="S", index=2), - RegisterOperand(prefix_id="v", name="1", shape="S", index=2), - RegisterOperand(prefix_id="v", name="2", shape="S", index=2), - RegisterOperand(prefix_id="v", name="3", shape="S", index=2), + RegisterOperand(prefix="v", name="0", shape="S", index=2), + RegisterOperand(prefix="v", name="1", shape="S", index=2), + RegisterOperand(prefix="v", name="2", shape="S", index=2), + RegisterOperand(prefix="v", name="3", shape="S", index=2), ] - reg_list_single = [RegisterOperand(prefix_id="z", name="1", shape="d")] + reg_list_single = [RegisterOperand(prefix="z", name="1", shape="d")] prange = self.parser.parse_line(instr_range) plist = self.parser.parse_line(instr_list) @@ -401,22 +401,22 @@ class TestParserAArch64(unittest.TestCase): self.assertEqual(p_single.operands, reg_list_single) def test_reg_dependency(self): - reg_1_1 = RegisterOperand(prefix_id="b", name="1") - reg_1_2 = RegisterOperand(prefix_id="h", name="1") - reg_1_3 = RegisterOperand(prefix_id="s", name="1") - reg_1_4 = RegisterOperand(prefix_id="d", name="1") - reg_1_4 = RegisterOperand(prefix_id="q", name="1") - reg_2_1 = RegisterOperand(prefix_id="w", name="2") - reg_2_2 = RegisterOperand(prefix_id="x", name="2") - reg_v1_1 = RegisterOperand(prefix_id="v", name="11", lanes="16", shape="b") - reg_v1_2 = RegisterOperand(prefix_id="v", name="11", lanes="8", shape="h") - reg_v1_3 = RegisterOperand(prefix_id="v", name="11", lanes="4", shape="s") - reg_v1_4 = RegisterOperand(prefix_id="v", name="11", lanes="2", shape="d") + reg_1_1 = RegisterOperand(prefix="b", name="1") + reg_1_2 = RegisterOperand(prefix="h", name="1") + reg_1_3 = RegisterOperand(prefix="s", name="1") + reg_1_4 = RegisterOperand(prefix="d", name="1") + reg_1_4 = RegisterOperand(prefix="q", name="1") + reg_2_1 = RegisterOperand(prefix="w", name="2") + reg_2_2 = RegisterOperand(prefix="x", name="2") + reg_v1_1 = RegisterOperand(prefix="v", name="11", lanes="16", shape="b") + reg_v1_2 = RegisterOperand(prefix="v", name="11", lanes="8", shape="h") + reg_v1_3 = RegisterOperand(prefix="v", name="11", lanes="4", shape="s") + reg_v1_4 = RegisterOperand(prefix="v", name="11", lanes="2", shape="d") - reg_b5 = RegisterOperand(prefix_id="b", name="5") - reg_q15 = RegisterOperand(prefix_id="q", name="15") - reg_v10 = RegisterOperand(prefix_id="v", name="10", lanes="2", shape="s") - reg_v20 = RegisterOperand(prefix_id="v", name="20", lanes="2", shape="d") + reg_b5 = RegisterOperand(prefix="b", name="5") + reg_q15 = RegisterOperand(prefix="q", name="15") + reg_v10 = RegisterOperand(prefix="v", name="10", lanes="2", shape="s") + reg_v20 = RegisterOperand(prefix="v", name="20", lanes="2", shape="d") reg_1 = [reg_1_1, reg_1_2, reg_1_3, reg_1_4] reg_2 = [reg_2_1, reg_2_2] diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index de04513..005ae5b 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -8,7 +8,7 @@ import unittest from pyparsing import ParseException -from osaca.parser import ParserX86ATT, instructionForm +from osaca.parser import ParserX86ATT, InstructionForm from osaca.parser.register import RegisterOperand from osaca.parser.immediate import ImmediateOperand @@ -33,40 +33,40 @@ class TestParserX86ATT(unittest.TestCase): ) def test_label_parser(self): - self.assertEqual(self._get_label(self.parser, "main:").name, "main") - self.assertEqual(self._get_label(self.parser, "..B1.10:").name, "..B1.10") - self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3") - self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1") + self.assertEqual(self._get_label(self.parser, "main:")[0].name, "main") + self.assertEqual(self._get_label(self.parser, "..B1.10:")[0].name, "..B1.10") + self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:")[0].name, ".2.3_2_pack.3") + self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1")[0].name, ".L1") self.assertEqual( - " ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment), + " ".join(self._get_label(self.parser, ".L1:\t\t\t#label1")[1]), "label1", ) with self.assertRaises(ParseException): self._get_label(self.parser, "\t.cfi_startproc") def test_directive_parser(self): - self.assertEqual(self._get_directive(self.parser, "\t.text").name, "text") - self.assertEqual(len(self._get_directive(self.parser, "\t.text").parameters), 0) - self.assertEqual(self._get_directive(self.parser, "\t.align\t16,0x90").name, "align") - self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2) - self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0) + self.assertEqual(self._get_directive(self.parser, "\t.text")[0].name, "text") + self.assertEqual(len(self._get_directive(self.parser, "\t.text")[0].parameters), 0) + self.assertEqual(self._get_directive(self.parser, "\t.align\t16,0x90")[0].name, "align") + self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90")[0].parameters), 2) + self.assertEqual(len(self._get_directive(self.parser, ".text")[0].parameters), 0) self.assertEqual( - len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), + len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"')[0].parameters), 2, ) self.assertEqual( - self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1], + self._get_directive(self.parser, '.file\t1 "path/to/file.c"')[0].parameters[1], '"path/to/file.c"', ) self.assertEqual( - self._get_directive(self.parser, "\t.set\tL$set$0,LECIE1-LSCIE1").parameters, + self._get_directive(self.parser, "\t.set\tL$set$0,LECIE1-LSCIE1")[0].parameters, ["L$set$0", "LECIE1-LSCIE1"], ) self.assertEqual( self._get_directive( self.parser, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support", - ).parameters, + )[0].parameters, [ "__TEXT", "__eh_frame", @@ -77,27 +77,27 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual( self._get_directive( self.parser, "\t.section\t__TEXT,__literal16,16byte_literals" - ).parameters, + )[0].parameters, ["__TEXT", "__literal16", "16byte_literals"], ) self.assertEqual( - self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90" + self._get_directive(self.parser, "\t.align\t16,0x90")[0].parameters[1], "0x90" ) self.assertEqual( - self._get_directive(self.parser, " .byte 100,103,144 #IACA START").name, + self._get_directive(self.parser, " .byte 100,103,144 #IACA START")[0].name, "byte", ) self.assertEqual( self._get_directive( self.parser, " .byte 100,103,144 #IACA START" - ).parameters[2], + )[0].parameters[2], "144", ) self.assertEqual( " ".join( self._get_directive( self.parser, " .byte 100,103,144 #IACA START" - ).comment + )[1] ), "IACA START", ) @@ -119,21 +119,21 @@ class TestParserX86ATT(unittest.TestCase): parsed_6 = self.parser.parse_instruction(instr6) parsed_7 = self.parser.parse_instruction(instr7) - self.assertEqual(parsed_1.instruction, "vcvtsi2ss") + self.assertEqual(parsed_1.mnemonic, "vcvtsi2ss") self.assertEqual(parsed_1.operands[0].name, "edx") self.assertEqual(parsed_1.operands[1].name, "xmm2") self.assertEqual(parsed_1.comment, "12.27") - self.assertEqual(parsed_2.instruction, "jb") + self.assertEqual(parsed_2.mnemonic, "jb") self.assertEqual(parsed_2.operands[0].name, "..B1.4") self.assertEqual(len(parsed_2.operands), 1) self.assertIsNone(parsed_2.comment) - self.assertEqual(parsed_3.instruction, "movl") + self.assertEqual(parsed_3.mnemonic, "movl") self.assertEqual(parsed_3.operands[0].value, 222) self.assertEqual(parsed_3.operands[1].name, "ebx") self.assertEqual(parsed_3.comment, "IACA END") - self.assertEqual(parsed_4.instruction, "vmovss") + self.assertEqual(parsed_4.mnemonic, "vmovss") self.assertEqual(parsed_4.operands[1].offset.value, -4) self.assertEqual(parsed_4.operands[1].base.name, "rsp") self.assertEqual(parsed_4.operands[1].index.name, "rax") @@ -141,14 +141,14 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(parsed_4.operands[0].name, "xmm4") self.assertEqual(parsed_4.comment, "12.9") - self.assertEqual(parsed_5.instruction, "mov") + self.assertEqual(parsed_5.mnemonic, "mov") self.assertEqual(parsed_5.operands[1].offset.name, "var") self.assertIsNone(parsed_5.operands[1].base) self.assertIsNone(parsed_5.operands[1].index) self.assertEqual(parsed_5.operands[1].scale, 1) self.assertEqual(parsed_5.operands[0].name, "ebx") - self.assertEqual(parsed_6.instruction, "lea") + self.assertEqual(parsed_6.mnemonic, "lea") self.assertIsNone(parsed_6.operands[0].offset) self.assertIsNone(parsed_6.operands[0].base) self.assertEqual(parsed_6.operands[0].index.name, "rax") @@ -166,8 +166,8 @@ class TestParserX86ATT(unittest.TestCase): line_directive = ".quad .2.3_2__kmpc_loc_pack.2 #qed" line_instruction = "lea 2(%rax,%rax), %ecx #12.9" - instruction_form_1 = instructionForm( - instruction_id=None, + instruction_form_1 = InstructionForm( + mnemonic=None, operands_id=[], directive_id=None, comment_id="-- Begin main", @@ -175,8 +175,8 @@ class TestParserX86ATT(unittest.TestCase): line="# -- Begin main", line_number=1, ) - instruction_form_2 = instructionForm( - instruction_id=None, + instruction_form_2 = InstructionForm( + mnemonic=None, operands_id=[], directive_id=None, comment_id="Preds ..B1.6", @@ -184,8 +184,8 @@ class TestParserX86ATT(unittest.TestCase): line="..B1.7: # Preds ..B1.6", line_number=2, ) - instruction_form_3 = instructionForm( - instruction_id=None, + instruction_form_3 = InstructionForm( + mnemonic=None, operands_id=[], directive_id={"name": "quad", "parameters": [".2.3_2__kmpc_loc_pack.2"]}, comment_id="qed", @@ -193,8 +193,8 @@ class TestParserX86ATT(unittest.TestCase): line=".quad .2.3_2__kmpc_loc_pack.2 #qed", line_number=3, ) - instruction_form_4 = instructionForm( - instruction_id="lea", + instruction_form_4 = InstructionForm( + mnemonic="lea", operands_id=[ { "memory": {