From d553998b902aa7b86a845928f1e6719b96534ea1 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Tue, 4 Jun 2019 12:55:32 +0200 Subject: [PATCH] added AttrDict to parser and refactoring --- osaca/parser/__init__.py | 3 +- osaca/parser/attr_dict.py | 15 ++++ osaca/parser/base_parser.py | 2 + osaca/parser/parser_AArch64v81.py | 139 +++++++++++++++++------------- osaca/parser/parser_x86att.py | 71 ++++++++------- tests/test_analyzer.py | 8 +- tests/test_parser_AArch64v81.py | 137 +++++++++++++++-------------- tests/test_parser_x86att.py | 112 ++++++++++++------------ 8 files changed, 265 insertions(+), 222 deletions(-) create mode 100755 osaca/parser/attr_dict.py diff --git a/osaca/parser/__init__.py b/osaca/parser/__init__.py index a5aa87c..5437530 100644 --- a/osaca/parser/__init__.py +++ b/osaca/parser/__init__.py @@ -3,7 +3,8 @@ Collection of parsers supported by OSACA. Only the parser below will be exported, so please add new parsers to __all__. """ +from .attr_dict import AttrDict from .parser_x86att import ParserX86ATT from .parser_AArch64v81 import ParserAArch64v81 -__all__ = ['ParserX86ATT', 'ParserAArch64v81'] +__all__ = ['AttrDict', 'ParserX86ATT', 'ParserAArch64v81'] diff --git a/osaca/parser/attr_dict.py b/osaca/parser/attr_dict.py new file mode 100755 index 0000000..0230982 --- /dev/null +++ b/osaca/parser/attr_dict.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + @staticmethod + def convert_dict(dictionary): + for key in list(dictionary.keys()): + entry = dictionary[key] + if isinstance(entry, type(dict())) or isinstance(entry, type(AttrDict())): + dictionary[key] = AttrDict.convert_dict(dictionary[key]) + return AttrDict(dictionary) diff --git a/osaca/parser/base_parser.py b/osaca/parser/base_parser.py index 9c82aa7..3982ba8 100755 --- a/osaca/parser/base_parser.py +++ b/osaca/parser/base_parser.py @@ -9,6 +9,8 @@ class BaseParser(object): LABEL_ID = 'label' MEMORY_ID = 'memory' REGISTER_ID = 'register' + INSTRUCTION_ID = 'instruction' + OPERANDS_ID = 'operands' def __init__(self): self.construct_parser() diff --git a/osaca/parser/parser_AArch64v81.py b/osaca/parser/parser_AArch64v81.py index fff1819..09d3720 100755 --- a/osaca/parser/parser_AArch64v81.py +++ b/osaca/parser/parser_AArch64v81.py @@ -3,6 +3,7 @@ import pyparsing as pp +from .attr_dict import AttrDict from .base_parser import BaseParser @@ -136,8 +137,8 @@ class ParserAArch64v81(BaseParser): + pp.Optional(register_index ^ immediate.setResultsName('offset')) + pp.Literal(']') + pp.Optional( - pp.Literal('!').setResultsName('pre-indexed') - | (pp.Suppress(pp.Literal(',')) + immediate.setResultsName('post-indexed')) + pp.Literal('!').setResultsName('pre_indexed') + | (pp.Suppress(pp.Literal(',')) + immediate.setResultsName('post_indexed')) ) ).setResultsName(self.MEMORY_ID) prefetch_op = pp.Group( @@ -174,20 +175,23 @@ class ParserAArch64v81(BaseParser): :param int line_id: default None, identifier of instruction form :return: parsed instruction form """ - instruction_form = { - 'instruction': None, - 'operands': None, - 'directive': None, - 'comment': None, - 'label': None, - 'line_number': line_number, - } + instruction_form = AttrDict( + { + self.INSTRUCTION_ID: None, + self.OPERANDS_ID: None, + self.DIRECTIVE_ID: None, + self.COMMENT_ID: None, + self.LABEL_ID: None, + 'line_number': line_number, + } + ) result = None # 1. Parse comment try: result = self._process_operand(self.comment.parseString(line, parseAll=True).asDict()) - instruction_form['comment'] = ' '.join(result[self.COMMENT_ID]) + result = AttrDict.convert_dict(result) + instruction_form[self.COMMENT_ID] = ' '.join(result[self.COMMENT_ID]) except pp.ParseException: pass @@ -197,9 +201,12 @@ class ParserAArch64v81(BaseParser): result = self._process_operand( self.label.parseString(line, parseAll=True).asDict() ) - instruction_form['label'] = result[self.LABEL_ID]['name'] + result = AttrDict.convert_dict(result) + instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name if self.COMMENT_ID in result[self.LABEL_ID]: - instruction_form['comment'] = ' '.join(result[self.LABEL_ID][self.COMMENT_ID]) + instruction_form[self.COMMENT_ID] = ' '.join( + result[self.LABEL_ID][self.COMMENT_ID] + ) except pp.ParseException: pass @@ -209,12 +216,13 @@ class ParserAArch64v81(BaseParser): result = self._process_operand( self.directive.parseString(line, parseAll=True).asDict() ) - instruction_form['directive'] = { - 'name': result[self.DIRECTIVE_ID]['name'], - 'parameters': result[self.DIRECTIVE_ID]['parameters'], + result = AttrDict.convert_dict(result) + instruction_form[self.DIRECTIVE_ID] = { + 'name': result[self.DIRECTIVE_ID].name, + 'parameters': result[self.DIRECTIVE_ID].parameters, } if self.COMMENT_ID in result[self.DIRECTIVE_ID]: - instruction_form['comment'] = ' '.join( + instruction_form[self.COMMENT_ID] = ' '.join( result[self.DIRECTIVE_ID][self.COMMENT_ID] ) except pp.ParseException: @@ -230,22 +238,23 @@ class ParserAArch64v81(BaseParser): line_number, line ) ) - instruction_form['instruction'] = result['instruction'] - instruction_form['operands'] = result['operands'] - instruction_form['comment'] = result['comment'] + instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID] + instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID] + instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID] return instruction_form def parse_instruction(self, instruction): result = self.instruction_parser.parseString(instruction, parseAll=True).asDict() - operands = {'source': [], 'destination': []} + result = AttrDict.convert_dict(result) + operands = AttrDict({'source': [], 'destination': []}) # ARM specific load store flags is_store = False is_load = False - if result['mnemonic'].lower().startswith('st'): + if result.mnemonic.lower().startswith('st'): # Store instruction --> swap source and destination is_store = True - if result['mnemonic'].lower().startswith('ld'): + if result.mnemonic.lower().startswith('ld'): # Load instruction --> keep in mind for possible multiple loads is_load = True @@ -253,51 +262,55 @@ class ParserAArch64v81(BaseParser): # Check first operand if 'operand1' in result: if is_store: - operands['source'].append(self._process_operand(result['operand1'])) + operands.source.append(self._process_operand(result['operand1'])) else: - operands['destination'].append(self._process_operand(result['operand1'])) + operands.destination.append(self._process_operand(result['operand1'])) # Check second operand if 'operand2' in result: if is_store and 'operand3' not in result or is_load and 'operand3' in result: # destination - operands['destination'].append(self._process_operand(result['operand2'])) + operands.destination.append(self._process_operand(result['operand2'])) else: - operands['source'].append(self._process_operand(result['operand2'])) + operands.source.append(self._process_operand(result['operand2'])) # Check third operand if 'operand3' in result: if is_store and 'operand4' not in result or is_load and 'operand4' in result: - operands['destination'].append(self._process_operand(result['operand3'])) + operands.destination.append(self._process_operand(result['operand3'])) else: - operands['source'].append(self._process_operand(result['operand3'])) + operands.source.append(self._process_operand(result['operand3'])) # Check fourth operand if 'operand4' in result: if is_store: - operands['destination'].append(self._process_operand(result['operand4'])) + operands.destination.append(self._process_operand(result['operand4'])) else: - operands['source'].append(self._process_operand(result['operand4'])) + operands.source.append(self._process_operand(result['operand4'])) - return_dict = { - 'instruction': result['mnemonic'], - 'operands': operands, - 'comment': ' '.join(result['comment']) if 'comment' in result else None, - } + return_dict = AttrDict( + { + self.INSTRUCTION_ID: result.mnemonic, + self.OPERANDS_ID: operands, + self.COMMENT_ID: ' '.join(result[self.COMMENT_ID]) + if self.COMMENT_ID in result + else None, + } + ) return return_dict def _process_operand(self, operand): # structure memory addresses - if 'memory' in operand: - return self.substitute_memory_address(operand['memory']) + if self.MEMORY_ID in operand: + return self.substitute_memory_address(operand[self.MEMORY_ID]) # structure register lists - if 'register' in operand and ( - 'list' in operand['register'] or 'range' in operand['register'] + if self.REGISTER_ID in operand and ( + 'list' in operand[self.REGISTER_ID] or 'range' in operand[self.REGISTER_ID] ): # TODO: discuss if ranges should be converted to lists - return self.substitute_register_list(operand['register']) + return self.substitute_register_list(operand[self.REGISTER_ID]) # add value attribute to floating point immediates without exponent - if 'immediate' in operand: - return self.substitute_immediate(operand['immediate']) - if 'label' in operand: - return self.substitute_label(operand['label']) + if self.IMMEDIATE_ID in operand: + return self.substitute_immediate(operand[self.IMMEDIATE_ID]) + if self.LABEL_ID in operand: + return self.substitute_label(operand[self.LABEL_ID]) return operand def substitute_memory_address(self, memory_address): @@ -311,12 +324,12 @@ class ParserAArch64v81(BaseParser): if 'shift' in memory_address['index']: if memory_address['index']['shift_op'].lower() in valid_shift_ops: scale = str(2 ** int(memory_address['index']['shift']['value'])) - new_dict = {'offset': offset, 'base': base, 'index': index, 'scale': scale} - if 'pre-indexed' in memory_address: - new_dict['pre-indexed'] = True - if 'post-indexed' in memory_address: - new_dict['post-indexed'] = memory_address['post-indexed'] - return {'memory': new_dict} + new_dict = AttrDict({'offset': offset, 'base': base, 'index': index, 'scale': scale}) + if 'pre_indexed' in memory_address: + new_dict['pre_indexed'] = True + if 'post_indexed' in memory_address: + new_dict['post_indexed'] = memory_address['post_indexed'] + return AttrDict({self.MEMORY_ID: new_dict}) def substitute_register_list(self, register_list): # Remove unnecessarily created dictionary entries during parsing @@ -327,10 +340,12 @@ class ParserAArch64v81(BaseParser): if 'range' in register_list: dict_name = 'range' for v in register_list[dict_name]: - vlist.append(self.list_element.parseString(v, parseAll=True).asDict()) + vlist.append( + AttrDict.convert_dict(self.list_element.parseString(v, parseAll=True).asDict()) + ) index = None if 'index' not in register_list else register_list['index'] - new_dict = {dict_name: vlist, 'index': index} - return {'register': new_dict} + new_dict = AttrDict({dict_name: vlist, 'index': index}) + return AttrDict({self.REGISTER_ID: new_dict}) def substitute_immediate(self, immediate): dict_name = '' @@ -339,28 +354,30 @@ class ParserAArch64v81(BaseParser): return immediate if 'value' in immediate: # normal integer value, nothing to do - return {'immediate': immediate} + return AttrDict({self.IMMEDIATE_ID: immediate}) if 'base_immediate' in immediate: # arithmetic immediate, nothing to do - return {'immediate': immediate} + return AttrDict({self.IMMEDIATE_ID: immediate}) if 'float' in immediate: dict_name = 'float' if 'double' in immediate: dict_name = 'double' if 'exponent' in immediate[dict_name]: # nothing to do - return {'immediate': immediate} + return AttrDict({self.IMMEDIATE_ID: immediate}) else: # change 'mantissa' key to 'value' - return {'immediate': {'value': immediate[dict_name]['mantissa']}} + return AttrDict( + {self.IMMEDIATE_ID: AttrDict({'value': immediate[dict_name]['mantissa']})} + ) def substitute_label(self, label): # remove duplicated 'name' level due to identifier label['name'] = label['name']['name'] - return {'label': label} + return AttrDict({self.LABEL_ID: label}) def get_full_reg_name(self, register): - if 'lanes' in 'register': + if 'lanes' in register: return ( register['prefix'] + register['name'] + '.' + register['lanes'] + register['shape'] ) @@ -381,6 +398,6 @@ class ParserAArch64v81(BaseParser): def ieee_to_int(self, ieee_val): exponent = int(ieee_val['exponent'], 10) - if ieee_val['e_sign'] == '-': + if ieee_val.e_sign == '-': exponent *= -1 return float(ieee_val['mantissa']) * (10 ** exponent) diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index e35a218..2b25e22 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -3,6 +3,7 @@ import pyparsing as pp from .base_parser import BaseParser +from .attr_dict import AttrDict class ParserX86ATT(BaseParser): @@ -102,20 +103,21 @@ class ParserX86ATT(BaseParser): :param int line_id: default None, identifier of instruction form :return: parsed instruction form """ - instruction_form = { - 'instruction': None, - 'operands': None, - 'directive': None, - 'comment': None, - 'label': None, + instruction_form = AttrDict({ + self.INSTRUCTION_ID: None, + self.OPERANDS_ID: None, + self.DIRECTIVE_ID: None, + self.COMMENT_ID: None, + self.LABEL_ID: None, 'line_number': line_number, - } + }) result = None # 1. Parse comment try: result = self._process_operand(self.comment.parseString(line, parseAll=True).asDict()) - instruction_form['comment'] = ' '.join(result[self.COMMENT_ID]) + result = AttrDict.convert_dict(result) + instruction_form[self.COMMENT_ID] = ' '.join(result[self.COMMENT_ID]) except pp.ParseException: pass @@ -125,9 +127,12 @@ class ParserX86ATT(BaseParser): result = self._process_operand( self.label.parseString(line, parseAll=True).asDict() ) - instruction_form['label'] = result[self.LABEL_ID]['name'] + result = AttrDict.convert_dict(result) + instruction_form[self.LABEL_ID] = result[self.LABEL_ID]['name'] if self.COMMENT_ID in result[self.LABEL_ID]: - instruction_form['comment'] = ' '.join(result[self.LABEL_ID][self.COMMENT_ID]) + instruction_form[self.COMMENT_ID] = ' '.join( + result[self.LABEL_ID][self.COMMENT_ID] + ) except pp.ParseException: pass @@ -137,12 +142,13 @@ class ParserX86ATT(BaseParser): result = self._process_operand( self.directive.parseString(line, parseAll=True).asDict() ) - instruction_form['directive'] = { + result = AttrDict.convert_dict(result) + instruction_form[self.DIRECTIVE_ID] = { 'name': result[self.DIRECTIVE_ID]['name'], 'parameters': result[self.DIRECTIVE_ID]['parameters'], } if self.COMMENT_ID in result[self.DIRECTIVE_ID]: - instruction_form['comment'] = ' '.join( + instruction_form[self.COMMENT_ID] = ' '.join( result[self.DIRECTIVE_ID][self.COMMENT_ID] ) except pp.ParseException: @@ -158,15 +164,16 @@ class ParserX86ATT(BaseParser): line_number, line ) ) - instruction_form['instruction'] = result['instruction'] - instruction_form['operands'] = result['operands'] - instruction_form['comment'] = result['comment'] + instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID] + instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID] + instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID] return instruction_form def parse_instruction(self, instruction): result = self.instruction_parser.parseString(instruction, parseAll=True).asDict() - operands = {'source': [], 'destination': []} + result = AttrDict.convert_dict(result) + operands = AttrDict({'source': [], 'destination': []}) # Check from right to left # Check third operand if 'operand3' in result: @@ -183,21 +190,23 @@ class ParserX86ATT(BaseParser): operands['source'].insert(0, self._process_operand(result['operand1'])) else: operands['destination'].append(self._process_operand(result['operand1'])) - return_dict = { - 'instruction': result['mnemonic'], - 'operands': operands, - 'comment': ' '.join(result['comment']) if 'comment' in result else None, - } + return_dict = AttrDict({ + self.INSTRUCTION_ID: result['mnemonic'], + self.OPERANDS_ID: operands, + self.COMMENT_ID: ' '.join(result[self.COMMENT_ID]) + if self.COMMENT_ID in result + else None, + }) return return_dict def _process_operand(self, operand): # For the moment, only used to structure memory addresses - if 'memory' in operand: - return self.substitute_memory_address(operand['memory']) - if 'immediate' in operand: - return self.substitue_immediate(operand['immediate']) - if 'label' in operand: - return self.substitute_label(operand['label']) + if self.MEMORY_ID in operand: + return self.substitute_memory_address(operand[self.MEMORY_ID]) + if self.IMMEDIATE_ID in operand: + return self.substitue_immediate(operand[self.IMMEDIATE_ID]) + if self.LABEL_ID in operand: + return self.substitute_label(operand[self.LABEL_ID]) return operand def substitute_memory_address(self, memory_address): @@ -206,20 +215,20 @@ class ParserX86ATT(BaseParser): base = None if 'base' not in memory_address else memory_address['base'] index = None if 'index' not in memory_address else memory_address['index'] scale = '1' if 'scale' not in memory_address else memory_address['scale'] - new_dict = {'offset': offset, 'base': base, 'index': index, 'scale': scale} - return {'memory': new_dict} + new_dict = AttrDict({'offset': offset, 'base': base, 'index': index, 'scale': scale}) + return AttrDict({self.MEMORY_ID: new_dict}) def substitute_label(self, label): # remove duplicated 'name' level due to identifier label['name'] = label['name']['name'] - return {'label': label} + return AttrDict({self.LABEL_ID: label}) def substitue_immediate(self, immediate): if 'identifier' in immediate: # actually an identifier, change declaration return immediate # otherwise nothing to do - return {'immediate': immediate} + return AttrDict({self.IMMEDIATE_ID: immediate}) def get_full_reg_name(self, register): # nothing to do diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index 8d1ba69..d01f852 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -27,14 +27,14 @@ class TestAnalyzer(unittest.TestCase): def test_marker_detection_AArch64(self): analyzer = Analyzer(self.parsed_AArch, 'AArch64') self.assertEquals(len(analyzer.kernel), 138) - self.assertEquals(analyzer.kernel[0]['line_number'], 307) - self.assertEquals(analyzer.kernel[-1]['line_number'], 444) + self.assertEquals(analyzer.kernel[0].line_number, 307) + self.assertEquals(analyzer.kernel[-1].line_number, 444) def test_marker_detection_x86(self): analyzer = Analyzer(self.parsed_x86, 'x86') self.assertEquals(len(analyzer.kernel), 9) - self.assertEquals(analyzer.kernel[0]['line_number'], 146) - self.assertEquals(analyzer.kernel[-1]['line_number'], 154) + self.assertEquals(analyzer.kernel[0].line_number, 146) + self.assertEquals(analyzer.kernel[-1].line_number, 154) def test_marker_matching_x86(self): # preparation diff --git a/tests/test_parser_AArch64v81.py b/tests/test_parser_AArch64v81.py index dd1bbfb..e47d18b 100755 --- a/tests/test_parser_AArch64v81.py +++ b/tests/test_parser_AArch64v81.py @@ -8,7 +8,7 @@ import unittest from pyparsing import ParseException -from osaca.parser import ParserAArch64v81 +from osaca.parser import AttrDict, ParserAArch64v81 class TestParserAArch64v81(unittest.TestCase): @@ -32,25 +32,23 @@ class TestParserAArch64v81(unittest.TestCase): ) def test_label_parser(self): - self.assertEqual(self._get_label(self.parser, 'main:')['name'], 'main') - self.assertEqual(self._get_label(self.parser, '..B1.10:')['name'], '..B1.10') - self.assertEqual(self._get_label(self.parser, '.2.3_2_pack.3:')['name'], '.2.3_2_pack.3') - self.assertEqual(self._get_label(self.parser, '.L1:\t\t\t//label1')['name'], '.L1') + self.assertEqual(self._get_label(self.parser, 'main:').name, 'main') + self.assertEqual(self._get_label(self.parser, '..B1.10:').name, '..B1.10') + self.assertEqual(self._get_label(self.parser, '.2.3_2_pack.3:').name, '.2.3_2_pack.3') + self.assertEqual(self._get_label(self.parser, '.L1:\t\t\t//label1').name, '.L1') self.assertEqual( - ' '.join(self._get_label(self.parser, '.L1:\t\t\t//label1')['comment']), 'label1' + ' '.join(self._get_label(self.parser, '.L1:\t\t\t//label1').comment), 'label1' ) with self.assertRaises(ParseException): self._get_label(self.parser, '\t.cfi_startproc') def test_directive_parser(self): - self.assertEqual(self._get_directive(self.parser, '\t.text')['name'], 'text') - self.assertEqual(len(self._get_directive(self.parser, '\t.text')['parameters']), 0) - self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90')['name'], 'align') + self.assertEqual(self._get_directive(self.parser, '\t.text').name, 'text') + self.assertEqual(len(self._get_directive(self.parser, '\t.text').parameters), 0) + self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90').name, 'align') + self.assertEqual(len(self._get_directive(self.parser, '\t.align\t16,0x90').parameters), 2) self.assertEqual( - len(self._get_directive(self.parser, '\t.align\t16,0x90')['parameters']), 2 - ) - self.assertEqual( - self._get_directive(self.parser, '\t.align\t16,0x90')['parameters'][1], '0x90' + self._get_directive(self.parser, '\t.align\t16,0x90').parameters[1], '0x90' ) self.assertEqual( self._get_directive(self.parser, ' .byte 100,103,144 //IACA START')[ @@ -88,55 +86,52 @@ class TestParserAArch64v81(unittest.TestCase): parsed_5 = self.parser.parse_instruction(instr5) parsed_6 = self.parser.parse_instruction(instr6) - self.assertEqual(parsed_1['instruction'], 'vcvt.F32.S32') - self.assertEqual(parsed_1['operands']['destination'][0]['register']['name'], '1') - self.assertEqual(parsed_1['operands']['destination'][0]['register']['prefix'], 'w') - self.assertEqual(parsed_1['operands']['source'][0]['register']['name'], '2') - self.assertEqual(parsed_1['operands']['source'][0]['register']['prefix'], 'w') - self.assertEqual(parsed_1['comment'], '12.27') + self.assertEqual(parsed_1.instruction, 'vcvt.F32.S32') + self.assertEqual(parsed_1.operands.destination[0].register.name, '1') + self.assertEqual(parsed_1.operands.destination[0].register.prefix, 'w') + self.assertEqual(parsed_1.operands.source[0].register.name, '2') + self.assertEqual(parsed_1.operands.source[0].register.prefix, 'w') + self.assertEqual(parsed_1.comment, '12.27') - self.assertEqual(parsed_2['instruction'], 'b.lo') - self.assertEqual(parsed_2['operands']['destination'][0]['identifier']['name'], '..B1.4') - self.assertEqual(len(parsed_2['operands']['source']), 0) - self.assertIsNone(parsed_2['comment']) + self.assertEqual(parsed_2.instruction, 'b.lo') + self.assertEqual(parsed_2.operands.destination[0].identifier.name, '..B1.4') + self.assertEqual(len(parsed_2.operands.source), 0) + self.assertIsNone(parsed_2.comment) - self.assertEqual(parsed_3['instruction'], 'mov') - self.assertEqual(parsed_3['operands']['destination'][0]['register']['name'], '2') - self.assertEqual(parsed_3['operands']['destination'][0]['register']['prefix'], 'x') - self.assertEqual(parsed_3['operands']['source'][0]['immediate']['value'], '0x222') - self.assertEqual(parsed_3['comment'], 'NOT IACA END') + self.assertEqual(parsed_3.instruction, 'mov') + self.assertEqual(parsed_3.operands.destination[0].register.name, '2') + self.assertEqual(parsed_3.operands.destination[0].register.prefix, 'x') + self.assertEqual(parsed_3.operands.source[0].immediate.value, '0x222') + self.assertEqual(parsed_3.comment, 'NOT IACA END') - self.assertEqual(parsed_4['instruction'], 'str') - self.assertIsNone(parsed_4['operands']['destination'][0]['memory']['offset']) - self.assertEqual(parsed_4['operands']['destination'][0]['memory']['base']['name'], 'sp') - self.assertIsNone(parsed_4['operands']['destination'][0]['memory']['base']['prefix']) - self.assertEqual(parsed_4['operands']['destination'][0]['memory']['index']['name'], '1') - self.assertEqual(parsed_4['operands']['destination'][0]['memory']['index']['prefix'], 'x') - self.assertEqual(parsed_4['operands']['destination'][0]['memory']['scale'], '16') - self.assertEqual(parsed_4['operands']['source'][0]['register']['name'], '28') - self.assertEqual(parsed_4['operands']['source'][0]['register']['prefix'], 'x') - self.assertEqual(parsed_4['comment'], '12.9') + self.assertEqual(parsed_4.instruction, 'str') + self.assertIsNone(parsed_4.operands.destination[0].memory.offset) + self.assertEqual(parsed_4.operands.destination[0].memory.base.name, 'sp') + self.assertIsNone(parsed_4.operands.destination[0].memory.base.prefix) + self.assertEqual(parsed_4.operands.destination[0].memory.index.name, '1') + self.assertEqual(parsed_4.operands.destination[0].memory.index.prefix, 'x') + self.assertEqual(parsed_4.operands.destination[0].memory.scale, '16') + self.assertEqual(parsed_4.operands.source[0].register.name, '28') + self.assertEqual(parsed_4.operands.source[0].register.prefix, 'x') + self.assertEqual(parsed_4.comment, '12.9') - self.assertEqual(parsed_5['instruction'], 'ldr') - self.assertEqual(parsed_5['operands']['destination'][0]['register']['name'], '0') - self.assertEqual(parsed_5['operands']['destination'][0]['register']['prefix'], 'x') + self.assertEqual(parsed_5.instruction, 'ldr') + self.assertEqual(parsed_5.operands.destination[0].register.name, '0') + self.assertEqual(parsed_5.operands.destination[0].register.prefix, 'x') + self.assertEqual(parsed_5.operands.source[0].memory.offset.identifier.name, 'q2c') self.assertEqual( - parsed_5['operands']['source'][0]['memory']['offset']['identifier']['name'], 'q2c' + parsed_5.operands.source[0].memory.offset.identifier.relocation, ':got_lo12:' ) - self.assertEqual( - parsed_5['operands']['source'][0]['memory']['offset']['identifier']['relocation'], - ':got_lo12:', - ) - self.assertEqual(parsed_5['operands']['source'][0]['memory']['base']['name'], '0') - self.assertEqual(parsed_5['operands']['source'][0]['memory']['base']['prefix'], 'x') - self.assertIsNone(parsed_5['operands']['source'][0]['memory']['index']) - self.assertEqual(parsed_5['operands']['source'][0]['memory']['scale'], '1') + self.assertEqual(parsed_5.operands.source[0].memory.base.name, '0') + self.assertEqual(parsed_5.operands.source[0].memory.base.prefix, 'x') + self.assertIsNone(parsed_5.operands.source[0].memory.index) + self.assertEqual(parsed_5.operands.source[0].memory.scale, '1') - self.assertEqual(parsed_6['instruction'], 'adrp') - self.assertEqual(parsed_6['operands']['destination'][0]['register']['name'], '0') - self.assertEqual(parsed_6['operands']['destination'][0]['register']['prefix'], 'x') - self.assertEqual(parsed_6['operands']['source'][0]['identifier']['relocation'], ':got:') - self.assertEqual(parsed_6['operands']['source'][0]['identifier']['name'], 'visited') + self.assertEqual(parsed_6.instruction, 'adrp') + self.assertEqual(parsed_6.operands.destination[0].register.name, '0') + self.assertEqual(parsed_6.operands.destination[0].register.prefix, 'x') + self.assertEqual(parsed_6.operands.source[0].identifier.relocation, ':got:') + self.assertEqual(parsed_6.operands.source[0].identifier.name, 'visited') def test_parse_line(self): line_comment = '// -- Begin main' @@ -224,7 +219,7 @@ class TestParserAArch64v81(unittest.TestCase): 'operands': { 'source': [ {'register': {'prefix': 'x', 'name': '29'}}, - {'register': {'prefix': 'x', 'name': '30'}} + {'register': {'prefix': 'x', 'name': '30'}}, ], 'destination': [ { @@ -233,7 +228,7 @@ class TestParserAArch64v81(unittest.TestCase): 'base': {'name': 'sp', 'prefix': None}, 'index': None, 'scale': '1', - 'pre-indexed': True + 'pre_indexed': True, } } ], @@ -241,7 +236,7 @@ class TestParserAArch64v81(unittest.TestCase): 'directive': None, 'comment': None, 'label': None, - 'line_number': 6 + 'line_number': 6, } instruction_form_7 = { 'instruction': 'ldp', @@ -253,7 +248,7 @@ class TestParserAArch64v81(unittest.TestCase): 'base': {'prefix': 'x', 'name': '11'}, 'index': None, 'scale': '1', - 'post-indexed': {'value': '64'}, + 'post_indexed': {'value': '64'}, } } ], @@ -285,7 +280,7 @@ class TestParserAArch64v81(unittest.TestCase): def test_parse_file(self): parsed = self.parser.parse_file(self.triad_code) - self.assertEqual(parsed[0]['line_number'], 1) + self.assertEqual(parsed[0].line_number, 1) self.assertEqual(len(parsed), 645) ################## @@ -293,20 +288,24 @@ class TestParserAArch64v81(unittest.TestCase): ################## def _get_comment(self, parser, comment): return ' '.join( - parser._process_operand(parser.comment.parseString(comment, parseAll=True).asDict())[ - 'comment' - ] + AttrDict.convert_dict( + parser._process_operand( + parser.comment.parseString(comment, parseAll=True).asDict() + ) + ).comment ) def _get_label(self, parser, label): - return parser._process_operand(parser.label.parseString(label, parseAll=True).asDict())[ - 'label' - ] + return AttrDict.convert_dict( + parser._process_operand(parser.label.parseString(label, parseAll=True).asDict()) + ).label def _get_directive(self, parser, directive): - return parser._process_operand( - parser.directive.parseString(directive, parseAll=True).asDict() - )['directive'] + return AttrDict.convert_dict( + parser._process_operand( + parser.directive.parseString(directive, parseAll=True).asDict() + ) + ).directive @staticmethod def _find_file(name): diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index 7bd77c0..a6d5e9c 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -8,7 +8,7 @@ import unittest from pyparsing import ParseException -from osaca.parser import ParserX86ATT +from osaca.parser import AttrDict, ParserX86ATT class TestParserX86ATT(unittest.TestCase): @@ -29,25 +29,23 @@ class TestParserX86ATT(unittest.TestCase): ) def test_label_parser(self): - self.assertEqual(self._get_label(self.parser, 'main:')['name'], 'main') - self.assertEqual(self._get_label(self.parser, '..B1.10:')['name'], '..B1.10') - self.assertEqual(self._get_label(self.parser, '.2.3_2_pack.3:')['name'], '.2.3_2_pack.3') - self.assertEqual(self._get_label(self.parser, '.L1:\t\t\t#label1')['name'], '.L1') + self.assertEqual(self._get_label(self.parser, 'main:').name, 'main') + self.assertEqual(self._get_label(self.parser, '..B1.10:').name, '..B1.10') + self.assertEqual(self._get_label(self.parser, '.2.3_2_pack.3:').name, '.2.3_2_pack.3') + self.assertEqual(self._get_label(self.parser, '.L1:\t\t\t#label1').name, '.L1') self.assertEqual( - ' '.join(self._get_label(self.parser, '.L1:\t\t\t#label1')['comment']), 'label1' + ' '.join(self._get_label(self.parser, '.L1:\t\t\t#label1').comment), 'label1' ) with self.assertRaises(ParseException): self._get_label(self.parser, '\t.cfi_startproc') def test_directive_parser(self): - self.assertEqual(self._get_directive(self.parser, '\t.text')['name'], 'text') - self.assertEqual(len(self._get_directive(self.parser, '\t.text')['parameters']), 0) - self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90')['name'], 'align') + self.assertEqual(self._get_directive(self.parser, '\t.text').name, 'text') + self.assertEqual(len(self._get_directive(self.parser, '\t.text').parameters), 0) + self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90').name, 'align') + self.assertEqual(len(self._get_directive(self.parser, '\t.align\t16,0x90').parameters), 2) self.assertEqual( - len(self._get_directive(self.parser, '\t.align\t16,0x90')['parameters']), 2 - ) - self.assertEqual( - self._get_directive(self.parser, '\t.align\t16,0x90')['parameters'][1], '0x90' + self._get_directive(self.parser, '\t.align\t16,0x90').parameters[1], '0x90' ) self.assertEqual( self._get_directive(self.parser, ' .byte 100,103,144 #IACA START')[ @@ -85,44 +83,42 @@ class TestParserX86ATT(unittest.TestCase): parsed_5 = self.parser.parse_instruction(instr5) parsed_6 = self.parser.parse_instruction(instr6) - self.assertEqual(parsed_1['instruction'], 'vcvtsi2ss') - self.assertEqual(parsed_1['operands']['destination'][0]['register']['name'], 'xmm2') - self.assertEqual(parsed_1['operands']['source'][0]['register']['name'], 'edx') - self.assertEqual(parsed_1['comment'], '12.27') + self.assertEqual(parsed_1.instruction, 'vcvtsi2ss') + self.assertEqual(parsed_1.operands.destination[0].register.name, 'xmm2') + self.assertEqual(parsed_1.operands.source[0].register.name, 'edx') + self.assertEqual(parsed_1.comment, '12.27') - self.assertEqual(parsed_2['instruction'], 'jb') - self.assertEqual(parsed_2['operands']['destination'][0]['identifier']['name'], '..B1.4') - self.assertEqual(len(parsed_2['operands']['source']), 0) - self.assertIsNone(parsed_2['comment']) + self.assertEqual(parsed_2.instruction, 'jb') + self.assertEqual(parsed_2.operands.destination[0].identifier.name, '..B1.4') + self.assertEqual(len(parsed_2.operands.source), 0) + self.assertIsNone(parsed_2.comment) - self.assertEqual(parsed_3['instruction'], 'movl') - self.assertEqual(parsed_3['operands']['destination'][0]['register']['name'], 'ebx') - self.assertEqual(parsed_3['operands']['source'][0]['immediate']['value'], '222') - self.assertEqual(parsed_3['comment'], 'IACA END') + self.assertEqual(parsed_3.instruction, 'movl') + self.assertEqual(parsed_3.operands.destination[0].register.name, 'ebx') + self.assertEqual(parsed_3.operands.source[0].immediate.value, '222') + self.assertEqual(parsed_3.comment, 'IACA END') - self.assertEqual(parsed_4['instruction'], 'vmovss') - self.assertEqual(parsed_4['operands']['destination'][0]['memory']['offset']['value'], '-4') - self.assertEqual(parsed_4['operands']['destination'][0]['memory']['base']['name'], 'rsp') - self.assertEqual(parsed_4['operands']['destination'][0]['memory']['index']['name'], 'rax') - self.assertEqual(parsed_4['operands']['destination'][0]['memory']['scale'], '8') - self.assertEqual(parsed_4['operands']['source'][0]['register']['name'], 'xmm4') - self.assertEqual(parsed_4['comment'], '12.9') + self.assertEqual(parsed_4.instruction, 'vmovss') + self.assertEqual(parsed_4.operands.destination[0].memory.offset.value, '-4') + self.assertEqual(parsed_4.operands.destination[0].memory.base.name, 'rsp') + self.assertEqual(parsed_4.operands.destination[0].memory.index.name, 'rax') + self.assertEqual(parsed_4.operands.destination[0].memory.scale, '8') + self.assertEqual(parsed_4.operands.source[0].register.name, 'xmm4') + self.assertEqual(parsed_4.comment, '12.9') - self.assertEqual(parsed_5['instruction'], 'mov') - self.assertEqual( - parsed_5['operands']['destination'][0]['memory']['offset']['identifier']['name'], 'var' - ) - self.assertIsNone(parsed_5['operands']['destination'][0]['memory']['base']) - self.assertIsNone(parsed_5['operands']['destination'][0]['memory']['index']) - self.assertEqual(parsed_5['operands']['destination'][0]['memory']['scale'], '1') - self.assertEqual(parsed_5['operands']['source'][0]['register']['name'], 'ebx') + self.assertEqual(parsed_5.instruction, 'mov') + self.assertEqual(parsed_5.operands.destination[0].memory.offset.identifier.name, 'var') + self.assertIsNone(parsed_5.operands.destination[0].memory.base) + self.assertIsNone(parsed_5.operands.destination[0].memory.index) + self.assertEqual(parsed_5.operands.destination[0].memory.scale, '1') + self.assertEqual(parsed_5.operands.source[0].register.name, 'ebx') - self.assertEqual(parsed_6['instruction'], 'lea') - self.assertIsNone(parsed_6['operands']['source'][0]['memory']['offset']) - self.assertIsNone(parsed_6['operands']['source'][0]['memory']['base']) - self.assertEqual(parsed_6['operands']['source'][0]['memory']['index']['name'], 'rax') - self.assertEqual(parsed_6['operands']['source'][0]['memory']['scale'], '8') - self.assertEqual(parsed_6['operands']['destination'][0]['register']['name'], 'rbx') + self.assertEqual(parsed_6.instruction, 'lea') + self.assertIsNone(parsed_6.operands.source[0].memory.offset) + self.assertIsNone(parsed_6.operands.source[0].memory.base) + self.assertEqual(parsed_6.operands.source[0].memory.index.name, 'rax') + self.assertEqual(parsed_6.operands.source[0].memory.scale, '8') + self.assertEqual(parsed_6.operands.destination[0].register.name, 'rbx') def test_parse_line(self): line_comment = '# -- Begin main' @@ -187,7 +183,7 @@ class TestParserX86ATT(unittest.TestCase): def test_parse_file(self): parsed = self.parser.parse_file(self.triad_code) - self.assertEqual(parsed[0]['line_number'], 1) + self.assertEqual(parsed[0].line_number, 1) self.assertEqual(len(parsed), 353) ################## @@ -195,20 +191,24 @@ class TestParserX86ATT(unittest.TestCase): ################## def _get_comment(self, parser, comment): return ' '.join( - parser._process_operand(parser.comment.parseString(comment, parseAll=True).asDict())[ - 'comment' - ] + AttrDict.convert_dict( + parser._process_operand( + parser.comment.parseString(comment, parseAll=True).asDict() + ) + ).comment ) def _get_label(self, parser, label): - return parser._process_operand(parser.label.parseString(label, parseAll=True).asDict())[ - 'label' - ] + return AttrDict.convert_dict( + parser._process_operand(parser.label.parseString(label, parseAll=True).asDict()) + ).label def _get_directive(self, parser, directive): - return parser._process_operand( - parser.directive.parseString(directive, parseAll=True).asDict() - )['directive'] + return AttrDict.convert_dict( + parser._process_operand( + parser.directive.parseString(directive, parseAll=True).asDict() + ) + ).directive @staticmethod def _find_file(name):