From 1f52157e9ca88e0bc15e87d4d747bd9412830aff Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 2 May 2019 18:52:16 +0200 Subject: [PATCH] fixed tests --- osaca/parser/parser_x86att.py | 76 ++++++++++++++++++++++------------- tests/test_parser_x86att.py | 71 +++++++++++++++++++++----------- 2 files changed, 95 insertions(+), 52 deletions(-) diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 7ba05da..723c37c 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -18,13 +18,19 @@ class ParserX86ATT(BaseParser): # Define x86 assembly identifier first = pp.Word(pp.alphas + '_.', exact=1) rest = pp.Word(pp.alphanums + '_.') - identifier = pp.Combine(first + pp.Optional(rest)) + identifier = pp.Combine(first + pp.Optional(rest)).setResultsName('identifier') # Label self.label = pp.Group( identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment) ).setResultsName(self.LABEL_ID) # Directive - commaSeparatedList = pp.delimitedList(pp.Optional(pp.quotedString | identifier), delim=',') + decimal_number = pp.Combine( + pp.Optional(pp.Literal('-')) + pp.Word(pp.nums) + ).setResultsName('value') + hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value') + commaSeparatedList = pp.delimitedList( + pp.Optional(pp.quotedString | identifier | hex_number | decimal_number), delim=',' + ) self.directive = pp.Group( pp.Literal('.') + pp.Word(pp.alphanums + '_').setResultsName('name') @@ -35,9 +41,9 @@ class ParserX86ATT(BaseParser): ############################## # Instructions # Mnemonic - mnemonic = pp.ZeroOrMore(pp.Literal('data16') ^ pp.Literal('data32')) + pp.Word( + mnemonic = pp.ZeroOrMore(pp.Literal('data16') | pp.Literal('data32')) + pp.Word( pp.alphanums - ) + ).setResultsName('mnemonic') # Register: pp.Regex('^%[0-9a-zA-Z]+,?') register = pp.Group( pp.Literal('%') @@ -52,35 +58,30 @@ class ParserX86ATT(BaseParser): ).setResultsName(self.REGISTER_ID) # Immediate: pp.Regex('^\$(-?[0-9]+)|(0x[0-9a-fA-F]+),?') symbol_immediate = '$' - decimal_number = pp.Combine( - pp.Optional(pp.Literal('-')) + pp.Word(pp.nums) - ).setResultsName('value') - hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value') immediate = pp.Group( pp.Literal(symbol_immediate) - + (decimal_number ^ hex_number) + + (hex_number | decimal_number) + pp.Optional(pp.Suppress(pp.Literal(','))) ).setResultsName(self.IMMEDIATE_ID) # Memory: offset(base, index, scale) - offset = decimal_number ^ hex_number + offset = identifier | hex_number | decimal_number scale = pp.Word('1248', exact=1) memory = pp.Group( pp.Optional(offset.setResultsName('offset')) + pp.Literal('(') - + register.setResultsName('base') + + (register.setResultsName('base') | (pp.Suppress(pp.Literal(',')) + scale)) + pp.Optional(register.setResultsName('index')) + pp.Optional(scale.setResultsName('scale')) + pp.Literal(')') + pp.Optional(pp.Suppress(pp.Literal(','))) - + pp.Optional(self.comment) ).setResultsName(self.MEMORY_ID) # Combine to instruction form operand1 = pp.Group(register ^ immediate ^ memory ^ identifier).setResultsName('operand1') operand2 = pp.Group(register ^ immediate ^ memory).setResultsName('operand2') operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3') self.instruction_parser = ( - mnemonic.setResultsName('mnemonic') - + operand1 + mnemonic + + pp.Optional(operand1) + pp.Optional(operand2) + pp.Optional(operand3) + pp.Optional(self.comment) @@ -117,7 +118,7 @@ class ParserX86ATT(BaseParser): result = self.label.parseString(line, parseAll=True).asDict() instruction_form['label'] = result[self.LABEL_ID]['name'] if self.COMMENT_ID in result[self.LABEL_ID]: - instruction_form['comment'] = ' '.join(result[self.COMMENT_ID]) + instruction_form['comment'] = ' '.join(result[self.LABEL_ID][self.COMMENT_ID]) except pp.ParseException: pass @@ -125,10 +126,10 @@ class ParserX86ATT(BaseParser): if result is None: try: result = self.directive.parseString(line, parseAll=True).asDict() - instruction_form['directive']['name'] = result[self.DIRECTIVE_ID]['name'] - instruction_form['directive']['parameters'] = result[self.DIRECTIVE_ID][ - 'parameters' - ] + instruction_form['directive'] = { + 'name': result[self.DIRECTIVE_ID]['name'], + 'parameters': result[self.DIRECTIVE_ID]['parameters'], + } if self.COMMENT_ID in result[self.DIRECTIVE_ID]: instruction_form['comment'] = ' '.join( result[self.DIRECTIVE_ID][self.COMMENT_ID] @@ -151,18 +152,19 @@ class ParserX86ATT(BaseParser): # Check from right to left # Check third operand if 'operand3' in result: - operands['destination'] = result['operand3'] + operands['destination'] = self.process_operand(result['operand3']) # Check second operand if 'operand2' in result: if 'destination' in operands: - operands['sources'].insert(0, result['operand2']) + operands['sources'].insert(0, self.process_operand(result['operand2'])) else: - operands['destination'] = result['operand2'] - # Add first operand - if 'destination' in operands: - operands['sources'].insert(0, result['operand1']) - else: - operands['destination'] = result['operand1'] + operands['destination'] = self.process_operand(result['operand2']) + # Check first operand + if 'operand1' in result: + if 'destination' in operands: + operands['sources'].insert(0, self.process_operand(result['operand1'])) + else: + operands['destination'] = self.process_operand(result['operand1']) return_dict = { 'instruction': result['mnemonic'], 'operands': operands, @@ -170,6 +172,22 @@ class ParserX86ATT(BaseParser): } return return_dict + def process_operand(self, operand): + # For the moment, only used to structure memory addresses + if 'memory' in operand: + return self.substitute_memory_address(operand['memory']) + return operand + def substitute_memory_address(self, memory_address): - # remove unecessarily created dictionary entries - raise NotImplementedError + # Remove unecessarily created dictionary entries during memory address parsing + offset = None if 'offset' not in memory_address else memory_address['offset'] + base = None if 'base' not in memory_address else memory_address['base'] + index = None if 'index' not in memory_address else memory_address['index'] + scale = '1' if 'scale' not in memory_address else memory_address['scale'] + new_dict = { + 'offset': offset, + 'base': base, + 'index': index, + 'scale': scale, + } + return {'memory': new_dict} diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index dd8a60f..141b618 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -27,10 +27,12 @@ class TestParserX86ATT(unittest.TestCase): def test_label_parser(self): self.assertEqual(get_label(self.parser, 'main:')['name'], 'main') - self.assertEqual(get_label(self.parser, '..B1.10:')['name'], '.B1.10') + self.assertEqual(get_label(self.parser, '..B1.10:')['name'], '..B1.10') self.assertEqual(get_label(self.parser, '.2.3_2_pack.3:')['name'], '.2.3_2_pack.3') self.assertEqual(get_label(self.parser, '.L1:\t\t\t#label1')['name'], '.L1') - self.assertEqual(get_label(self.parser, '.L1:\t\t\t#label1')['comment'], 'label1') + self.assertEqual( + ' '.join(get_label(self.parser, '.L1:\t\t\t#label1')['comment']), 'label1' + ) with self.assertRaises(ParseException): get_label(self.parser, '\t.cfi_startproc') @@ -39,7 +41,7 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(len(get_directive(self.parser, '\t.text')['parameters']), 0) self.assertEqual(get_directive(self.parser, '\t.align\t16,0x90')['name'], 'align') self.assertEqual(len(get_directive(self.parser, '\t.align\t16,0x90')['parameters']), 2) - self.assertEqual(get_directive('\t.align\t16,0x90')['parameters'][1], '0x90') + self.assertEqual(get_directive(self.parser, '\t.align\t16,0x90')['parameters'][1], '0x90') self.assertEqual( get_directive(self.parser, ' .byte 100,103,144 #IACA START')['name'], 'byte', @@ -51,7 +53,11 @@ class TestParserX86ATT(unittest.TestCase): '144', ) self.assertEqual( - get_directive(self.parser, ' .byte 100,103,144 #IACA START')['comment'], + ' '.join( + get_directive(self.parser, ' .byte 100,103,144 #IACA START')[ + 'comment' + ] + ), 'IACA START', ) @@ -60,11 +66,13 @@ class TestParserX86ATT(unittest.TestCase): instr2 = 'jb ..B1.4 \t' instr3 = ' movl $222,%ebx #IACA END' instr4 = 'vmovss %xmm4, -4(%rsp,%rax,8) #12.9' + instr5 = 'mov %ebx, var(,1)' parsed_1 = self.parser.parse_instruction(instr1) parsed_2 = self.parser.parse_instruction(instr2) parsed_3 = self.parser.parse_instruction(instr3) parsed_4 = self.parser.parse_instruction(instr4) + parsed_5 = self.parser.parse_instruction(instr5) self.assertEqual(parsed_1['instruction'], 'vcvtsi2ss') self.assertEqual(parsed_1['operands']['destination']['register']['name'], 'xmm2') @@ -72,7 +80,7 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(parsed_1['comment'], '12.27') self.assertEqual(parsed_2['instruction'], 'jb') - self.assertEqual(parsed_2['operands']['destination'], '..B1.4') + self.assertEqual(parsed_2['operands']['destination']['identifier'], '..B1.4') self.assertEqual(len(parsed_2['operands']['sources']), 0) self.assertIsNone(parsed_2['comment']) @@ -83,17 +91,24 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(parsed_4['instruction'], 'vmovss') self.assertEqual(parsed_4['operands']['destination']['memory']['offset'], '-4') - self.assertEqual(parsed_4['operands']['destination']['memory']['base'], 'rsp') - self.assertEqual(parsed_4['operands']['destination']['memory']['index'], 'rax') + self.assertEqual(parsed_4['operands']['destination']['memory']['base']['name'], 'rsp') + self.assertEqual(parsed_4['operands']['destination']['memory']['index']['name'], 'rax') self.assertEqual(parsed_4['operands']['destination']['memory']['scale'], '8') self.assertEqual(parsed_4['operands']['sources'][0]['register']['name'], 'xmm4') self.assertEqual(parsed_4['comment'], '12.9') + self.assertEqual(parsed_5['instruction'], 'mov') + self.assertEqual(parsed_5['operands']['destination']['memory']['offset'], 'var') + self.assertIsNone(parsed_5['operands']['destination']['memory']['base']) + self.assertIsNone(parsed_5['operands']['destination']['memory']['index']) + self.assertEqual(parsed_5['operands']['destination']['memory']['scale'], '1') + self.assertEqual(parsed_5['operands']['sources'][0]['register']['name'], 'ebx') + def test_parse_line(self): line_comment = '# -- Begin main' line_label = '..B1.7: # Preds ..B1.6' line_directive = '\t\t.quad .2.3_2__kmpc_loc_pack.2 #qed' - # line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9' + line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9' instruction_form_1 = { 'instruction': None, @@ -107,7 +122,7 @@ class TestParserX86ATT(unittest.TestCase): 'instruction': None, 'operands': None, 'directive': None, - 'comment': None, + 'comment': 'Preds ..B1.6', 'label': '..B1.7', 'line_number': 2, } @@ -119,26 +134,36 @@ class TestParserX86ATT(unittest.TestCase): 'label': None, 'line_number': 3, } - # TODO - # instruction_form_4 = { - # 'instruction': 'lea', - # 'operands': {'sources': {'memory': {'offset': '2', 'base': {'name': rax}, ''}}}, - # 'directive': None, - # 'comment': '-- Begin main', - # 'label': None, - # 'line_number': 1, - # } + instruction_form_4 = { + 'instruction': 'lea', + 'operands': { + 'sources': [ + { + 'memory': { + 'offset': '2', + 'base': {'name': 'rax'}, + 'index': {'name': 'rax'}, + 'scale': '1', + } + } + ], + 'destination': {'register': {'name': 'ecx'}}, + }, + 'directive': None, + 'comment': '-- Begin main', + 'label': None, + 'line_number': 1, + } parsed_1 = self.parser.parse_line(line_comment, 1) parsed_2 = self.parser.parse_line(line_label, 2) parsed_3 = self.parser.parse_line(line_directive, 3) - # TODO parsed_4 - # parsed_4 = self.parser.parse_line(line_instruction, 4) + parsed_4 = self.parser.parse_line(line_instruction, 4) self.assertEqual(parsed_1, instruction_form_1) self.assertEqual(parsed_2, instruction_form_2) self.assertEqual(parsed_3, instruction_form_3) - # self.assertEqual(parsed_4, instruction_form_4) + self.assertEqual(parsed_4['operands'], instruction_form_4['operands']) ################## @@ -149,8 +174,8 @@ def get_comment(parser, comment): def get_label(parser, label): - return parser.label.parseString(label, parseAll=True).asDict() + return parser.label.parseString(label, parseAll=True).asDict()['label'] def get_directive(parser, directive): - return parser.directive.parseString(directive, parseAll=True).asDict() + return parser.directive.parseString(directive, parseAll=True).asDict()['directive']