fixed tests

This commit is contained in:
JanLJL
2019-05-02 18:52:16 +02:00
parent daa874b396
commit 1f52157e9c
2 changed files with 95 additions and 52 deletions

View File

@@ -18,13 +18,19 @@ class ParserX86ATT(BaseParser):
# Define x86 assembly identifier # Define x86 assembly identifier
first = pp.Word(pp.alphas + '_.', exact=1) first = pp.Word(pp.alphas + '_.', exact=1)
rest = pp.Word(pp.alphanums + '_.') rest = pp.Word(pp.alphanums + '_.')
identifier = pp.Combine(first + pp.Optional(rest)) identifier = pp.Combine(first + pp.Optional(rest)).setResultsName('identifier')
# Label # Label
self.label = pp.Group( self.label = pp.Group(
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment) identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
).setResultsName(self.LABEL_ID) ).setResultsName(self.LABEL_ID)
# Directive # Directive
commaSeparatedList = pp.delimitedList(pp.Optional(pp.quotedString | identifier), delim=',') decimal_number = pp.Combine(
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
).setResultsName('value')
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
commaSeparatedList = pp.delimitedList(
pp.Optional(pp.quotedString | identifier | hex_number | decimal_number), delim=','
)
self.directive = pp.Group( self.directive = pp.Group(
pp.Literal('.') pp.Literal('.')
+ pp.Word(pp.alphanums + '_').setResultsName('name') + pp.Word(pp.alphanums + '_').setResultsName('name')
@@ -35,9 +41,9 @@ class ParserX86ATT(BaseParser):
############################## ##############################
# Instructions # Instructions
# Mnemonic # Mnemonic
mnemonic = pp.ZeroOrMore(pp.Literal('data16') ^ pp.Literal('data32')) + pp.Word( mnemonic = pp.ZeroOrMore(pp.Literal('data16') | pp.Literal('data32')) + pp.Word(
pp.alphanums pp.alphanums
) ).setResultsName('mnemonic')
# Register: pp.Regex('^%[0-9a-zA-Z]+,?') # Register: pp.Regex('^%[0-9a-zA-Z]+,?')
register = pp.Group( register = pp.Group(
pp.Literal('%') pp.Literal('%')
@@ -52,35 +58,30 @@ class ParserX86ATT(BaseParser):
).setResultsName(self.REGISTER_ID) ).setResultsName(self.REGISTER_ID)
# Immediate: pp.Regex('^\$(-?[0-9]+)|(0x[0-9a-fA-F]+),?') # Immediate: pp.Regex('^\$(-?[0-9]+)|(0x[0-9a-fA-F]+),?')
symbol_immediate = '$' symbol_immediate = '$'
decimal_number = pp.Combine(
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
).setResultsName('value')
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
immediate = pp.Group( immediate = pp.Group(
pp.Literal(symbol_immediate) pp.Literal(symbol_immediate)
+ (decimal_number ^ hex_number) + (hex_number | decimal_number)
+ pp.Optional(pp.Suppress(pp.Literal(','))) + pp.Optional(pp.Suppress(pp.Literal(',')))
).setResultsName(self.IMMEDIATE_ID) ).setResultsName(self.IMMEDIATE_ID)
# Memory: offset(base, index, scale) # Memory: offset(base, index, scale)
offset = decimal_number ^ hex_number offset = identifier | hex_number | decimal_number
scale = pp.Word('1248', exact=1) scale = pp.Word('1248', exact=1)
memory = pp.Group( memory = pp.Group(
pp.Optional(offset.setResultsName('offset')) pp.Optional(offset.setResultsName('offset'))
+ pp.Literal('(') + pp.Literal('(')
+ register.setResultsName('base') + (register.setResultsName('base') | (pp.Suppress(pp.Literal(',')) + scale))
+ pp.Optional(register.setResultsName('index')) + pp.Optional(register.setResultsName('index'))
+ pp.Optional(scale.setResultsName('scale')) + pp.Optional(scale.setResultsName('scale'))
+ pp.Literal(')') + pp.Literal(')')
+ pp.Optional(pp.Suppress(pp.Literal(','))) + pp.Optional(pp.Suppress(pp.Literal(',')))
+ pp.Optional(self.comment)
).setResultsName(self.MEMORY_ID) ).setResultsName(self.MEMORY_ID)
# Combine to instruction form # Combine to instruction form
operand1 = pp.Group(register ^ immediate ^ memory ^ identifier).setResultsName('operand1') operand1 = pp.Group(register ^ immediate ^ memory ^ identifier).setResultsName('operand1')
operand2 = pp.Group(register ^ immediate ^ memory).setResultsName('operand2') operand2 = pp.Group(register ^ immediate ^ memory).setResultsName('operand2')
operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3') operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3')
self.instruction_parser = ( self.instruction_parser = (
mnemonic.setResultsName('mnemonic') mnemonic
+ operand1 + pp.Optional(operand1)
+ pp.Optional(operand2) + pp.Optional(operand2)
+ pp.Optional(operand3) + pp.Optional(operand3)
+ pp.Optional(self.comment) + pp.Optional(self.comment)
@@ -117,7 +118,7 @@ class ParserX86ATT(BaseParser):
result = self.label.parseString(line, parseAll=True).asDict() result = self.label.parseString(line, parseAll=True).asDict()
instruction_form['label'] = result[self.LABEL_ID]['name'] instruction_form['label'] = result[self.LABEL_ID]['name']
if self.COMMENT_ID in result[self.LABEL_ID]: if self.COMMENT_ID in result[self.LABEL_ID]:
instruction_form['comment'] = ' '.join(result[self.COMMENT_ID]) instruction_form['comment'] = ' '.join(result[self.LABEL_ID][self.COMMENT_ID])
except pp.ParseException: except pp.ParseException:
pass pass
@@ -125,10 +126,10 @@ class ParserX86ATT(BaseParser):
if result is None: if result is None:
try: try:
result = self.directive.parseString(line, parseAll=True).asDict() result = self.directive.parseString(line, parseAll=True).asDict()
instruction_form['directive']['name'] = result[self.DIRECTIVE_ID]['name'] instruction_form['directive'] = {
instruction_form['directive']['parameters'] = result[self.DIRECTIVE_ID][ 'name': result[self.DIRECTIVE_ID]['name'],
'parameters' 'parameters': result[self.DIRECTIVE_ID]['parameters'],
] }
if self.COMMENT_ID in result[self.DIRECTIVE_ID]: if self.COMMENT_ID in result[self.DIRECTIVE_ID]:
instruction_form['comment'] = ' '.join( instruction_form['comment'] = ' '.join(
result[self.DIRECTIVE_ID][self.COMMENT_ID] result[self.DIRECTIVE_ID][self.COMMENT_ID]
@@ -151,18 +152,19 @@ class ParserX86ATT(BaseParser):
# Check from right to left # Check from right to left
# Check third operand # Check third operand
if 'operand3' in result: if 'operand3' in result:
operands['destination'] = result['operand3'] operands['destination'] = self.process_operand(result['operand3'])
# Check second operand # Check second operand
if 'operand2' in result: if 'operand2' in result:
if 'destination' in operands: if 'destination' in operands:
operands['sources'].insert(0, result['operand2']) operands['sources'].insert(0, self.process_operand(result['operand2']))
else: else:
operands['destination'] = result['operand2'] operands['destination'] = self.process_operand(result['operand2'])
# Add first operand # Check first operand
if 'destination' in operands: if 'operand1' in result:
operands['sources'].insert(0, result['operand1']) if 'destination' in operands:
else: operands['sources'].insert(0, self.process_operand(result['operand1']))
operands['destination'] = result['operand1'] else:
operands['destination'] = self.process_operand(result['operand1'])
return_dict = { return_dict = {
'instruction': result['mnemonic'], 'instruction': result['mnemonic'],
'operands': operands, 'operands': operands,
@@ -170,6 +172,22 @@ class ParserX86ATT(BaseParser):
} }
return return_dict return return_dict
def process_operand(self, operand):
    """Post-process one parsed operand.

    For the moment this only restructures memory addresses; every other
    kind of operand is handed back unchanged.

    :param operand: parsed operand, keyed by its operand kind
    :returns: the result of ``substitute_memory_address`` for a memory
        operand, otherwise ``operand`` itself
    """
    # Guard clause: anything that is not a memory reference passes through.
    if 'memory' not in operand:
        return operand
    return self.substitute_memory_address(operand['memory'])
def substitute_memory_address(self, memory_address):
    """Rebuild a parsed memory address with a fixed set of four keys.

    Removes unnecessarily created dictionary entries left over from memory
    address parsing and fills in the components that were not present.

    :param memory_address: mapping that may hold any of the keys
        'offset', 'base', 'index' and 'scale'
    :returns: ``{'memory': {...}}`` whose inner dict has exactly those four
        keys; a missing offset, base or index is None and a missing scale
        is the string '1'
    """
    new_dict = {
        # dict.get returns None for an absent key, which is the fallback
        # wanted for every component except the scale.
        'offset': memory_address.get('offset'),
        'base': memory_address.get('base'),
        'index': memory_address.get('index'),
        'scale': memory_address.get('scale', '1'),
    }
    return {'memory': new_dict}

View File

@@ -27,10 +27,12 @@ class TestParserX86ATT(unittest.TestCase):
def test_label_parser(self): def test_label_parser(self):
self.assertEqual(get_label(self.parser, 'main:')['name'], 'main') self.assertEqual(get_label(self.parser, 'main:')['name'], 'main')
self.assertEqual(get_label(self.parser, '..B1.10:')['name'], '.B1.10') self.assertEqual(get_label(self.parser, '..B1.10:')['name'], '..B1.10')
self.assertEqual(get_label(self.parser, '.2.3_2_pack.3:')['name'], '.2.3_2_pack.3') self.assertEqual(get_label(self.parser, '.2.3_2_pack.3:')['name'], '.2.3_2_pack.3')
self.assertEqual(get_label(self.parser, '.L1:\t\t\t#label1')['name'], '.L1') self.assertEqual(get_label(self.parser, '.L1:\t\t\t#label1')['name'], '.L1')
self.assertEqual(get_label(self.parser, '.L1:\t\t\t#label1')['comment'], 'label1') self.assertEqual(
' '.join(get_label(self.parser, '.L1:\t\t\t#label1')['comment']), 'label1'
)
with self.assertRaises(ParseException): with self.assertRaises(ParseException):
get_label(self.parser, '\t.cfi_startproc') get_label(self.parser, '\t.cfi_startproc')
@@ -39,7 +41,7 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(len(get_directive(self.parser, '\t.text')['parameters']), 0) self.assertEqual(len(get_directive(self.parser, '\t.text')['parameters']), 0)
self.assertEqual(get_directive(self.parser, '\t.align\t16,0x90')['name'], 'align') self.assertEqual(get_directive(self.parser, '\t.align\t16,0x90')['name'], 'align')
self.assertEqual(len(get_directive(self.parser, '\t.align\t16,0x90')['parameters']), 2) self.assertEqual(len(get_directive(self.parser, '\t.align\t16,0x90')['parameters']), 2)
self.assertEqual(get_directive('\t.align\t16,0x90')['parameters'][1], '0x90') self.assertEqual(get_directive(self.parser, '\t.align\t16,0x90')['parameters'][1], '0x90')
self.assertEqual( self.assertEqual(
get_directive(self.parser, ' .byte 100,103,144 #IACA START')['name'], get_directive(self.parser, ' .byte 100,103,144 #IACA START')['name'],
'byte', 'byte',
@@ -51,7 +53,11 @@ class TestParserX86ATT(unittest.TestCase):
'144', '144',
) )
self.assertEqual( self.assertEqual(
get_directive(self.parser, ' .byte 100,103,144 #IACA START')['comment'], ' '.join(
get_directive(self.parser, ' .byte 100,103,144 #IACA START')[
'comment'
]
),
'IACA START', 'IACA START',
) )
@@ -60,11 +66,13 @@ class TestParserX86ATT(unittest.TestCase):
instr2 = 'jb ..B1.4 \t' instr2 = 'jb ..B1.4 \t'
instr3 = ' movl $222,%ebx #IACA END' instr3 = ' movl $222,%ebx #IACA END'
instr4 = 'vmovss %xmm4, -4(%rsp,%rax,8) #12.9' instr4 = 'vmovss %xmm4, -4(%rsp,%rax,8) #12.9'
instr5 = 'mov %ebx, var(,1)'
parsed_1 = self.parser.parse_instruction(instr1) parsed_1 = self.parser.parse_instruction(instr1)
parsed_2 = self.parser.parse_instruction(instr2) parsed_2 = self.parser.parse_instruction(instr2)
parsed_3 = self.parser.parse_instruction(instr3) parsed_3 = self.parser.parse_instruction(instr3)
parsed_4 = self.parser.parse_instruction(instr4) parsed_4 = self.parser.parse_instruction(instr4)
parsed_5 = self.parser.parse_instruction(instr5)
self.assertEqual(parsed_1['instruction'], 'vcvtsi2ss') self.assertEqual(parsed_1['instruction'], 'vcvtsi2ss')
self.assertEqual(parsed_1['operands']['destination']['register']['name'], 'xmm2') self.assertEqual(parsed_1['operands']['destination']['register']['name'], 'xmm2')
@@ -72,7 +80,7 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(parsed_1['comment'], '12.27') self.assertEqual(parsed_1['comment'], '12.27')
self.assertEqual(parsed_2['instruction'], 'jb') self.assertEqual(parsed_2['instruction'], 'jb')
self.assertEqual(parsed_2['operands']['destination'], '..B1.4') self.assertEqual(parsed_2['operands']['destination']['identifier'], '..B1.4')
self.assertEqual(len(parsed_2['operands']['sources']), 0) self.assertEqual(len(parsed_2['operands']['sources']), 0)
self.assertIsNone(parsed_2['comment']) self.assertIsNone(parsed_2['comment'])
@@ -83,17 +91,24 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(parsed_4['instruction'], 'vmovss') self.assertEqual(parsed_4['instruction'], 'vmovss')
self.assertEqual(parsed_4['operands']['destination']['memory']['offset'], '-4') self.assertEqual(parsed_4['operands']['destination']['memory']['offset'], '-4')
self.assertEqual(parsed_4['operands']['destination']['memory']['base'], 'rsp') self.assertEqual(parsed_4['operands']['destination']['memory']['base']['name'], 'rsp')
self.assertEqual(parsed_4['operands']['destination']['memory']['index'], 'rax') self.assertEqual(parsed_4['operands']['destination']['memory']['index']['name'], 'rax')
self.assertEqual(parsed_4['operands']['destination']['memory']['scale'], '8') self.assertEqual(parsed_4['operands']['destination']['memory']['scale'], '8')
self.assertEqual(parsed_4['operands']['sources'][0]['register']['name'], 'xmm4') self.assertEqual(parsed_4['operands']['sources'][0]['register']['name'], 'xmm4')
self.assertEqual(parsed_4['comment'], '12.9') self.assertEqual(parsed_4['comment'], '12.9')
self.assertEqual(parsed_5['instruction'], 'mov')
self.assertEqual(parsed_5['operands']['destination']['memory']['offset'], 'var')
self.assertIsNone(parsed_5['operands']['destination']['memory']['base'])
self.assertIsNone(parsed_5['operands']['destination']['memory']['index'])
self.assertEqual(parsed_5['operands']['destination']['memory']['scale'], '1')
self.assertEqual(parsed_5['operands']['sources'][0]['register']['name'], 'ebx')
def test_parse_line(self): def test_parse_line(self):
line_comment = '# -- Begin main' line_comment = '# -- Begin main'
line_label = '..B1.7: # Preds ..B1.6' line_label = '..B1.7: # Preds ..B1.6'
line_directive = '\t\t.quad .2.3_2__kmpc_loc_pack.2 #qed' line_directive = '\t\t.quad .2.3_2__kmpc_loc_pack.2 #qed'
# line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9' line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9'
instruction_form_1 = { instruction_form_1 = {
'instruction': None, 'instruction': None,
@@ -107,7 +122,7 @@ class TestParserX86ATT(unittest.TestCase):
'instruction': None, 'instruction': None,
'operands': None, 'operands': None,
'directive': None, 'directive': None,
'comment': None, 'comment': 'Preds ..B1.6',
'label': '..B1.7', 'label': '..B1.7',
'line_number': 2, 'line_number': 2,
} }
@@ -119,26 +134,36 @@ class TestParserX86ATT(unittest.TestCase):
'label': None, 'label': None,
'line_number': 3, 'line_number': 3,
} }
# TODO instruction_form_4 = {
# instruction_form_4 = { 'instruction': 'lea',
# 'instruction': 'lea', 'operands': {
# 'operands': {'sources': {'memory': {'offset': '2', 'base': {'name': rax}, ''}}}, 'sources': [
# 'directive': None, {
# 'comment': '-- Begin main', 'memory': {
# 'label': None, 'offset': '2',
# 'line_number': 1, 'base': {'name': 'rax'},
# } 'index': {'name': 'rax'},
'scale': '1',
}
}
],
'destination': {'register': {'name': 'ecx'}},
},
'directive': None,
'comment': '-- Begin main',
'label': None,
'line_number': 1,
}
parsed_1 = self.parser.parse_line(line_comment, 1) parsed_1 = self.parser.parse_line(line_comment, 1)
parsed_2 = self.parser.parse_line(line_label, 2) parsed_2 = self.parser.parse_line(line_label, 2)
parsed_3 = self.parser.parse_line(line_directive, 3) parsed_3 = self.parser.parse_line(line_directive, 3)
# TODO parsed_4 parsed_4 = self.parser.parse_line(line_instruction, 4)
# parsed_4 = self.parser.parse_line(line_instruction, 4)
self.assertEqual(parsed_1, instruction_form_1) self.assertEqual(parsed_1, instruction_form_1)
self.assertEqual(parsed_2, instruction_form_2) self.assertEqual(parsed_2, instruction_form_2)
self.assertEqual(parsed_3, instruction_form_3) self.assertEqual(parsed_3, instruction_form_3)
# self.assertEqual(parsed_4, instruction_form_4) self.assertEqual(parsed_4['operands'], instruction_form_4['operands'])
################## ##################
@@ -149,8 +174,8 @@ def get_comment(parser, comment):
def get_label(parser, label): def get_label(parser, label):
return parser.label.parseString(label, parseAll=True).asDict() return parser.label.parseString(label, parseAll=True).asDict()['label']
def get_directive(parser, directive): def get_directive(parser, directive):
return parser.directive.parseString(directive, parseAll=True).asDict() return parser.directive.parseString(directive, parseAll=True).asDict()['directive']