fixed tests

This commit is contained in:
JanLJL
2019-05-02 18:52:16 +02:00
parent daa874b396
commit 1f52157e9c
2 changed files with 95 additions and 52 deletions

View File

@@ -18,13 +18,19 @@ class ParserX86ATT(BaseParser):
# Define x86 assembly identifier
first = pp.Word(pp.alphas + '_.', exact=1)
rest = pp.Word(pp.alphanums + '_.')
identifier = pp.Combine(first + pp.Optional(rest))
identifier = pp.Combine(first + pp.Optional(rest)).setResultsName('identifier')
# Label
self.label = pp.Group(
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
).setResultsName(self.LABEL_ID)
# Directive
commaSeparatedList = pp.delimitedList(pp.Optional(pp.quotedString | identifier), delim=',')
decimal_number = pp.Combine(
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
).setResultsName('value')
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
commaSeparatedList = pp.delimitedList(
pp.Optional(pp.quotedString | identifier | hex_number | decimal_number), delim=','
)
self.directive = pp.Group(
pp.Literal('.')
+ pp.Word(pp.alphanums + '_').setResultsName('name')
@@ -35,9 +41,9 @@ class ParserX86ATT(BaseParser):
##############################
# Instructions
# Mnemonic
mnemonic = pp.ZeroOrMore(pp.Literal('data16') ^ pp.Literal('data32')) + pp.Word(
mnemonic = pp.ZeroOrMore(pp.Literal('data16') | pp.Literal('data32')) + pp.Word(
pp.alphanums
)
).setResultsName('mnemonic')
# Register: pp.Regex('^%[0-9a-zA-Z]+,?')
register = pp.Group(
pp.Literal('%')
@@ -52,35 +58,30 @@ class ParserX86ATT(BaseParser):
).setResultsName(self.REGISTER_ID)
# Immediate: pp.Regex('^\$(-?[0-9]+)|(0x[0-9a-fA-F]+),?')
symbol_immediate = '$'
decimal_number = pp.Combine(
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
).setResultsName('value')
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
immediate = pp.Group(
pp.Literal(symbol_immediate)
+ (decimal_number ^ hex_number)
+ (hex_number | decimal_number)
+ pp.Optional(pp.Suppress(pp.Literal(',')))
).setResultsName(self.IMMEDIATE_ID)
# Memory: offset(base, index, scale)
offset = decimal_number ^ hex_number
offset = identifier | hex_number | decimal_number
scale = pp.Word('1248', exact=1)
memory = pp.Group(
pp.Optional(offset.setResultsName('offset'))
+ pp.Literal('(')
+ register.setResultsName('base')
+ (register.setResultsName('base') | (pp.Suppress(pp.Literal(',')) + scale))
+ pp.Optional(register.setResultsName('index'))
+ pp.Optional(scale.setResultsName('scale'))
+ pp.Literal(')')
+ pp.Optional(pp.Suppress(pp.Literal(',')))
+ pp.Optional(self.comment)
).setResultsName(self.MEMORY_ID)
# Combine to instruction form
operand1 = pp.Group(register ^ immediate ^ memory ^ identifier).setResultsName('operand1')
operand2 = pp.Group(register ^ immediate ^ memory).setResultsName('operand2')
operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3')
self.instruction_parser = (
mnemonic.setResultsName('mnemonic')
+ operand1
mnemonic
+ pp.Optional(operand1)
+ pp.Optional(operand2)
+ pp.Optional(operand3)
+ pp.Optional(self.comment)
@@ -117,7 +118,7 @@ class ParserX86ATT(BaseParser):
result = self.label.parseString(line, parseAll=True).asDict()
instruction_form['label'] = result[self.LABEL_ID]['name']
if self.COMMENT_ID in result[self.LABEL_ID]:
instruction_form['comment'] = ' '.join(result[self.COMMENT_ID])
instruction_form['comment'] = ' '.join(result[self.LABEL_ID][self.COMMENT_ID])
except pp.ParseException:
pass
@@ -125,10 +126,10 @@ class ParserX86ATT(BaseParser):
if result is None:
try:
result = self.directive.parseString(line, parseAll=True).asDict()
instruction_form['directive']['name'] = result[self.DIRECTIVE_ID]['name']
instruction_form['directive']['parameters'] = result[self.DIRECTIVE_ID][
'parameters'
]
instruction_form['directive'] = {
'name': result[self.DIRECTIVE_ID]['name'],
'parameters': result[self.DIRECTIVE_ID]['parameters'],
}
if self.COMMENT_ID in result[self.DIRECTIVE_ID]:
instruction_form['comment'] = ' '.join(
result[self.DIRECTIVE_ID][self.COMMENT_ID]
@@ -151,18 +152,19 @@ class ParserX86ATT(BaseParser):
# Check from right to left
# Check third operand
if 'operand3' in result:
operands['destination'] = result['operand3']
operands['destination'] = self.process_operand(result['operand3'])
# Check second operand
if 'operand2' in result:
if 'destination' in operands:
operands['sources'].insert(0, result['operand2'])
operands['sources'].insert(0, self.process_operand(result['operand2']))
else:
operands['destination'] = result['operand2']
# Add first operand
if 'destination' in operands:
operands['sources'].insert(0, result['operand1'])
else:
operands['destination'] = result['operand1']
operands['destination'] = self.process_operand(result['operand2'])
# Check first operand
if 'operand1' in result:
if 'destination' in operands:
operands['sources'].insert(0, self.process_operand(result['operand1']))
else:
operands['destination'] = self.process_operand(result['operand1'])
return_dict = {
'instruction': result['mnemonic'],
'operands': operands,
@@ -170,6 +172,22 @@ class ParserX86ATT(BaseParser):
}
return return_dict
def process_operand(self, operand):
# For the moment, only used to structure memory addresses
if 'memory' in operand:
return self.substitute_memory_address(operand['memory'])
return operand
def substitute_memory_address(self, memory_address):
# remove unnecessarily created dictionary entries
raise NotImplementedError
# Remove unnecessarily created dictionary entries during memory address parsing
offset = None if 'offset' not in memory_address else memory_address['offset']
base = None if 'base' not in memory_address else memory_address['base']
index = None if 'index' not in memory_address else memory_address['index']
scale = '1' if 'scale' not in memory_address else memory_address['scale']
new_dict = {
'offset': offset,
'base': base,
'index': index,
'scale': scale,
}
return {'memory': new_dict}

View File

@@ -27,10 +27,12 @@ class TestParserX86ATT(unittest.TestCase):
def test_label_parser(self):
self.assertEqual(get_label(self.parser, 'main:')['name'], 'main')
self.assertEqual(get_label(self.parser, '..B1.10:')['name'], '.B1.10')
self.assertEqual(get_label(self.parser, '..B1.10:')['name'], '..B1.10')
self.assertEqual(get_label(self.parser, '.2.3_2_pack.3:')['name'], '.2.3_2_pack.3')
self.assertEqual(get_label(self.parser, '.L1:\t\t\t#label1')['name'], '.L1')
self.assertEqual(get_label(self.parser, '.L1:\t\t\t#label1')['comment'], 'label1')
self.assertEqual(
' '.join(get_label(self.parser, '.L1:\t\t\t#label1')['comment']), 'label1'
)
with self.assertRaises(ParseException):
get_label(self.parser, '\t.cfi_startproc')
@@ -39,7 +41,7 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(len(get_directive(self.parser, '\t.text')['parameters']), 0)
self.assertEqual(get_directive(self.parser, '\t.align\t16,0x90')['name'], 'align')
self.assertEqual(len(get_directive(self.parser, '\t.align\t16,0x90')['parameters']), 2)
self.assertEqual(get_directive('\t.align\t16,0x90')['parameters'][1], '0x90')
self.assertEqual(get_directive(self.parser, '\t.align\t16,0x90')['parameters'][1], '0x90')
self.assertEqual(
get_directive(self.parser, ' .byte 100,103,144 #IACA START')['name'],
'byte',
@@ -51,7 +53,11 @@ class TestParserX86ATT(unittest.TestCase):
'144',
)
self.assertEqual(
get_directive(self.parser, ' .byte 100,103,144 #IACA START')['comment'],
' '.join(
get_directive(self.parser, ' .byte 100,103,144 #IACA START')[
'comment'
]
),
'IACA START',
)
@@ -60,11 +66,13 @@ class TestParserX86ATT(unittest.TestCase):
instr2 = 'jb ..B1.4 \t'
instr3 = ' movl $222,%ebx #IACA END'
instr4 = 'vmovss %xmm4, -4(%rsp,%rax,8) #12.9'
instr5 = 'mov %ebx, var(,1)'
parsed_1 = self.parser.parse_instruction(instr1)
parsed_2 = self.parser.parse_instruction(instr2)
parsed_3 = self.parser.parse_instruction(instr3)
parsed_4 = self.parser.parse_instruction(instr4)
parsed_5 = self.parser.parse_instruction(instr5)
self.assertEqual(parsed_1['instruction'], 'vcvtsi2ss')
self.assertEqual(parsed_1['operands']['destination']['register']['name'], 'xmm2')
@@ -72,7 +80,7 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(parsed_1['comment'], '12.27')
self.assertEqual(parsed_2['instruction'], 'jb')
self.assertEqual(parsed_2['operands']['destination'], '..B1.4')
self.assertEqual(parsed_2['operands']['destination']['identifier'], '..B1.4')
self.assertEqual(len(parsed_2['operands']['sources']), 0)
self.assertIsNone(parsed_2['comment'])
@@ -83,17 +91,24 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(parsed_4['instruction'], 'vmovss')
self.assertEqual(parsed_4['operands']['destination']['memory']['offset'], '-4')
self.assertEqual(parsed_4['operands']['destination']['memory']['base'], 'rsp')
self.assertEqual(parsed_4['operands']['destination']['memory']['index'], 'rax')
self.assertEqual(parsed_4['operands']['destination']['memory']['base']['name'], 'rsp')
self.assertEqual(parsed_4['operands']['destination']['memory']['index']['name'], 'rax')
self.assertEqual(parsed_4['operands']['destination']['memory']['scale'], '8')
self.assertEqual(parsed_4['operands']['sources'][0]['register']['name'], 'xmm4')
self.assertEqual(parsed_4['comment'], '12.9')
self.assertEqual(parsed_5['instruction'], 'mov')
self.assertEqual(parsed_5['operands']['destination']['memory']['offset'], 'var')
self.assertIsNone(parsed_5['operands']['destination']['memory']['base'])
self.assertIsNone(parsed_5['operands']['destination']['memory']['index'])
self.assertEqual(parsed_5['operands']['destination']['memory']['scale'], '1')
self.assertEqual(parsed_5['operands']['sources'][0]['register']['name'], 'ebx')
def test_parse_line(self):
line_comment = '# -- Begin main'
line_label = '..B1.7: # Preds ..B1.6'
line_directive = '\t\t.quad .2.3_2__kmpc_loc_pack.2 #qed'
# line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9'
line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9'
instruction_form_1 = {
'instruction': None,
@@ -107,7 +122,7 @@ class TestParserX86ATT(unittest.TestCase):
'instruction': None,
'operands': None,
'directive': None,
'comment': None,
'comment': 'Preds ..B1.6',
'label': '..B1.7',
'line_number': 2,
}
@@ -119,26 +134,36 @@ class TestParserX86ATT(unittest.TestCase):
'label': None,
'line_number': 3,
}
# TODO
# instruction_form_4 = {
# 'instruction': 'lea',
# 'operands': {'sources': {'memory': {'offset': '2', 'base': {'name': rax}, ''}}},
# 'directive': None,
# 'comment': '-- Begin main',
# 'label': None,
# 'line_number': 1,
# }
instruction_form_4 = {
'instruction': 'lea',
'operands': {
'sources': [
{
'memory': {
'offset': '2',
'base': {'name': 'rax'},
'index': {'name': 'rax'},
'scale': '1',
}
}
],
'destination': {'register': {'name': 'ecx'}},
},
'directive': None,
'comment': '-- Begin main',
'label': None,
'line_number': 1,
}
parsed_1 = self.parser.parse_line(line_comment, 1)
parsed_2 = self.parser.parse_line(line_label, 2)
parsed_3 = self.parser.parse_line(line_directive, 3)
# TODO parsed_4
# parsed_4 = self.parser.parse_line(line_instruction, 4)
parsed_4 = self.parser.parse_line(line_instruction, 4)
self.assertEqual(parsed_1, instruction_form_1)
self.assertEqual(parsed_2, instruction_form_2)
self.assertEqual(parsed_3, instruction_form_3)
# self.assertEqual(parsed_4, instruction_form_4)
self.assertEqual(parsed_4['operands'], instruction_form_4['operands'])
##################
@@ -149,8 +174,8 @@ def get_comment(parser, comment):
def get_label(parser, label):
return parser.label.parseString(label, parseAll=True).asDict()
return parser.label.parseString(label, parseAll=True).asDict()['label']
def get_directive(parser, directive):
return parser.directive.parseString(directive, parseAll=True).asDict()
return parser.directive.parseString(directive, parseAll=True).asDict()['directive']