Added tests and functionality for the x86 AT&T parser

This commit is contained in:
JanLJL
2019-04-30 18:37:07 +02:00
parent bc9b380429
commit 38bbf2712b
4 changed files with 205 additions and 45 deletions

View File

@@ -3,6 +3,7 @@
class BaseParser(object):
# Identifiers for operand types
COMMENT_ID = 'comment'
DIRECTIVE_ID = 'directive'
IMMEDIATE_ID = 'immediate'
LABEL_ID = 'label'

View File

@@ -2,7 +2,7 @@
import pyparsing as pp
from .parser import BaseParser
from .base_parser import BaseParser
class ParserX86ATT(BaseParser):
@@ -14,7 +14,7 @@ class ParserX86ATT(BaseParser):
symbol_comment = '#'
self.comment = pp.Literal(symbol_comment) + pp.Group(
pp.ZeroOrMore(pp.Word(pp.printables))
).setResultsName('comment')
).setResultsName(self.COMMENT_ID)
# Define x86 assembly identifier
first = pp.Word(pp.alphas + '_.', exact=1)
rest = pp.Word(pp.alphanums + '_.')
@@ -24,15 +24,13 @@ class ParserX86ATT(BaseParser):
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
).setResultsName(self.LABEL_ID)
# Directive
commaSeparatedList = pp.delimitedList(
pp.Optional(pp.quotedString | pp.Word(pp.alphanums)), delim=','
)
commaSeparatedList = pp.delimitedList(pp.Optional(pp.quotedString | identifier), delim=',')
self.directive = pp.Group(
pp.Literal('.')
+ pp.Word(pp.alphanums + '_').setResultsName('name')
+ commaSeparatedList.setResultsName('parameters')
+ pp.Optional(self.comment)
).setResultsName(self.DIRECTIVE_LABEL)
).setResultsName(self.DIRECTIVE_ID)
##############################
# Instructions
@@ -77,7 +75,7 @@ class ParserX86ATT(BaseParser):
+ pp.Optional(self.comment)
).setResultsName(self.MEMORY_ID)
# Combine to instruction form
operand1 = pp.Group(register ^ immediate ^ memory ^ self.label).setResultsName('operand1')
operand1 = pp.Group(register ^ immediate ^ memory ^ identifier).setResultsName('operand1')
operand2 = pp.Group(register ^ immediate ^ memory).setResultsName('operand2')
operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3')
self.instruction_parser = (
@@ -99,69 +97,79 @@ class ParserX86ATT(BaseParser):
instruction_form = {
'instruction': None,
'operands': None,
'directive': None,
'comment': None,
'label_name': None,
'id': line_number,
'label': None,
'line_number': line_number,
}
result = None
# 1. Parse comment
try:
result = self.comment.parseString(line, parseAll=True)
instruction_form['comment'] = result['comment'].join(' ')
result = self.comment.parseString(line, parseAll=True).asDict()
instruction_form['comment'] = ' '.join(result[self.COMMENT_ID])
except pp.ParseException:
pass
# 2. Parse label
if result is None:
try:
result = self.label.parseString(line, parseAll=True)
instruction_form['comment'] = result['comment'].join(' ')
instruction_form['label_name'] = result['label_name']
result = self.label.parseString(line, parseAll=True).asDict()
instruction_form['label'] = result[self.LABEL_ID]['name']
if self.COMMENT_ID in result[self.LABEL_ID]:
instruction_form['comment'] = ' '.join(result[self.COMMENT_ID])
except pp.ParseException:
pass
# 3. Parse directive
# TODO
if result is None:
try:
result = self.directive.parseString(line, parseAll=True).asDict()
instruction_form['directive']['name'] = result[self.DIRECTIVE_ID]['name']
instruction_form['directive']['parameters'] = result[self.DIRECTIVE_ID][
'parameters'
]
if self.COMMENT_ID in result[self.DIRECTIVE_ID]:
instruction_form['comment'] = ' '.join(
result[self.DIRECTIVE_ID][self.COMMENT_ID]
)
except pp.ParseException:
pass
# 4. Parse instruction
if result is None:
result = self.parse_instruction(line)
# TODO
instruction_form['instruction'] = result['instruction']
instruction_form['operands'] = result['operands']
instruction_form['comment'] = result['comment'].join(' ')
instruction_form['comment'] = result['comment']
return instruction_form
def parse_instruction(self, instruction):
    """
    Parse one instruction line into its mnemonic, operands and comment.

    The line is matched against ``self.instruction_parser`` and the match is
    converted to a plain dict. Operands are classified by position: the last
    operand that was parsed becomes the destination, every operand before it
    becomes a source, and the sources keep the order in which they were written.

    Any exception raised by the underlying parser for a non-matching line is
    not caught here and propagates to the caller.

    :param instruction: a single line of assembly holding an instruction
    :returns: dict with the keys ``'instruction'`` (the mnemonic),
        ``'operands'`` (a dict with a ``'sources'`` list and, if at least one
        operand was parsed, a ``'destination'`` entry) and ``'comment'``
        (the comment words joined by single spaces, or ``None`` without comment)
    """
    result = self.instruction_parser.parseString(instruction, parseAll=True).asDict()
    # Collect whichever operands the grammar produced, in written order.
    parsed_operands = [
        result[key] for key in ('operand1', 'operand2', 'operand3') if key in result
    ]
    # The right-most operand is the destination, everything left of it is a source.
    operands = {'sources': parsed_operands[:-1]}
    if parsed_operands:
        operands['destination'] = parsed_operands[-1]
    return {
        'instruction': result['mnemonic'],
        'operands': operands,
        'comment': ' '.join(result['comment']) if 'comment' in result else None,
    }
def substitute_memory_address(self, memory_address):
    """
    Placeholder for cleaning up a parsed memory address.

    Intended to remove unnecessarily created dictionary entries; the
    clean-up itself has not been written yet.

    :param memory_address: currently unused
    :raises NotImplementedError: always
    """
    raise NotImplementedError()

View File

@@ -6,6 +6,7 @@ import unittest
# Prepend the current and the parent directory to the module search path
# before the test modules named below are looked up.
sys.path[0:0] = ['.', '..']
# Each entry is a module name and must match the test file name without
# its extension (tests/test_parser_x86att.py).
suite = unittest.TestLoader().loadTestsFromNames(
    [
        'test_parser_x86att',
    ]
)

150
tests/test_parser_x86att.py Normal file
View File

@@ -0,0 +1,150 @@
#!/usr/bin/env python3
"""
Unit tests for x86 AT&T assembly parser
"""
import unittest
from pyparsing import ParseException
from osaca.parser import ParserX86ATT
class TestParserX86ATT(unittest.TestCase):
    """Unit tests for the grammar elements and entry points of ``ParserX86ATT``."""

    def setUp(self):
        # Every test works on its own parser instance.
        self.parser = ParserX86ATT()

    ##################
    # Test
    ##################

    def test_comment_parser(self):
        """Comments are returned as their words joined by single spaces."""
        self.assertEqual(self.get_comment('# some comments'), 'some comments')
        self.assertEqual(self.get_comment('\t\t#AA BB CC \t end \t'), 'AA BB CC end')
        self.assertEqual(self.get_comment('\t## comment ## comment'), '# comment ## comment')

    def test_label_parser(self):
        """Labels expose their name and, if present, a trailing comment."""
        self.assertEqual(self.get_label('main:')['name'], 'main')
        # The full identifier is expected, leading dots included (same
        # convention as the '..B1.7' label in test_parse_line).
        self.assertEqual(self.get_label('..B1.10:')['name'], '..B1.10')
        self.assertEqual(self.get_label('.2.3_2_pack.3:')['name'], '.2.3_2_pack.3')
        self.assertEqual(self.get_label('.L1:\t\t\t#label1')['name'], '.L1')
        # Comments are parsed as a list of words, so join before comparing.
        self.assertEqual(' '.join(self.get_label('.L1:\t\t\t#label1')['comment']), 'label1')
        with self.assertRaises(ParseException):
            self.get_label('\t.cfi_startproc')

    def test_directive_parser(self):
        """Directives expose their name, their parameters and an optional comment."""
        self.assertEqual(self.get_directive('\t.text')['name'], 'text')
        # NOTE(review): assumes an empty parameter list is still reported under
        # 'parameters' -- confirm against the directive grammar.
        self.assertEqual(len(self.get_directive('\t.text')['parameters']), 0)
        self.assertEqual(self.get_directive('\t.align\t16,0x90')['name'], 'align')
        self.assertEqual(len(self.get_directive('\t.align\t16,0x90')['parameters']), 2)
        self.assertEqual(self.get_directive('\t.align\t16,0x90')['parameters'][1], '0x90')
        self.assertEqual(
            self.get_directive(' .byte 100,103,144 #IACA START')['name'], 'byte'
        )
        self.assertEqual(
            self.get_directive(' .byte 100,103,144 #IACA START')['parameters'][2],
            '144',
        )
        # Comments are parsed as a list of words, so join before comparing.
        self.assertEqual(
            ' '.join(self.get_directive(' .byte 100,103,144 #IACA START')['comment']),
            'IACA START',
        )

    def test_parse_instruction(self):
        """Instructions are split into mnemonic, sources, destination and comment."""
        instr1 = '\t\tvcvtsi2ss %edx, %xmm2, %xmm2\t\t\t#12.27'
        instr2 = 'jb ..B1.4 \t'
        instr3 = ' movl $222,%ebx #IACA END'
        instr4 = 'vmovss %xmm4, -4(%rsp,%rax,8) #12.9'

        parsed_1 = self.parser.parse_instruction(instr1)
        parsed_2 = self.parser.parse_instruction(instr2)
        parsed_3 = self.parser.parse_instruction(instr3)
        parsed_4 = self.parser.parse_instruction(instr4)

        self.assertEqual(parsed_1['instruction'], 'vcvtsi2ss')
        self.assertEqual(parsed_1['operands']['destination']['register']['name'], 'xmm2')
        self.assertEqual(parsed_1['operands']['sources'][0]['register']['name'], 'edx')
        self.assertEqual(parsed_1['comment'], '12.27')

        self.assertEqual(parsed_2['instruction'], 'jb')
        self.assertEqual(parsed_2['operands']['destination'], '..B1.4')
        self.assertEqual(len(parsed_2['operands']['sources']), 0)
        self.assertIsNone(parsed_2['comment'])

        self.assertEqual(parsed_3['instruction'], 'movl')
        self.assertEqual(parsed_3['operands']['destination']['register']['name'], 'ebx')
        self.assertEqual(parsed_3['operands']['sources'][0]['immediate']['value'], '222')
        self.assertEqual(parsed_3['comment'], 'IACA END')

        self.assertEqual(parsed_4['instruction'], 'vmovss')
        self.assertEqual(parsed_4['operands']['destination']['memory']['offset'], '-4')
        self.assertEqual(parsed_4['operands']['destination']['memory']['base'], 'rsp')
        self.assertEqual(parsed_4['operands']['destination']['memory']['index'], 'rax')
        self.assertEqual(parsed_4['operands']['destination']['memory']['scale'], '8')
        self.assertEqual(parsed_4['operands']['sources'][0]['register']['name'], 'xmm4')
        self.assertEqual(parsed_4['comment'], '12.9')

    def test_parse_line(self):
        """Whole lines are classified as comment, label or directive."""
        line_comment = '# -- Begin main'
        line_label = '..B1.7: # Preds ..B1.6'
        line_directive = '\t\t.quad .2.3_2__kmpc_loc_pack.2 #qed'
        # line_instruction = '\t\tlea 2(%rax,%rax), %ecx #12.9'

        instruction_form_1 = {
            'instruction': None,
            'operands': None,
            'directive': None,
            'comment': '-- Begin main',
            'label': None,
            'line_number': 1,
        }
        # NOTE(review): the label line carries the comment 'Preds ..B1.6' but
        # None is expected here -- confirm whether label comments should be
        # reported by parse_line.
        instruction_form_2 = {
            'instruction': None,
            'operands': None,
            'directive': None,
            'comment': None,
            'label': '..B1.7',
            'line_number': 2,
        }
        instruction_form_3 = {
            'instruction': None,
            'operands': None,
            'directive': {'name': 'quad', 'parameters': ['.2.3_2__kmpc_loc_pack.2']},
            'comment': 'qed',
            'label': None,
            'line_number': 3,
        }
        # TODO
        # instruction_form_4 = {
        # 'instruction': 'lea',
        # 'operands': {'sources': {'memory': {'offset': '2', 'base': {'name': rax}, ''}}},
        # 'directive': None,
        # 'comment': '-- Begin main',
        # 'label': None,
        # 'line_number': 1,
        # }

        parsed_1 = self.parser.parse_line(line_comment, 1)
        parsed_2 = self.parser.parse_line(line_label, 2)
        parsed_3 = self.parser.parse_line(line_directive, 3)
        # TODO parsed_4
        # parsed_4 = self.parser.parse_line(line_instruction, 4)

        self.assertEqual(parsed_1, instruction_form_1)
        self.assertEqual(parsed_2, instruction_form_2)
        self.assertEqual(parsed_3, instruction_form_3)
        # self.assertEqual(parsed_4, instruction_form_4)

    ##################
    # Helper functions
    ##################

    def get_comment(self, comment):
        """Parse ``comment`` and return its words joined by single spaces."""
        return ' '.join(
            self.parser.comment.parseString(comment, parseAll=True).asDict()['comment']
        )

    def get_label(self, label):
        """Parse ``label`` and return the dict stored under the label group."""
        # The label grammar is a named group, so its fields sit one level below
        # the top-level dict (parse_line reads them the same way).
        parsed = self.parser.label.parseString(label, parseAll=True).asDict()
        return parsed[self.parser.LABEL_ID]

    def get_directive(self, directive):
        """Parse ``directive`` and return the dict stored under the directive group."""
        # Same nesting as for labels: unwrap the named group before returning.
        parsed = self.parser.directive.parseString(directive, parseAll=True).asDict()
        return parsed[self.parser.DIRECTIVE_ID]