mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 09:00:05 +01:00
added tests and functionalities for x86_att parser
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
|
||||
class BaseParser(object):
|
||||
# Identifiers for operand types
|
||||
COMMENT_ID = 'comment'
|
||||
DIRECTIVE_ID = 'directive'
|
||||
IMMEDIATE_ID = 'immediate'
|
||||
LABEL_ID = 'label'
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import pyparsing as pp
|
||||
|
||||
from .parser import BaseParser
|
||||
from .base_parser import BaseParser
|
||||
|
||||
|
||||
class ParserX86ATT(BaseParser):
|
||||
@@ -14,7 +14,7 @@ class ParserX86ATT(BaseParser):
|
||||
symbol_comment = '#'
|
||||
self.comment = pp.Literal(symbol_comment) + pp.Group(
|
||||
pp.ZeroOrMore(pp.Word(pp.printables))
|
||||
).setResultsName('comment')
|
||||
).setResultsName(self.COMMENT_ID)
|
||||
# Define x86 assembly identifier
|
||||
first = pp.Word(pp.alphas + '_.', exact=1)
|
||||
rest = pp.Word(pp.alphanums + '_.')
|
||||
@@ -24,15 +24,13 @@ class ParserX86ATT(BaseParser):
|
||||
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
|
||||
).setResultsName(self.LABEL_ID)
|
||||
# Directive
|
||||
commaSeparatedList = pp.delimitedList(
|
||||
pp.Optional(pp.quotedString | pp.Word(pp.alphanums)), delim=','
|
||||
)
|
||||
commaSeparatedList = pp.delimitedList(pp.Optional(pp.quotedString | identifier), delim=',')
|
||||
self.directive = pp.Group(
|
||||
pp.Literal('.')
|
||||
+ pp.Word(pp.alphanums + '_').setResultsName('name')
|
||||
+ commaSeparatedList.setResultsName('parameters')
|
||||
+ pp.Optional(self.comment)
|
||||
).setResultsName(self.DIRECTIVE_LABEL)
|
||||
).setResultsName(self.DIRECTIVE_ID)
|
||||
|
||||
##############################
|
||||
# Instructions
|
||||
@@ -77,7 +75,7 @@ class ParserX86ATT(BaseParser):
|
||||
+ pp.Optional(self.comment)
|
||||
).setResultsName(self.MEMORY_ID)
|
||||
# Combine to instruction form
|
||||
operand1 = pp.Group(register ^ immediate ^ memory ^ self.label).setResultsName('operand1')
|
||||
operand1 = pp.Group(register ^ immediate ^ memory ^ identifier).setResultsName('operand1')
|
||||
operand2 = pp.Group(register ^ immediate ^ memory).setResultsName('operand2')
|
||||
operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3')
|
||||
self.instruction_parser = (
|
||||
@@ -99,69 +97,79 @@ class ParserX86ATT(BaseParser):
|
||||
instruction_form = {
|
||||
'instruction': None,
|
||||
'operands': None,
|
||||
'directive': None,
|
||||
'comment': None,
|
||||
'label_name': None,
|
||||
'id': line_number,
|
||||
'label': None,
|
||||
'line_number': line_number,
|
||||
}
|
||||
result = None
|
||||
|
||||
# 1. Parse comment
|
||||
try:
|
||||
result = self.comment.parseString(line, parseAll=True)
|
||||
instruction_form['comment'] = result['comment'].join(' ')
|
||||
result = self.comment.parseString(line, parseAll=True).asDict()
|
||||
instruction_form['comment'] = ' '.join(result[self.COMMENT_ID])
|
||||
except pp.ParseException:
|
||||
pass
|
||||
|
||||
# 2. Parse label
|
||||
if result is None:
|
||||
try:
|
||||
result = self.label.parseString(line, parseAll=True)
|
||||
instruction_form['comment'] = result['comment'].join(' ')
|
||||
instruction_form['label_name'] = result['label_name']
|
||||
result = self.label.parseString(line, parseAll=True).asDict()
|
||||
instruction_form['label'] = result[self.LABEL_ID]['name']
|
||||
if self.COMMENT_ID in result[self.LABEL_ID]:
|
||||
instruction_form['comment'] = ' '.join(result[self.COMMENT_ID])
|
||||
except pp.ParseException:
|
||||
pass
|
||||
|
||||
# 3. Parse directive
|
||||
# TODO
|
||||
if result is None:
|
||||
try:
|
||||
result = self.directive.parseString(line, parseAll=True).asDict()
|
||||
instruction_form['directive']['name'] = result[self.DIRECTIVE_ID]['name']
|
||||
instruction_form['directive']['parameters'] = result[self.DIRECTIVE_ID][
|
||||
'parameters'
|
||||
]
|
||||
if self.COMMENT_ID in result[self.DIRECTIVE_ID]:
|
||||
instruction_form['comment'] = ' '.join(
|
||||
result[self.DIRECTIVE_ID][self.COMMENT_ID]
|
||||
)
|
||||
except pp.ParseException:
|
||||
pass
|
||||
|
||||
# 4. Parse instruction
|
||||
if result is None:
|
||||
result = self.parse_instruction(line)
|
||||
# TODO
|
||||
instruction_form['instruction'] = result['instruction']
|
||||
instruction_form['operands'] = result['operands']
|
||||
instruction_form['comment'] = result['comment'].join(' ')
|
||||
instruction_form['comment'] = result['comment']
|
||||
|
||||
return instruction_form
|
||||
|
||||
def parse_instruction(self, instruction):
    """
    Parse a single instruction line into mnemonic, operands and comment.

    The line is matched once against ``self.instruction_parser`` and the
    named results ``operand1`` .. ``operand3`` are sorted into sources and
    destination: the right-most operand present on the line becomes the
    destination, every operand left of it is a source. Sources keep their
    textual (left-to-right) order.

    :param instruction: text of the line holding the instruction
    :returns: dict with the keys ``'instruction'`` (the parsed mnemonic),
        ``'operands'`` (dict with a ``'sources'`` list and a
        ``'destination'`` entry) and ``'comment'`` (comment tokens joined by
        single blanks, or ``None`` if the line carries no comment)
    :raises KeyError: if the parse result has no ``operand1`` or ``mnemonic``
        entry. Whatever ``parseString`` raises for a non-matching line is
        propagated unchanged (presumably ``pyparsing.ParseException`` --
        confirm against the grammar).
    """
    result = self.instruction_parser.parseString(instruction, parseAll=True).asDict()
    operands = {'sources': []}
    # Walk the optional operands from right to left: the first one found is
    # the destination, any further one is prepended to the sources so that
    # the list ends up in textual order.
    for key in ('operand3', 'operand2'):
        if key not in result:
            continue
        if 'destination' in operands:
            operands['sources'].insert(0, result[key])
        else:
            operands['destination'] = result[key]
    # The first operand is looked up unconditionally; it is the destination
    # only when it is the sole operand of the instruction.
    if 'destination' in operands:
        operands['sources'].insert(0, result['operand1'])
    else:
        operands['destination'] = result['operand1']
    return {
        'instruction': result['mnemonic'],
        'operands': operands,
        'comment': ' '.join(result['comment']) if 'comment' in result else None,
    }
|
||||
|
||||
def substitute_memory_address(self, memory_address):
|
||||
# Placeholder: not implemented yet, every call raises NotImplementedError.
# Planned job: remove unnecessarily created dictionary entries
# NOTE(review): presumably the entries of a parsed memory operand -- confirm once implemented
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -6,6 +6,7 @@ import unittest
|
||||
# Prepend the current and the parent directory to the import search path so
# that the test modules named below can be imported by their bare name.
sys.path[0:0] = ['.', '..']
# Each entry must be the file name of a test module without the ".py" suffix;
# the x86 AT&T parser tests live in "test_parser_x86att.py".
suite = unittest.TestLoader().loadTestsFromNames(
    [
        'test_parser_x86att',
    ]
)
|
||||
|
||||
|
||||
150
tests/test_parser_x86att.py
Normal file
150
tests/test_parser_x86att.py
Normal file
@@ -0,0 +1,150 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for x86 AT&T assembly parser
|
||||
"""
|
||||
|
||||
import unittest
|
||||
|
||||
from pyparsing import ParseException
|
||||
|
||||
from osaca.parser import ParserX86ATT
|
||||
|
||||
|
||||
class TestParserX86ATT(unittest.TestCase):
    """Exercise the comment, label, directive, instruction and line parsing of ParserX86ATT."""

    def setUp(self):
        # Every test works on its own parser instance
        self.parser = ParserX86ATT()

    ##################
    # Test
    ##################

    def test_comment_parser(self):
        """Comments are returned without the leading '#', tokens joined by one blank."""
        cases = (
            ('# some comments', 'some comments'),
            ('\t\t#AA BB CC \t end \t', 'AA BB CC end'),
            ('\t## comment ## comment', '# comment ## comment'),
        )
        for line, expected in cases:
            self.assertEqual(self.get_comment(line), expected)

    def test_label_parser(self):
        """Labels expose their name and an optional trailing comment; directives are rejected."""
        # NOTE(review): '..B1.10:' is expected to yield '.B1.10' (one leading dot) while
        # test_parse_line expects '..B1.7' to be kept as is -- confirm which one is intended
        names = (
            ('main:', 'main'),
            ('..B1.10:', '.B1.10'),
            ('.2.3_2_pack.3:', '.2.3_2_pack.3'),
        )
        for line, expected in names:
            self.assertEqual(self.get_label(line)['name'], expected)

        commented = self.get_label('.L1:\t\t\t#label1')
        self.assertEqual(commented['name'], '.L1')
        self.assertEqual(commented['comment'], 'label1')

        with self.assertRaises(ParseException):
            self.get_label('\t.cfi_startproc')

    def test_directive_parser(self):
        """Directives expose name, parameter list and an optional trailing comment."""
        text = self.get_directive('\t.text')
        self.assertEqual(text['name'], 'text')
        self.assertEqual(len(text['parameters']), 0)

        align = self.get_directive('\t.align\t16,0x90')
        self.assertEqual(align['name'], 'align')
        self.assertEqual(len(align['parameters']), 2)
        self.assertEqual(align['parameters'][1], '0x90')

        byte = self.get_directive(' .byte 100,103,144 #IACA START')
        self.assertEqual(byte['name'], 'byte')
        self.assertEqual(byte['parameters'][2], '144')
        self.assertEqual(byte['comment'], 'IACA START')

    # NOTE(review): the method name is misspelled ("instruciton"); it is kept unchanged
    # so that existing test selectors keep working
    def test_parse_instruciton(self):
        """parse_instruction splits a line into mnemonic, sources, destination and comment."""
        lines = (
            '\t\tvcvtsi2ss %edx, %xmm2, %xmm2\t\t\t#12.27',
            'jb ..B1.4 \t',
            ' movl $222,%ebx #IACA END',
            'vmovss %xmm4, -4(%rsp,%rax,8) #12.9',
        )
        # Parse everything up front, then inspect the results one by one
        convert, jump, move, store = [self.parser.parse_instruction(line) for line in lines]

        # Three register operands with comment
        self.assertEqual(convert['instruction'], 'vcvtsi2ss')
        self.assertEqual(convert['operands']['destination']['register']['name'], 'xmm2')
        self.assertEqual(convert['operands']['sources'][0]['register']['name'], 'edx')
        self.assertEqual(convert['comment'], '12.27')

        # Jump to a label: single operand, no comment
        self.assertEqual(jump['instruction'], 'jb')
        self.assertEqual(jump['operands']['destination'], '..B1.4')
        self.assertEqual(len(jump['operands']['sources']), 0)
        self.assertIsNone(jump['comment'])

        # Immediate source, register destination
        self.assertEqual(move['instruction'], 'movl')
        self.assertEqual(move['operands']['destination']['register']['name'], 'ebx')
        self.assertEqual(move['operands']['sources'][0]['immediate']['value'], '222')
        self.assertEqual(move['comment'], 'IACA END')

        # Register source, memory destination with offset, base, index and scale
        target = store['operands']['destination']['memory']
        self.assertEqual(store['instruction'], 'vmovss')
        self.assertEqual(target['offset'], '-4')
        self.assertEqual(target['base'], 'rsp')
        self.assertEqual(target['index'], 'rax')
        self.assertEqual(target['scale'], '8')
        self.assertEqual(store['operands']['sources'][0]['register']['name'], 'xmm4')
        self.assertEqual(store['comment'], '12.9')

    def test_parse_line(self):
        """parse_line fills exactly the fields that belong to the kind of line it was given."""
        line_comment = '# -- Begin main'
        line_label = '..B1.7: # Preds ..B1.6'
        line_directive = '\t\t.quad .2.3_2__kmpc_loc_pack.2 #qed'
        # TODO: also cover an instruction line such as
        #       '\t\tlea 2(%rax,%rax), %ecx #12.9'
        #       once the expected instruction form (instruction 'lea', a memory source with
        #       offset '2' and base 'rax', comment and line number) is settled

        # All fields a line form carries besides its line number, unset
        blank = {
            'instruction': None,
            'operands': None,
            'directive': None,
            'comment': None,
            'label': None,
        }
        instruction_form_1 = dict(blank, comment='-- Begin main', line_number=1)
        # NOTE(review): the label line carries a comment ('# Preds ..B1.6') but the expected
        # comment is None -- confirm this is intended
        instruction_form_2 = dict(blank, label='..B1.7', line_number=2)
        instruction_form_3 = dict(
            blank,
            directive={'name': 'quad', 'parameters': ['.2.3_2__kmpc_loc_pack.2']},
            comment='qed',
            line_number=3,
        )

        parsed_1 = self.parser.parse_line(line_comment, 1)
        parsed_2 = self.parser.parse_line(line_label, 2)
        parsed_3 = self.parser.parse_line(line_directive, 3)

        self.assertEqual(parsed_1, instruction_form_1)
        self.assertEqual(parsed_2, instruction_form_2)
        self.assertEqual(parsed_3, instruction_form_3)

    ##################
    # Helper functions
    ##################
    def get_comment(self, comment):
        """Parse `comment` with the comment grammar and return its tokens joined by blanks."""
        parsed = self.parser.comment.parseString(comment, parseAll=True).asDict()
        return ' '.join(parsed['comment'])

    def get_label(self, label):
        """Parse `label` with the label grammar and return the result as a dict."""
        parsed = self.parser.label.parseString(label, parseAll=True)
        return parsed.asDict()

    def get_directive(self, directive):
        """Parse `directive` with the directive grammar and return the result as a dict."""
        parsed = self.parser.directive.parseString(directive, parseAll=True)
        return parsed.asDict()
|
||||
Reference in New Issue
Block a user