initial parser structure added

This commit is contained in:
JanLJL
2019-04-30 14:06:24 +02:00
parent 9e3aaa7336
commit 2d32b3a92a
5 changed files with 245 additions and 0 deletions

View File

@@ -20,6 +20,9 @@ analysis and throughput prediction for a innermost loop kernel.
:target: https://landscape.io/github/RRZE-HPC/OSACA/master
:alt: Code Health
.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
:target: https://github.com/ambv/black
Getting started
===============

9
osaca/parser/__init__.py Normal file
View File

@@ -0,0 +1,9 @@
"""
Collection of parsers supported by OSACA.

Only the parsers listed below are exported, so please add new parsers to
__all__.
"""
from .parser_x86att import ParserX86ATT
from .parser_ARMv81 import ParserARMv81
__all__ = ['ParserX86ATT', 'ParserARMv81']

55
osaca/parser/base_parser.py Executable file
View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python3


class BaseParser(object):
    """
    Common interface for the assembly parsers.

    Derived classes build their grammar in ``construct_parser`` and implement
    the ``parse_*`` hooks; every hook in this base class raises
    ``NotImplementedError``. Because ``__init__`` calls ``construct_parser``,
    instantiating ``BaseParser`` directly raises ``NotImplementedError`` too.
    """

    # Identifiers for operand types
    DIRECTIVE_ID = 'directive'
    IMMEDIATE_ID = 'immediate'
    LABEL_ID = 'label'
    MEMORY_ID = 'memory'
    REGISTER_ID = 'register'

    def __init__(self):
        # The grammar is built once per instance by the derived class.
        self.construct_parser()

    def parse_file(self, file_content):
        """
        Parse assembly code line by line.

        Empty lines are skipped; every other line is handed to
        ``parse_line`` together with its 1-based line number.

        :param str file_content: assembly code
        :return: list with one ``parse_line`` result per non-empty line
        """
        asm_instructions = []
        # Number lines before filtering so the numbers match the input text.
        for line_number, line in enumerate(file_content.split('\n'), start=1):
            if line == '':
                continue
            asm_instructions.append(self.parse_line(line, line_number))
        return asm_instructions

    def parse_line(self, line, line_number):
        # Done in derived classes
        raise NotImplementedError()

    def parse_instruction(self, instruction):
        # Done in derived classes
        raise NotImplementedError()

    def parse_register(self, register):
        # Done in derived classes
        raise NotImplementedError()

    def parse_memory(self, memory_address):
        # Done in derived classes
        raise NotImplementedError()

    def parse_immediate(self, immediate):
        # Done in derived classes
        raise NotImplementedError()

    def construct_parser(self):
        # Done in derived classes
        raise NotImplementedError()

11
osaca/parser/parser_ARMv81.py Executable file
View File

@@ -0,0 +1,11 @@
#!/usr/bin/env python3
from .base_parser import BaseParser


class ParserARMv81(BaseParser):
    """Placeholder for the ARMv8.1 assembly parser; not implemented yet."""

    # TODO: implement the ARMv8.1 grammar and the parse_* hooks
    def __init__(self):
        # TODO: call super().__init__() once construct_parser() is implemented
        raise NotImplementedError

167
osaca/parser/parser_x86att.py Executable file
View File

@@ -0,0 +1,167 @@
#!/usr/bin/env python3
import pyparsing as pp

from .base_parser import BaseParser


class ParserX86ATT(BaseParser):
    """Parser for x86 assembly in AT&T syntax, built on pyparsing."""

    def __init__(self):
        super().__init__()

    def construct_parser(self):
        """
        Build the pyparsing grammar.

        Sets ``self.comment``, ``self.label``, ``self.directive`` and
        ``self.instruction_parser``.
        """
        # Comment
        symbol_comment = '#'
        self.comment = pp.Literal(symbol_comment) + pp.Group(
            pp.ZeroOrMore(pp.Word(pp.printables))
        ).setResultsName('comment')
        # Define x86 assembly identifier
        first = pp.Word(pp.alphas + '_.', exact=1)
        rest = pp.Word(pp.alphanums + '_.')
        identifier = pp.Combine(first + pp.Optional(rest))
        # Label
        self.label = pp.Group(
            identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
        ).setResultsName(self.LABEL_ID)
        # Directive
        comma_separated_list = pp.delimitedList(
            pp.Optional(pp.quotedString | pp.Word(pp.alphanums)), delim=','
        )
        self.directive = pp.Group(
            pp.Literal('.')
            + pp.Word(pp.alphanums + '_').setResultsName('name')
            + comma_separated_list.setResultsName('parameters')
            + pp.Optional(self.comment)
        ).setResultsName(self.DIRECTIVE_ID)

        ##############################
        # Instructions
        # Mnemonic, optionally preceded by data16/data32 prefixes
        mnemonic = pp.ZeroOrMore(pp.Literal('data16') ^ pp.Literal('data32')) + pp.Word(
            pp.alphanums
        )
        # Register: %name, optionally followed by a {%mask} and a comma
        register = pp.Group(
            pp.Literal('%')
            + pp.Word(pp.alphanums).setResultsName('name')
            + pp.Optional(
                pp.Literal('{')
                + pp.Literal('%')
                + pp.Word(pp.alphanums).setResultsName('mask')
                + pp.Literal('}')
            )
            + pp.Optional(pp.Suppress(pp.Literal(',')))
        ).setResultsName(self.REGISTER_ID)
        # Immediate: $ followed by a decimal or hexadecimal number
        symbol_immediate = '$'
        decimal_number = pp.Combine(
            pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
        ).setResultsName('value')
        hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
        immediate = pp.Group(
            pp.Literal(symbol_immediate)
            + (decimal_number ^ hex_number)
            + pp.Optional(pp.Suppress(pp.Literal(',')))
        ).setResultsName(self.IMMEDIATE_ID)
        # Memory: offset(base, index, scale)
        offset = decimal_number ^ hex_number
        scale = pp.Word('1248', exact=1)
        # NOTE(review): a comment directly after a memory operand is captured
        # inside the memory group, not at the top level of the instruction.
        memory = pp.Group(
            pp.Optional(offset.setResultsName('offset'))
            + pp.Literal('(')
            + register.setResultsName('base')
            + pp.Optional(register.setResultsName('index'))
            + pp.Optional(scale.setResultsName('scale'))
            + pp.Literal(')')
            + pp.Optional(pp.Suppress(pp.Literal(',')))
            + pp.Optional(self.comment)
        ).setResultsName(self.MEMORY_ID)
        # Combine to instruction form; only the first operand may be a label
        operand1 = pp.Group(register ^ immediate ^ memory ^ self.label).setResultsName('operand1')
        operand2 = pp.Group(register ^ immediate ^ memory).setResultsName('operand2')
        operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3')
        self.instruction_parser = (
            mnemonic.setResultsName('mnemonic')
            + operand1
            + pp.Optional(operand2)
            + pp.Optional(operand3)
            + pp.Optional(self.comment)
        )

    @staticmethod
    def _join_comment(parse_result):
        """Return the 'comment' tokens joined by spaces, or None if absent."""
        if 'comment' not in parse_result:
            return None
        return ' '.join(parse_result['comment'])

    def parse_line(self, line, line_number=None):
        """
        Parse line and return instruction form.

        The line is tried as a pure comment, then as a label, then as an
        instruction. Directives are not handled yet.

        :param str line: line of assembly code
        :param int line_number: default None, identifier of instruction form
        :return: dict with the keys 'instruction', 'operands', 'comment',
                 'label_name' and 'id'
        :raises pyparsing.ParseException: if the line matches none of the
                 supported forms
        """
        instruction_form = {
            'instruction': None,
            'operands': None,
            'comment': None,
            'label_name': None,
            'id': line_number,
        }

        # 1. Parse comment
        try:
            result = self.comment.parseString(line, parseAll=True)
        except pp.ParseException:
            pass
        else:
            instruction_form['comment'] = self._join_comment(result)
            return instruction_form

        # 2. Parse label
        try:
            result = self.label.parseString(line, parseAll=True)
        except pp.ParseException:
            pass
        else:
            # Name and optional comment are nested inside the label group.
            label = result[self.LABEL_ID]
            instruction_form['label_name'] = label['name']
            instruction_form['comment'] = self._join_comment(label)
            return instruction_form

        # 3. Parse directive
        # TODO

        # 4. Parse instruction
        result = self.parse_instruction(line)
        # TODO: convert the raw parse results into the final operand format
        instruction_form['instruction'] = result['mnemonic']
        instruction_form['operands'] = [
            result[name] for name in ('operand1', 'operand2', 'operand3') if name in result
        ]
        instruction_form['comment'] = self._join_comment(result)
        return instruction_form

    def parse_instruction(self, instruction):
        """
        Parse a single instruction with the instruction grammar.

        :param str instruction: line of assembly code holding an instruction
        :return: pyparsing parse results of ``self.instruction_parser``
        :raises pyparsing.ParseException: if the line is not an instruction
        """
        result = self.instruction_parser.parseString(instruction, parseAll=True)
        # Check first operand
        # Check for register
        if self.REGISTER_ID in result['operand1']:
            # TODO
            pass
        # Check for immediate
        elif self.IMMEDIATE_ID in result['operand1']:
            # TODO
            pass
        # Check for memory address
        elif self.MEMORY_ID in result['operand1']:
            # TODO
            pass
        # Check for label
        elif self.LABEL_ID in result['operand1']:
            # TODO
            pass
        # Check second operand
        if 'operand2' in result:
            # if('reg' in op2): ...
            # TODO
            pass
        # Check third operand
        if 'operand3' in result:
            # TODO
            pass
        return result