mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-15 16:40:05 +01:00
initial parser structure added
This commit is contained in:
@@ -20,6 +20,9 @@ analysis and throughput prediction for an innermost loop kernel.
|
||||
:target: https://landscape.io/github/RRZE-HPC/OSACA/master
|
||||
:alt: Code Health
|
||||
|
||||
.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
|
||||
:target: https://github.com/ambv/black
|
||||
|
||||
Getting started
|
||||
===============
|
||||
|
||||
|
||||
9
osaca/parser/__init__.py
Normal file
9
osaca/parser/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
Collection of parsers supported by OSACA.
|
||||
|
||||
Only the parsers listed below are exported, so please add new parsers to __all__.
|
||||
"""
|
||||
from .parser_x86att import ParserX86ATT
|
||||
from .parser_ARMv81 import ParserARMv81
|
||||
|
||||
__all__ = ['ParserX86ATT', 'ParserARMv81']
|
||||
55
osaca/parser/base_parser.py
Executable file
55
osaca/parser/base_parser.py
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
class BaseParser(object):
    """
    Common base class of the assembly parsers.

    The constructor calls :meth:`construct_parser`. That hook and every
    ``parse_*`` method except :meth:`parse_file` raise ``NotImplementedError``
    here, so derived classes have to override them.
    """

    # Identifiers for operand types
    DIRECTIVE_ID = 'directive'
    IMMEDIATE_ID = 'immediate'
    LABEL_ID = 'label'
    MEMORY_ID = 'memory'
    REGISTER_ID = 'register'

    def __init__(self):
        self.construct_parser()

    def parse_file(self, file_content):
        """
        Parse assembly code line by line.

        The content is split at newline characters and every line that is not
        blank is handed to :meth:`parse_line` together with its 1-based
        position in ``file_content``. Blank lines are skipped but still count
        towards the line numbers.

        :param str file_content: assembly code
        :return: list with one entry per non-blank line, as returned by
                 :meth:`parse_line`
        """
        asm_instructions = []
        for line_number, line in enumerate(file_content.split('\n'), start=1):
            # Lines that are empty or contain only whitespace carry no
            # information and are not passed on to the line parser.
            if line.strip() == '':
                continue
            asm_instructions.append(self.parse_line(line, line_number))
        return asm_instructions

    def parse_line(self, line, line_number):
        """Parse one line of assembly code; done in derived classes."""
        raise NotImplementedError()

    def parse_instruction(self, instruction):
        """Parse one instruction; done in derived classes."""
        raise NotImplementedError()

    def parse_register(self, register):
        """Parse a register operand; done in derived classes."""
        raise NotImplementedError()

    def parse_memory(self, memory_address):
        """Parse a memory operand; done in derived classes."""
        raise NotImplementedError()

    def parse_immediate(self, immediate):
        """Parse an immediate operand; done in derived classes."""
        raise NotImplementedError()

    def construct_parser(self):
        """Set up the parser, called by the constructor; done in derived classes."""
        raise NotImplementedError()
|
||||
11
osaca/parser/parser_ARMv81.py
Executable file
11
osaca/parser/parser_ARMv81.py
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from .base_parser import BaseParser


class ParserARMv81(BaseParser):
    """
    Placeholder for the ARMv8.1 assembly parser.

    The parser is not implemented yet; creating an instance raises
    ``NotImplementedError``.
    """

    # TODO: implement the ARMv8.1 grammar and the parse_* methods

    def __init__(self):
        # TODO: call super().__init__() once construct_parser() is implemented
        raise NotImplementedError
|
||||
167
osaca/parser/parser_x86att.py
Executable file
167
osaca/parser/parser_x86att.py
Executable file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import pyparsing as pp
|
||||
|
||||
from .base_parser import BaseParser


class ParserX86ATT(BaseParser):
    """
    Parser for x86 assembly code in AT&T syntax, built with pyparsing.

    The grammars are created once in :meth:`construct_parser`, which is
    invoked by the base class constructor.
    """

    def __init__(self):
        super().__init__()

    def construct_parser(self):
        """
        Build the pyparsing grammars used by this parser.

        Sets the attributes ``comment``, ``label``, ``directive`` and
        ``instruction_parser``.
        """
        # Comment
        symbol_comment = '#'
        self.comment = pp.Literal(symbol_comment) + pp.Group(
            pp.ZeroOrMore(pp.Word(pp.printables))
        ).setResultsName('comment')
        # Define x86 assembly identifier
        first = pp.Word(pp.alphas + '_.', exact=1)
        rest = pp.Word(pp.alphanums + '_.')
        identifier = pp.Combine(first + pp.Optional(rest))
        # Label
        self.label = pp.Group(
            identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
        ).setResultsName(self.LABEL_ID)
        # Directive
        comma_separated_list = pp.delimitedList(
            pp.Optional(pp.quotedString | pp.Word(pp.alphanums)), delim=','
        )
        self.directive = pp.Group(
            pp.Literal('.')
            + pp.Word(pp.alphanums + '_').setResultsName('name')
            + comma_separated_list.setResultsName('parameters')
            + pp.Optional(self.comment)
        ).setResultsName(self.DIRECTIVE_ID)

        ##############################
        # Instructions
        # Mnemonic
        mnemonic = pp.ZeroOrMore(pp.Literal('data16') ^ pp.Literal('data32')) + pp.Word(
            pp.alphanums
        )
        # Register: pp.Regex('^%[0-9a-zA-Z]+,?')
        register = pp.Group(
            pp.Literal('%')
            + pp.Word(pp.alphanums).setResultsName('name')
            + pp.Optional(
                pp.Literal('{')
                + pp.Literal('%')
                + pp.Word(pp.alphanums).setResultsName('mask')
                + pp.Literal('}')
            )
            + pp.Optional(pp.Suppress(pp.Literal(',')))
        ).setResultsName(self.REGISTER_ID)
        # Immediate: pp.Regex('^\$(-?[0-9]+)|(0x[0-9a-fA-F]+),?')
        symbol_immediate = '$'
        decimal_number = pp.Combine(
            pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
        ).setResultsName('value')
        hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
        immediate = pp.Group(
            pp.Literal(symbol_immediate)
            + (decimal_number ^ hex_number)
            + pp.Optional(pp.Suppress(pp.Literal(',')))
        ).setResultsName(self.IMMEDIATE_ID)
        # Memory: offset(base, index, scale)
        offset = decimal_number ^ hex_number
        scale = pp.Word('1248', exact=1)
        memory = pp.Group(
            pp.Optional(offset.setResultsName('offset'))
            + pp.Literal('(')
            + register.setResultsName('base')
            + pp.Optional(register.setResultsName('index'))
            + pp.Optional(scale.setResultsName('scale'))
            + pp.Literal(')')
            + pp.Optional(pp.Suppress(pp.Literal(',')))
            + pp.Optional(self.comment)
        ).setResultsName(self.MEMORY_ID)
        # Combine to instruction form
        # NOTE(review): self.label requires a trailing ':', so a bare label
        # name used as a jump target presumably does not match operand1 --
        # confirm against real assembly input.
        operand1 = pp.Group(register ^ immediate ^ memory ^ self.label).setResultsName('operand1')
        operand2 = pp.Group(register ^ immediate ^ memory).setResultsName('operand2')
        operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3')
        self.instruction_parser = (
            mnemonic.setResultsName('mnemonic')
            + operand1
            + pp.Optional(operand2)
            + pp.Optional(operand3)
            + pp.Optional(self.comment)
        )

    @staticmethod
    def _join_comment(parsed):
        """Return the words of the optional 'comment' result joined by spaces, or None."""
        if 'comment' not in parsed:
            return None
        return ' '.join(parsed['comment'])

    def parse_line(self, line, line_number=None):
        """
        Parse line and return instruction form.

        The line is tried as a pure comment first, then as a label and finally
        as an instruction. Directives are not handled yet and therefore end up
        in the instruction step.

        :param str line: line of assembly code
        :param int line_number: default None, identifier of instruction form
        :return: dict with the keys ``instruction``, ``operands``, ``comment``,
                 ``label_name`` and ``id``; entries that do not apply are None
        :raises pyparsing.ParseException: if the line is neither a comment nor
                 a label and cannot be parsed as an instruction
        """
        instruction_form = {
            'instruction': None,
            'operands': None,
            'comment': None,
            'label_name': None,
            'id': line_number,
        }
        result = None

        # 1. Parse comment
        try:
            result = self.comment.parseString(line, parseAll=True)
        except pp.ParseException:
            pass
        else:
            instruction_form['comment'] = self._join_comment(result)

        # 2. Parse label
        if result is None:
            try:
                result = self.label.parseString(line, parseAll=True)
            except pp.ParseException:
                pass
            else:
                # Name and optional comment are stored inside the label group
                label = result[self.LABEL_ID]
                instruction_form['label_name'] = label['name']
                instruction_form['comment'] = self._join_comment(label)

        # 3. Parse directive
        # TODO

        # 4. Parse instruction
        if result is None:
            result = self.parse_instruction(line)
            mnemonic = result['mnemonic']
            # Prefixes such as data16 yield more than one mnemonic token
            if not isinstance(mnemonic, str):
                mnemonic = ' '.join(mnemonic)
            instruction_form['instruction'] = mnemonic
            # operand2 and operand3 are optional in the grammar
            instruction_form['operands'] = [
                result[key] for key in ('operand1', 'operand2', 'operand3') if key in result
            ]
            instruction_form['comment'] = self._join_comment(result)

        return instruction_form

    def parse_instruction(self, instruction):
        """
        Parse a single instruction with the instruction grammar.

        The operand checks below are placeholders that do not change the
        result yet.

        :param str instruction: line of assembly code holding an instruction
        :return: pyparsing result with ``mnemonic``, ``operand1`` and, if
                 present, ``operand2``, ``operand3`` and ``comment``
        :raises pyparsing.ParseException: if the line does not match
        """
        result = self.instruction_parser.parseString(instruction, parseAll=True)
        # Check first operand
        # Check for register
        if self.REGISTER_ID in result['operand1']:
            # TODO
            pass
        # Check for immediate
        elif self.IMMEDIATE_ID in result['operand1']:
            # TODO
            pass
        # Check for memory address
        elif self.MEMORY_ID in result['operand1']:
            # TODO
            pass
        # Check for label
        elif self.LABEL_ID in result['operand1']:
            # TODO
            pass

        # Check second operand
        if 'operand2' in result:
            # if('reg' in op2): ...
            # TODO
            pass

        # Check third operand
        if 'operand3' in result:
            # TODO
            pass
        return result
|
||||
Reference in New Issue
Block a user