diff --git a/README.rst b/README.rst
index 017e0ab..388501b 100644
--- a/README.rst
+++ b/README.rst
@@ -20,6 +20,9 @@ analysis and throughput prediction for a innermost loop kernel.
     :target: https://landscape.io/github/RRZE-HPC/OSACA/master
     :alt: Code Health
 
+.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
+    :target: https://github.com/ambv/black
+
 Getting started
 ===============
 
diff --git a/osaca/parser/__init__.py b/osaca/parser/__init__.py
new file mode 100644
index 0000000..9d550cf
--- /dev/null
+++ b/osaca/parser/__init__.py
@@ -0,0 +1,9 @@
+"""
+Collection of parsers supported by OSACA.
+
+Only the parsers below will be exported, so please add new parsers to __all__.
+"""
+from .parser_x86att import ParserX86ATT
+from .parser_ARMv81 import ParserARMv81
+
+__all__ = ['ParserX86ATT', 'ParserARMv81']
diff --git a/osaca/parser/base_parser.py b/osaca/parser/base_parser.py
new file mode 100755
index 0000000..540e34f
--- /dev/null
+++ b/osaca/parser/base_parser.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""Common interface shared by all OSACA assembly parsers."""
+
+
+class BaseParser(object):
+    """Base class for assembly parsers; subclasses supply the grammar and the parse hooks."""
+
+    # Identifiers for operand types
+    DIRECTIVE_ID = 'directive'
+    IMMEDIATE_ID = 'immediate'
+    LABEL_ID = 'label'
+    MEMORY_ID = 'memory'
+    REGISTER_ID = 'register'
+
+    def __init__(self):
+        # Concrete parsers build their grammar once, at construction time.
+        self.construct_parser()
+
+    def parse_file(self, file_content):
+        '''
+        Parse assembly code line by line into instruction forms.
+
+        Blank lines are skipped but still counted, so the number handed to
+        parse_line() is the 1-based position of the line in file_content.
+
+        :param str file_content: assembly code
+        :return: list of instruction forms, one per non-blank line
+        '''
+        asm_instructions = []
+        for line_number, line in enumerate(file_content.split('\n'), start=1):
+            # Whitespace-only lines (e.g. a stray '\r') hold no instruction form.
+            if not line.strip():
+                continue
+            asm_instructions.append(self.parse_line(line, line_number))
+        return asm_instructions
+
+    def parse_line(self, line, line_number):
+        # Done in derived classes
+        raise NotImplementedError()
+
+    def parse_instruction(self, instruction):
+        # Done in derived classes
+        raise NotImplementedError()
+
+    def parse_register(self, register):
+        # Done in derived classes
+        raise NotImplementedError()
+
+    def parse_memory(self, memory_address):
+        # Done in derived classes
+        raise NotImplementedError()
+
+    def parse_immediate(self, immediate):
+        # Done in derived classes
+        raise NotImplementedError()
+
+    def construct_parser(self):
+        # Done in derived classes
+        raise NotImplementedError()
diff --git a/osaca/parser/parser_ARMv81.py b/osaca/parser/parser_ARMv81.py
new file mode 100755
index 0000000..04e40a8
--- /dev/null
+++ b/osaca/parser/parser_ARMv81.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+from .base_parser import BaseParser
+
+
+class ParserARMv81(BaseParser):
+    # TODO: implement the ARMv8.1 grammar
+
+    def __init__(self):
+        # TODO: call super().__init__() once construct_parser() exists
+        raise NotImplementedError
diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py
new file mode 100755
index 0000000..8ffa2d9
--- /dev/null
+++ b/osaca/parser/parser_x86att.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+
+import pyparsing as pp
+
+from .base_parser import BaseParser
+
+
+class ParserX86ATT(BaseParser):
+    """Parser for x86 assembly in AT&T syntax, built on pyparsing."""
+
+    def __init__(self):
+        super().__init__()
+
+    def construct_parser(self):
+        """Build the pyparsing grammars for comments, labels, directives and instructions."""
+        # Comment
+        symbol_comment = '#'
+        self.comment = pp.Literal(symbol_comment) + pp.Group(
+            pp.ZeroOrMore(pp.Word(pp.printables))
+        ).setResultsName('comment')
+        # Define x86 assembly identifier
+        first = pp.Word(pp.alphas + '_.', exact=1)
+        rest = pp.Word(pp.alphanums + '_.')
+        identifier = pp.Combine(first + pp.Optional(rest))
+        # Label
+        self.label = pp.Group(
+            identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
+        ).setResultsName(self.LABEL_ID)
+        # Directive
+        commaSeparatedList = pp.delimitedList(
+            pp.Optional(pp.quotedString | pp.Word(pp.alphanums)), delim=','
+        )
+        self.directive = pp.Group(
+            pp.Literal('.')
+            + pp.Word(pp.alphanums + '_').setResultsName('name')
+            + commaSeparatedList.setResultsName('parameters')
+            + pp.Optional(self.comment)
+        ).setResultsName(self.DIRECTIVE_ID)
+
+        ##############################
+        # Instructions
+        # Mnemonic
+        mnemonic = pp.ZeroOrMore(pp.Literal('data16') ^ pp.Literal('data32')) + pp.Word(
+            pp.alphanums
+        )
+        # Register: pp.Regex('^%[0-9a-zA-Z]+,?')
+        register = pp.Group(
+            pp.Literal('%')
+            + pp.Word(pp.alphanums).setResultsName('name')
+            + pp.Optional(
+                pp.Literal('{')
+                + pp.Literal('%')
+                + pp.Word(pp.alphanums).setResultsName('mask')
+                + pp.Literal('}')
+            )
+            + pp.Optional(pp.Suppress(pp.Literal(',')))
+        ).setResultsName(self.REGISTER_ID)
+        # Immediate: pp.Regex('^\$(-?[0-9]+)|(0x[0-9a-fA-F]+),?')
+        symbol_immediate = '$'
+        decimal_number = pp.Combine(
+            pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
+        ).setResultsName('value')
+        hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
+        immediate = pp.Group(
+            pp.Literal(symbol_immediate)
+            + (decimal_number ^ hex_number)
+            + pp.Optional(pp.Suppress(pp.Literal(',')))
+        ).setResultsName(self.IMMEDIATE_ID)
+        # Memory: offset(base, index, scale)
+        offset = decimal_number ^ hex_number
+        scale = pp.Word('1248', exact=1)
+        memory = pp.Group(
+            pp.Optional(offset.setResultsName('offset'))
+            + pp.Literal('(')
+            + register.setResultsName('base')
+            + pp.Optional(register.setResultsName('index'))
+            + pp.Optional(scale.setResultsName('scale'))
+            + pp.Literal(')')
+            + pp.Optional(pp.Suppress(pp.Literal(',')))
+            + pp.Optional(self.comment)
+        ).setResultsName(self.MEMORY_ID)
+        # Combine to instruction form
+        operand1 = pp.Group(register ^ immediate ^ memory ^ self.label).setResultsName('operand1')
+        operand2 = pp.Group(register ^ immediate ^ memory).setResultsName('operand2')
+        operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3')
+        self.instruction_parser = (
+            mnemonic.setResultsName('mnemonic')
+            + operand1
+            + pp.Optional(operand2)
+            + pp.Optional(operand3)
+            + pp.Optional(self.comment)
+        )
+
+    def parse_line(self, line, line_number=None):
+        """
+        Parse line and return instruction form.
+
+        :param str line: line of assembly code
+        :param int line_number: default None, identifier of instruction form
+        :return: parsed instruction form
+        :raises pyparsing.ParseException: if the line is neither a comment, a label
+            nor an instruction
+        """
+        instruction_form = {
+            'instruction': None,
+            'operands': None,
+            'comment': None,
+            'label_name': None,
+            'id': line_number,
+        }
+        result = None
+
+        # 1. Parse comment
+        try:
+            result = self.comment.parseString(line, parseAll=True)
+            instruction_form['comment'] = ' '.join(result['comment'])
+        except pp.ParseException:
+            pass
+
+        # 2. Parse label
+        if result is None:
+            try:
+                result = self.label.parseString(line, parseAll=True)
+            except pp.ParseException:
+                pass
+            else:
+                # The label grammar is a named group; its fields live inside that group.
+                label = result[self.LABEL_ID]
+                instruction_form['label_name'] = label['name']
+                if 'comment' in label:
+                    instruction_form['comment'] = ' '.join(label['comment'])
+
+        # 3. Parse directive
+        # TODO
+
+        # 4. Parse instruction
+        if result is None:
+            result = self.parse_instruction(line)
+            # TODO: refine once parse_instruction() post-processes the operands
+            instruction_form['instruction'] = result['mnemonic']
+            instruction_form['operands'] = [
+                result[name] for name in ('operand1', 'operand2', 'operand3') if name in result
+            ]
+            if 'comment' in result:
+                instruction_form['comment'] = ' '.join(result['comment'])
+
+        return instruction_form
+
+    def parse_instruction(self, instruction):
+        """
+        Parse a single instruction line with the instruction grammar.
+
+        :param str instruction: line of assembly code holding an instruction
+        :return: pyparsing result with ``mnemonic`` and ``operand1`` to ``operand3``
+        :raises pyparsing.ParseException: if the line does not match the grammar
+        """
+        result = self.instruction_parser.parseString(instruction, parseAll=True)
+        # Check first operand
+        # Check for register
+        if self.REGISTER_ID in result['operand1']:
+            # TODO
+            pass
+        # Check for immediate
+        elif self.IMMEDIATE_ID in result['operand1']:
+            # TODO
+            pass
+        # Check for memory address
+        elif self.MEMORY_ID in result['operand1']:
+            # TODO
+            pass
+        # Check for label
+        elif self.LABEL_ID in result['operand1']:
+            # TODO
+            pass
+
+        # Check second operand
+        if 'operand2' in result:
+            # if('reg' in op2): ...
+            # TODO
+            pass
+
+        # Check third operand
+        if 'operand3' in result:
+            # TODO
+            pass
+        return result