OSACA/osaca/parser/base_parser.py

#!/usr/bin/env python3
"""Parser superclass of specific parsers."""
import operator
import re


class BaseParser(object):
    """
    Common superclass of the ISA-specific assembly parsers.

    It provides the identifier constants shared by all parsers, a heuristic ISA
    detection and a generic line-by-line file parsing loop. Everything that is
    ISA-specific raises ``NotImplementedError`` here and has to be provided by a
    derived class.
    """

    # Identifiers for operand types
    COMMENT_ID = "comment"
    DIRECTIVE_ID = "directive"
    IMMEDIATE_ID = "immediate"
    LABEL_ID = "label"
    IDENTIFIER_ID = "identifier"
    MEMORY_ID = "memory"
    REGISTER_ID = "register"
    SEGMENT_EXT_ID = "segment_extension"
    INSTRUCTION_ID = "instruction"
    OPERANDS_ID = "operands"
    # Default for instances; ``__init__`` sets the flag on the instance itself
    _parser_constructed = False

    def __init__(self):
        """Call :meth:`construct_parser` unless this instance is already flagged as constructed."""
        if not self._parser_constructed:
            self.construct_parser()
            # Assigned via ``self``: this creates an instance attribute, the
            # class-level default above stays ``False``.
            self._parser_constructed = True

    @staticmethod
    def detect_ISA(file_content):
        """
        Detect the ISA of the assembly based on the used registers and return the ISA code.

        The register-name patterns of each ISA are counted in ``file_content`` and
        the ISA with the most hits wins. On a tie (including no hit at all)
        ``"x86"`` is returned, because it is the first candidate.

        :param str file_content: assembly code
        :return: ``"x86"`` or ``"aarch64"``
        """
        # Check for the amount of registers in the code to determine the ISA
        # 1) Check for xmm, ymm, zmm and the [er]ax, [er]bx, [er]cx, [er]dx registers
        #    in x86 AT&T syntax. The GPR names are not followed by a digit, so the
        #    pattern ends in a word boundary instead of ``[0-9]``.
        heuristics_x86ATT = [r"%[xyz]mm[0-9]", r"%[er][abcd]x\b"]
        # 2) check for v and z vector registers and x/w general-purpose registers.
        #    The leading ``\b`` keeps hex immediates such as ``0x10`` from being
        #    counted as an ``x1`` register.
        heuristics_aarch64 = [r"[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]", r"\b[wx][0-9]"]

        matches = {
            "x86": sum(len(re.findall(h, file_content)) for h in heuristics_x86ATT),
            "aarch64": sum(len(re.findall(h, file_content)) for h in heuristics_aarch64),
        }

        # max() returns the first maximal item, i.e. "x86" wins ties
        return max(matches.items(), key=operator.itemgetter(1))[0]

    def parse_file(self, file_content, start_line=0):
        """
        Parse an assembly file line by line.

        No extraction of a marked kernel is done here; every line that is not
        blank is handed to :meth:`parse_line` together with its line number.

        :param str file_content: assembly code
        :param int start_line: offset added to the line numbers, for the case that
                               the first line of ``file_content`` is not meant to be line 1
        :return: list of the results of :meth:`parse_line` (instruction forms), in file order
        """
        return [
            self.parse_line(line, line_number + start_line)
            for line_number, line in enumerate(file_content.split("\n"), start=1)
            # blank and whitespace-only lines are skipped but still counted
            if line.strip()
        ]

    def parse_line(self, line, line_number=None):
        """Parse a single line; not implemented here, done in derived classes."""
        raise NotImplementedError

    def parse_instruction(self, instruction):
        """Parse an instruction; not implemented here, done in derived classes."""
        raise NotImplementedError

    def parse_register(self, register_string):
        """Parse a register string; not implemented in the base class."""
        raise NotImplementedError

    def is_gpr(self, register):
        """Not implemented in the base class (presumably a general-purpose register check)."""
        raise NotImplementedError

    def is_vector_register(self, register):
        """Not implemented in the base class (presumably a vector register check)."""
        raise NotImplementedError

    def get_reg_type(self, register):
        """Not implemented in the base class."""
        raise NotImplementedError

    def construct_parser(self):
        """
        Hook called by ``__init__``; does nothing in the base class.

        Unlike the other stubs it returns ``None`` instead of raising
        ``NotImplementedError``, so the base class itself can be instantiated.
        """
        return

    ##################
    # Helper functions
    ##################

    def process_operand(self, operand):
        """Not implemented in the base class."""
        raise NotImplementedError

    def get_full_reg_name(self, register):
        """Not implemented in the base class."""
        raise NotImplementedError

    def normalize_imd(self, imd):
        """Not implemented in the base class."""
        raise NotImplementedError

    def is_reg_dependend_of(self, reg_a, reg_b):
        """Not implemented in the base class."""
        raise NotImplementedError