mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 00:50:06 +01:00
- Applied black formatting with line length 99 - Fixed flake8 linting issues (E265 block comments) - All 115 tests still pass after formatting - Code style is now consistent across the codebase Changes: - osaca/parser/base_parser.py: improved line breaks and comment formatting - osaca/osaca.py: added missing blank line - osaca/db_interface.py: reformatted long lines and comments - osaca/parser/parser_RISCV.py: extensive formatting improvements - osaca/semantics/kernel_dg.py: improved formatting and readability - osaca/semantics/hw_model.py: fixed shebang and formatting - osaca/semantics/marker_utils.py: removed TODO comment and formatting
164 lines
6.2 KiB
Python
164 lines
6.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Parser superclass of specific parsers."""
|
|
import operator
|
|
import re
|
|
|
|
|
|
class BaseParser(object):
    """
    Common base class of the ISA-specific assembly parsers.

    It provides the string identifiers shared by all parsers, the ISA detection heuristic
    (:meth:`detect_ISA`) and the generic line-by-line driver (:meth:`parse_file`). All other
    methods are hooks that raise ``NotImplementedError`` until a derived class overrides them
    (:meth:`construct_parser` is the only hook that defaults to a no-op).
    """

    # Identifiers for operand types
    comment_id = "comment"
    directive_id = "directive"
    immediate_id = "immediate"
    label_id = "label"
    identifier = "identifier"
    memory_id = "memory"
    register_id = "register"
    condition_id = "condition"
    segment_ext = "segment_extension"
    mnemonic = "instruction"
    operands = "operands"
    prefetch = "prfop"
    _parser_constructed = False

    def __init__(self):
        """Run :meth:`construct_parser` unless the parser is flagged as already constructed."""
        if not self._parser_constructed:
            self.construct_parser()
            # This assignment creates an instance attribute; the class-level default stays False.
            self._parser_constructed = True

    def isa(self):
        # Done in derived classes
        raise NotImplementedError

    # The marker functions return lists of `InstructionForm` that are used to find the IACA markers
    # in the parsed code. In addition to just a list, the marker may have a structure like
    # [I1, [I2, I3], I4, ...] where the nested list indicates that at least one of I2 and I3 must
    # match the second instruction in the fragment of parsed code.
    # If an instruction form is a `DirectiveOperand`, the match may happen over several directive
    # operands in the parsed code, provided that the directives have the same name and the
    # parameters are in sequence with respect to the pattern. This provides an easy way to describe
    # a sequence of bytes irrespective of the way it was grouped in the assembly source.
    # Note that markers must be matched *before* normalization.
    def start_marker(self):
        # Done in derived classes
        raise NotImplementedError

    def end_marker(self):
        # Done in derived classes
        raise NotImplementedError

    # Performs all the normalization needed to match the instruction to the ISA/arch model. This
    # method must set the `normalized` property of the instruction and must be idempotent.
    def normalize_instruction_form(self, instruction_form, isa_model, arch_model):
        # Done in derived classes
        raise NotImplementedError

    @staticmethod
    def detect_ISA(file_content):
        """
        Detect the ISA of the assembly based on the used registers and return the ISA code.

        Every candidate (ISA, syntax) pair owns a list of regular expressions. The pair whose
        expressions yield the most matches in ``file_content`` wins. On a tie (including the
        case of no match at all) the pair listed first wins, so the fallback is
        ``("x86", "ATT")``.

        :param str file_content: assembly code.
        :return: a tuple isa, syntax describing the architecture and the assembly syntax,
                 if appropriate. If there is no notion of syntax, the second element is None.
        """
        # Check for the amount of registers in the code to determine the ISA.
        # The insertion order of this table is the tie-breaking order, keep it stable.
        heuristics = {
            # 1) Check for xmm, ymm, zmm, rax, rbx, rcx, and rdx registers (and their 32-bit
            #    e?x forms) in x86 AT&T syntax. There is a % before each register name.
            ("x86", "ATT"): [r"%[xyz]mm[0-9]", r"%[er][abcd]x\b"],
            # 2) Same as above, but for the Intel syntax. There is no % before the register
            #    names; the lookbehind also rejects matches in the middle of a longer word.
            ("x86", "INTEL"): [r"[^%][xyz]mm[0-9]", r"(?<![%\w])[er][abcd]x\b"],
            # 3) Check for v and z vector registers and x/w general-purpose registers
            ("aarch64", None): [r"[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]", r"[wx][0-9]"],
            # 4) Check for RISC-V registers (x0-x31, a0-a7, t0-t6, s0-s11) and instructions.
            #    The register numbers are grouped so that the word boundaries apply to every
            #    alternative; otherwise hex literals such as 0x10 or names such as s12 count.
            ("riscv", None): [
                r"\bx(?:[0-9]|[1-2][0-9]|3[0-1])\b",  # x0-x31 registers
                r"\ba[0-7]\b",  # a0-a7 registers
                r"\bt[0-6]\b",  # t0-t6 registers
                r"\bs(?:[0-9]|1[0-1])\b",  # s0-s11 registers
                r"\bzero\b|\bra\b|\bsp\b|\bgp\b",  # zero, ra, sp, gp registers
                r"\bvsetvli\b|\bvle\b|\bvse\b",  # RV Vector instructions
                r"\baddi\b|\bsd\b|\bld\b|\bjal\b",  # Common RISC-V instructions
            ],
        }
        matches = {
            key: sum(len(re.findall(pattern, file_content)) for pattern in patterns)
            for key, patterns in heuristics.items()
        }

        # max() returns the first maximal item, i.e. the earliest table entry on a tie.
        return max(matches.items(), key=operator.itemgetter(1))[0]

    def parse_file(self, file_content, start_line=0):
        """
        Parse assembly file. This does *not* extract the marked kernel; it only parses
        every non-blank line into an instruction form via :meth:`parse_line`.

        :param str file_content: assembly code
        :param int start_line: offset, if first line in file_content is meant to be not 1
        :return: list of instruction forms
        """
        # Blank lines are skipped but still counted, so line numbers refer to the input text.
        return [
            self.parse_line(line, line_number)
            for line_number, line in enumerate(file_content.split("\n"), start=1 + start_line)
            if line.strip() != ""
        ]

    def parse_line(self, line, line_number=None):
        # Done in derived classes
        raise NotImplementedError

    def parse_instruction(self, instruction):
        # Done in derived classes
        raise NotImplementedError

    def parse_register(self, register_string):
        # Done in derived classes
        raise NotImplementedError

    def is_gpr(self, register):
        # Done in derived classes
        raise NotImplementedError

    def is_vector_register(self, register):
        # Done in derived classes
        raise NotImplementedError

    def get_reg_type(self, register):
        # Done in derived classes
        raise NotImplementedError

    def construct_parser(self):
        """
        Hook called by ``__init__``; the base implementation does nothing.

        Unlike the other hooks it does not raise ``NotImplementedError``, so a class that
        does not override it can still be instantiated.
        """
        return

    ##################
    # Helper functions
    ##################

    def process_operand(self, operand):
        # Done in derived classes
        raise NotImplementedError

    def get_full_reg_name(self, register):
        # Done in derived classes
        raise NotImplementedError

    # Must be called on a *normalized* instruction.
    def get_regular_source_operands(self, instruction_form):
        # Done in derived classes
        raise NotImplementedError

    # Must be called on a *normalized* instruction.
    def get_regular_destination_operands(self, instruction_form):
        # Done in derived classes
        raise NotImplementedError

    def normalize_imd(self, imd):
        # Done in derived classes
        raise NotImplementedError

    def is_reg_dependend_of(self, reg_a, reg_b):
        # Done in derived classes
        raise NotImplementedError
|