mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-15 16:40:05 +01:00
- Modified RISC-V parser to use x-register names instead of ABI names
- Added new vector instructions (vsetvli, vle8.v, vse8.v, vfmacc.vv, vfmul.vf)
- Added floating point instructions (fmul.d)
- Added unconditional jump instruction (j)
- Updated tests to match new register naming convention
- Added new RISC-V example files
- Updated .gitignore to exclude test environment and old examples
769 lines
30 KiB
Python
769 lines
30 KiB
Python
#!/usr/bin/env python3
|
|
import re
|
|
import os
|
|
from copy import deepcopy
|
|
import pyparsing as pp
|
|
|
|
from osaca.parser import BaseParser
|
|
from osaca.parser.instruction_form import InstructionForm
|
|
from osaca.parser.operand import Operand
|
|
from osaca.parser.directive import DirectiveOperand
|
|
from osaca.parser.memory import MemoryOperand
|
|
from osaca.parser.label import LabelOperand
|
|
from osaca.parser.register import RegisterOperand
|
|
from osaca.parser.identifier import IdentifierOperand
|
|
from osaca.parser.immediate import ImmediateOperand
|
|
from osaca.parser.condition import ConditionOperand
|
|
|
|
|
|
class ParserRISCV(BaseParser):
|
|
_instance = None
|
|
|
|
# Singleton pattern, as this is created very many times
|
|
def __new__(cls):
    """
    Return the shared parser instance, creating it on first use.

    The instance is cached on ``cls._instance`` and handed back on every
    later construction.
    """
    shared = cls._instance
    if shared is None:
        shared = super(ParserRISCV, cls).__new__(cls)
        cls._instance = shared
    return shared
|
|
|
|
def __init__(self):
    """Run the base parser initialisation and record the ISA name."""
    super(ParserRISCV, self).__init__()
    # Stored privately because ``isa`` is a method on this class, not an attribute.
    self._isa_str = "riscv"
|
|
|
|
def isa(self):
    """Return the ISA name recorded on the instance (``self._isa_str``)."""
    isa_name = self._isa_str
    return isa_name
|
|
|
|
def start_marker(self):
    """
    Return the parsed OSACA start marker for RISC-V assembly.

    The marker text is ``li a1, 111`` followed by a ``.byte 19,0,0,0``
    directive; it is handed to ``self.parse_file`` and that result is returned.
    """
    marker_lines = (
        "li a1, 111 # OSACA START MARKER",
        ".byte 19,0,0,0 # OSACA START MARKER",
    )
    # Every marker line, including the last one, is newline-terminated.
    marker_text = "".join(text + "\n" for text in marker_lines)
    return self.parse_file(marker_text)
|
|
|
|
def end_marker(self):
    """
    Return the parsed OSACA end marker for RISC-V assembly.

    The marker text is ``li a1, 222`` followed by a ``.byte 19,0,0,0``
    directive; it is handed to ``self.parse_file`` and that result is returned.
    """
    marker_lines = (
        "li a1, 222 # OSACA END MARKER",
        ".byte 19,0,0,0 # OSACA END MARKER",
    )
    # Every marker line, including the last one, is newline-terminated.
    marker_text = "".join(text + "\n" for text in marker_lines)
    return self.parse_file(marker_text)
|
|
|
|
def construct_parser(self):
    """
    Create the pyparsing grammar for RISC-V assembly.

    Sets ``self.comment``, ``self.label``, ``self.directive``,
    ``self.llvm_markers``, ``self.register`` and ``self.instruction_parser``.
    """
    # Comment - RISC-V uses # for comments
    symbol_comment = "#"
    self.comment = pp.Literal(symbol_comment) + pp.Group(
        pp.ZeroOrMore(pp.Word(pp.printables))
    ).setResultsName(self.comment_id)

    # Numeric literals (optionally negative); both are reported as "value"
    decimal_number = pp.Combine(
        pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
    ).setResultsName("value")
    hex_number = pp.Combine(
        pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
    ).setResultsName("value")

    # First character of an identifier
    first = pp.Word(pp.alphas + "_.", exact=1)
    # Rest of the identifier
    rest = pp.Word(pp.alphanums + "_.")
    # PLT suffix (@plt) for calls to shared libraries
    plt_suffix = pp.Optional(pp.Literal("@") + pp.Word(pp.alphas))

    identifier = pp.Group(
        (pp.Combine(first + pp.Optional(rest) + plt_suffix)).setResultsName("name")
        + pp.Optional(
            pp.Suppress(pp.Literal("+"))
            + (hex_number | decimal_number).setResultsName("offset")
        )
    ).setResultsName(self.identifier)

    # Label
    self.label = pp.Group(
        identifier.setResultsName("name") + pp.Literal(":") + pp.Optional(self.comment)
    ).setResultsName(self.label_id)

    # Directive
    directive_option = pp.Combine(
        pp.Word(pp.alphas + "#@.%", exact=1)
        + pp.Optional(pp.Word(pp.printables + " ", excludeChars=","))
    )
    directive_parameter = (
        pp.quotedString | directive_option | identifier | hex_number | decimal_number
    )
    commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=",")
    self.directive = pp.Group(
        pp.Literal(".")
        + pp.Word(pp.alphanums + "_").setResultsName("name")
        + (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName("parameters")
        + pp.Optional(self.comment)
    ).setResultsName(self.directive_id)

    # LLVM-MCA markers
    self.llvm_markers = pp.Group(
        pp.Literal("#")
        + pp.Combine(
            pp.CaselessLiteral("LLVM-MCA-")
            + (pp.CaselessLiteral("BEGIN") | pp.CaselessLiteral("END"))
        )
        + pp.Optional(self.comment)
    ).setResultsName(self.comment_id)

    ##############################
    # Instructions
    # Mnemonic
    mnemonic = pp.Word(pp.alphanums + ".").setResultsName("mnemonic")

    # Immediate:
    # int: ^-?[0-9]+ | hex: ^0x[0-9a-fA-F]+
    immediate = pp.Group(
        (hex_number ^ decimal_number)
        | identifier
    ).setResultsName(self.immediate_id)

    # Register:
    # RISC-V has two main types of registers:
    # 1. Integer registers (x0-x31 or ABI names)
    # 2. Floating-point registers (f0-f31 or ABI names)

    # Integer register ABI names. "fp" is the frame-pointer alias of s0/x8; the
    # register post-processing already maps it, so the grammar accepts it too.
    integer_reg_abi = (
        pp.CaselessLiteral("zero") |
        pp.CaselessLiteral("ra") |
        pp.CaselessLiteral("sp") |
        pp.CaselessLiteral("gp") |
        pp.CaselessLiteral("tp") |
        pp.CaselessLiteral("fp") |
        pp.Regex(r"[tas][0-9]+")  # t0-t6, a0-a7, s0-s11
    ).setResultsName("name")

    # Integer registers x0-x31
    integer_reg_x = (
        pp.CaselessLiteral("x").setResultsName("prefix") +
        pp.Word(pp.nums).setResultsName("name")
    )

    # Floating point registers
    fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName("name")  # ft0-ft11, fa0-fa7, fs0-fs11

    fp_reg_f = (
        pp.CaselessLiteral("f").setResultsName("prefix") +
        pp.Word(pp.nums).setResultsName("name")
    )

    # Control and status registers (CSRs)
    csr_reg = pp.Combine(
        pp.CaselessLiteral("csr") + pp.Word(pp.alphanums + "_")
    ).setResultsName("name")

    # Vector registers (for the "V" extension)
    vector_reg = (
        pp.CaselessLiteral("v").setResultsName("prefix") +
        pp.Word(pp.nums).setResultsName("name")
    )

    # Combined register definition; alternatives are tried in this order
    register = pp.Group(
        integer_reg_x | integer_reg_abi | fp_reg_f | fp_reg_abi | vector_reg | csr_reg
    ).setResultsName(self.register_id)

    self.register = register

    # Memory addressing mode in RISC-V: offset(base_register)
    memory = pp.Group(
        pp.Optional(immediate.setResultsName("offset"))
        + pp.Suppress(pp.Literal("("))
        + register.setResultsName("base")
        + pp.Suppress(pp.Literal(")"))
    ).setResultsName(self.memory_id)

    # Combine to instruction form; "^" picks the longest matching alternative
    operand_first = pp.Group(
        register ^ immediate ^ memory ^ identifier
    )
    operand_rest = pp.Group(
        register ^ immediate ^ memory ^ identifier
    )

    # Vector instruction special parameters (e.g., e32, m4, ta, ma)
    vector_param = pp.Word(pp.alphas + pp.nums)

    # Handle additional vector parameters
    additional_params = pp.ZeroOrMore(
        pp.Suppress(pp.Literal(",")) +
        vector_param.setResultsName("vector_param", listAllMatches=True)
    )

    # Main instruction parser
    self.instruction_parser = (
        mnemonic
        + pp.Optional(operand_first.setResultsName("operand1"))
        + pp.Optional(pp.Suppress(pp.Literal(",")))
        + pp.Optional(operand_rest.setResultsName("operand2"))
        + pp.Optional(pp.Suppress(pp.Literal(",")))
        + pp.Optional(operand_rest.setResultsName("operand3"))
        + pp.Optional(pp.Suppress(pp.Literal(",")))
        + pp.Optional(operand_rest.setResultsName("operand4"))
        + pp.Optional(additional_params)  # For vector instructions with more params
        + pp.Optional(self.comment)
    )
|
|
|
|
def parse_line(self, line, line_number=None):
    """
    Parse line and return instruction form.

    The line is tried, in order, as comment / LLVM-MCA marker, label,
    directive and finally instruction.

    :param str line: line of assembly code
    :param line_number: identifier of instruction form, defaults to None
    :type line_number: int, optional
    :return: `InstructionForm` -- parsed asm line (comment, label, directive or
        instruction form)
    :raises ValueError: if the line cannot be parsed as any of the above
    """
    form = InstructionForm(
        mnemonic=None,
        operands=[],
        directive_id=None,
        comment_id=None,
        label_id=None,
        line=line,
        line_number=line_number,
    )
    parsed = None

    # 1. Comment, then LLVM-MCA marker. Both grammars are always tried; a
    #    marker match overrides the text stored by a plain-comment match.
    for grammar in (self.comment, self.llvm_markers):
        try:
            parsed = self.process_operand(grammar.parseString(line, parseAll=True).asDict())
            form.comment = " ".join(parsed[self.comment_id])
        except pp.ParseException:
            pass
    if parsed is not None:
        return form

    # 2. Label: process_operand yields (label operand, comment or None)
    try:
        parsed = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
        form.label = parsed[0].name
        if parsed[1] is not None:
            form.comment = " ".join(parsed[1])
    except pp.ParseException:
        pass
    if parsed is not None:
        return form

    # 3. Directive: process_operand yields (directive operand, comment or None)
    try:
        parsed = self.process_operand(
            self.directive.parseString(line, parseAll=True).asDict()
        )
        form.directive = DirectiveOperand(
            name=parsed[0].name, parameters=parsed[0].parameters
        )
        if parsed[1] is not None:
            form.comment = " ".join(parsed[1])
    except pp.ParseException:
        pass
    if parsed is not None:
        return form

    # 4. Instruction: the last resort, so a failure here is an error
    try:
        parsed = self.parse_instruction(line)
    except (pp.ParseException, KeyError) as e:
        raise ValueError(
            "Unable to parse {!r} on line {}".format(line, line_number)
        ) from e
    form.mnemonic = parsed.mnemonic
    form.operands = parsed.operands
    form.comment = parsed.comment
    return form
|
|
|
|
def parse_instruction(self, instruction):
    """
    Parse instruction in asm line.

    ``vsetvli`` lines are split by hand. Every other line goes through
    ``self.instruction_parser``; if that fails and the line contains ``%`` or
    its mnemonic starts with ``v``, the line is split by hand as a best effort
    and every operand is kept as an identifier.

    :param str instruction: Assembly line string.
    :returns: `InstructionForm` -- parsed instruction form
    :raises Exception: whatever the grammar path raised, when no fallback applies
    """
    # Assembly is normally indented, so prefix checks must ignore leading
    # whitespace.
    stripped = instruction.strip()

    # Special handling for vsetvli, which carries several vtype parameters
    if stripped.startswith("vsetvli"):
        mnemonic, operand_texts, comment = self._split_instruction_text(stripped)
        return InstructionForm(
            mnemonic=mnemonic,
            operands=[self._vsetvli_operand(text) for text in operand_texts],
            comment_id=comment,
        )

    # Regular instruction parsing
    try:
        result = self.instruction_parser.parseString(instruction, parseAll=True).asDict()
        operands = []
        for key in ("operand1", "operand2", "operand3", "operand4"):
            if key not in result:
                continue
            operand = self.process_operand(result[key])
            if isinstance(operand, list):
                operands.extend(operand)
            else:
                operands.append(operand)

        # Trailing vector parameters (e.g. e32, m4, ta, ma) become identifiers
        if "vector_param" in result:
            params = result["vector_param"]
            if not isinstance(params, list):
                params = [params]
            operands.extend(IdentifierOperand(name=param) for param in params)

        return InstructionForm(
            mnemonic=result["mnemonic"],
            operands=operands,
            comment_id=" ".join(result[self.comment_id]) if self.comment_id in result else None,
        )
    except Exception:
        # Deliberately broad: any failure on a vector or %-relocation line
        # falls back to a plain split; everything else is re-raised unchanged.
        if not ("%" in instruction or stripped.startswith("v")):
            raise
        mnemonic, operand_texts, comment = self._split_instruction_text(stripped)
        if mnemonic is None:
            # Nothing but a comment or whitespace: no fallback is possible.
            raise

        operands = []
        for text in operand_texts:
            # Reduce '%hi(data)' style operands to 'data'
            if text.startswith("%") and "(" in text and ")" in text:
                text = text[text.index("(") + 1:text.index(")")]
            operands.append(IdentifierOperand(name=text))

        return InstructionForm(
            mnemonic=mnemonic,
            operands=operands,
            comment_id=comment,
        )

def _split_instruction_text(self, instruction):
    """
    Split a raw line into ``(mnemonic, operand strings, comment)`` by hand.

    The comment is the stripped text after the first ``#`` (None without
    ``#``). Operands are the comma-separated, stripped, non-empty pieces after
    the mnemonic. ``mnemonic`` is None when there is no code before the comment.
    """
    code, hash_mark, comment_text = instruction.partition("#")
    comment = comment_text.strip() if hash_mark else None
    # Split off only the mnemonic so operands written as "a0, a1" stay together.
    pieces = code.split(None, 1)
    if not pieces:
        return None, [], comment
    operand_texts = []
    if len(pieces) > 1:
        operand_texts = [text.strip() for text in pieces[1].split(",") if text.strip()]
    return pieces[0], operand_texts, comment

def _vsetvli_operand(self, text):
    """
    Turn one ``vsetvli`` operand string into an operand object.

    ``x<N>`` and integer ABI register names become register operands in the
    same prefix/name form the grammar path produces; anything else (e.g. the
    vtype fields e32, m4, ta, ma) becomes an identifier.
    """
    numbered = re.fullmatch(r"[xX]([0-9]+)", text)
    if numbered:
        return RegisterOperand(prefix="x", name=numbered.group(1))
    abi_names = ("zero", "ra", "sp", "gp", "tp", "fp")
    if text.lower() in abi_names or re.fullmatch(r"[tas][0-9]+", text):
        return self.process_register_operand({"name": text})
    return IdentifierOperand(name=text)
|
|
|
|
def process_operand(self, operand):
    """
    Post-process a parsed operand.

    The operand is handed to the handler of the first known key it contains;
    if it contains none of them it is returned unchanged.
    """
    # Order matters: the first key found in the operand decides the handler.
    dispatch = (
        (self.memory_id, self.process_memory_address),
        (self.immediate_id, self.process_immediate),
        (self.label_id, self.process_label),
        (self.identifier, self.process_identifier),
        (self.register_id, self.process_register_operand),
        (self.directive_id, self.process_directive_operand),
    )
    for key, handler in dispatch:
        if key in operand:
            return handler(operand[key])
    return operand
|
|
|
|
def process_directive_operand(self, operand):
    """
    Build ``(DirectiveOperand, comment)`` from a parsed directive.

    The comment is None when the parse result has no "comment" entry.
    """
    directive = DirectiveOperand(
        name=operand["name"],
        parameters=operand["parameters"],
    )
    comment = operand["comment"] if "comment" in operand else None
    return directive, comment
|
|
|
|
def process_register_operand(self, operand):
    """
    Turn a parsed register into a ``RegisterOperand``.

    Registers that carry a prefix (x#, f#, v#) keep it, lower-cased. Integer
    and floating-point ABI names are translated to their numbered form; any
    other name (including CSR names) is kept with an empty prefix.
    """
    if "prefix" in operand:
        return RegisterOperand(prefix=operand["prefix"].lower(), name=operand["name"])

    name = operand["name"].lower()

    # Integer ABI name -> x-register number
    abi_to_x = {
        "zero": "0", "ra": "1", "sp": "2", "gp": "3", "tp": "4",
        "t0": "5", "t1": "6", "t2": "7",
        "s0": "8", "fp": "8", "s1": "9",
        "a0": "10", "a1": "11", "a2": "12", "a3": "13",
        "a4": "14", "a5": "15", "a6": "16", "a7": "17",
        "s2": "18", "s3": "19", "s4": "20", "s5": "21",
        "s6": "22", "s7": "23", "s8": "24", "s9": "25",
        "s10": "26", "s11": "27",
        "t3": "28", "t4": "29", "t5": "30", "t6": "31",
    }
    if name in abi_to_x:
        return RegisterOperand(prefix="x", name=abi_to_x[name])

    # Floating-point ABI name -> f-register number
    if name.startswith("f") and name[1] in ["t", "a", "s"]:
        bank = name[1]
        idx = int(name[2:])
        if bank == "a":
            # fa0-fa7 -> f10-f17
            number = idx + 10
        elif bank == "s":
            # fs0-fs1 -> f8-f9, fs2-fs11 -> f18-f27
            number = idx + 8 if idx <= 1 else idx + 16
        else:
            # ft0-ft7 -> f0-f7, ft8-ft11 -> f28-f31
            number = idx if idx <= 7 else idx + 20
        return RegisterOperand(prefix="f", name=str(number))

    # CSR names and anything without a mapping are returned as is
    return RegisterOperand(prefix="", name=name)
|
|
|
|
def process_memory_address(self, memory_address):
    """
    Turn a parsed ``offset(base)`` address into a ``MemoryOperand``.

    A numeric offset becomes an ``ImmediateOperand``, a symbolic one goes
    through ``process_identifier``; the base goes through
    ``process_register_operand``. Index is always None and scale always 1.
    """
    offset = memory_address.get("offset", None)
    # A single-element list is unwrapped to its only entry.
    if isinstance(offset, list) and len(offset) == 1:
        offset = offset[0]
    if offset is not None and "value" in offset:
        # Base 0 lets int() accept both decimal and 0x-prefixed text.
        offset = ImmediateOperand(value=int(offset["value"], 0))
    elif isinstance(offset, dict) and "identifier" in offset:
        offset = self.process_identifier(offset["identifier"])

    raw_base = memory_address.get("base", None)
    base = self.process_register_operand(raw_base) if raw_base is not None else None

    return MemoryOperand(offset=offset, base=base, index=None, scale=1)
|
|
|
|
def process_label(self, label):
    """
    Post-process label asm line.

    :param dict label: parsed label with a nested identifier under "name"
    :returns: tuple of (``LabelOperand``, comment or None)
    """
    # Look the comment up under the same key that is tested for, instead of
    # testing ``self.comment_id`` but reading a hard-coded "comment" key.
    comment = label[self.comment_id] if self.comment_id in label else None
    return (
        LabelOperand(name=label["name"]["name"]),
        comment,
    )
|
|
|
|
def process_identifier(self, identifier):
    """Build an ``IdentifierOperand``; a missing name or offset becomes None."""
    name = identifier.get("name")
    offset = identifier.get("offset")
    return IdentifierOperand(name=name, offset=offset)
|
|
|
|
def process_immediate(self, immediate):
    """
    Post-process immediate operand.

    An immediate that is really an identifier is handed to
    ``process_identifier``; a numeric one becomes an ``ImmediateOperand`` whose
    value is normalised by ``normalize_imd``. Anything else is returned as is.
    """
    if "identifier" in immediate:
        # actually an identifier, change declaration
        return self.process_identifier(immediate["identifier"])
    if "value" not in immediate:
        return immediate

    # normal integer value; the type is also recorded on the parsed dict
    immediate["type"] = "int"
    operand = ImmediateOperand(imd_type=immediate["type"], value=immediate["value"])
    # convert hex immediates to decimal
    operand.value = self.normalize_imd(operand)
    return operand
|
|
|
|
def get_full_reg_name(self, register):
    """
    Return the register name as one string.

    The prefix is prepended only when both prefix and name are truthy.
    """
    prefix = register.prefix if (register.prefix and register.name) else ""
    return prefix + str(register.name)
|
|
|
|
def normalize_imd(self, imd):
    """
    Normalize immediate to decimal based representation.

    Identifier operands and immediates without a value are returned unchanged;
    a string value is converted with ``int(value, 0)``; any other value is
    returned as it is.
    """
    if isinstance(imd, IdentifierOperand) or imd.value is None:
        return imd
    value = imd.value
    # Base 0 accepts prefixed literals such as 0x.. as well as plain decimal.
    return int(value, 0) if isinstance(value, str) else value
|
|
|
|
def parse_register(self, register_string):
    """
    Parse register string and return register dictionary.

    Numbered registers x0-x31, f0-f31 and v0-v31 are returned with their
    prefix and number; integer ABI names are returned unchanged with an empty
    prefix.

    :param str register_string: register representation as string
    :returns: dict with register info, or None if the string is no register
    """
    register_string = register_string.strip()

    # One pattern covers all three numbered register files (0-31, no leading zeros).
    numbered = re.match(r'^([xfv])([0-9]|[1-2][0-9]|3[0-1])$', register_string)
    if numbered:
        prefix, number = numbered.groups()
        return {"class": "register", "register": {"prefix": prefix, "name": str(int(number))}}

    # Integer ABI names
    abi_names = {
        "zero", "ra", "sp", "gp", "tp",
        "t0", "t1", "t2",
        "s0", "fp", "s1",
        "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
        "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
        "t3", "t4", "t5", "t6",
    }
    if register_string in abi_names:
        return {"class": "register", "register": {"prefix": "", "name": register_string}}

    # If no match is found
    return None
|
|
|
|
def is_gpr(self, register):
    """
    Check if register is a general purpose (integer) register.

    A register counts as general purpose when its prefix is ``x``, or when it
    has no prefix and its name is one of the special integer ABI names
    (including ``fp``, the alias of s0/x8) or starts with ``t``, ``a`` or ``s``.

    :param register: register operand with ``prefix`` and ``name`` attributes
    :returns: True for a general purpose register, False otherwise
    """
    if register.prefix == "x":
        return True
    if register.prefix:
        # Any other prefix (f, v, ...) is a different register file.
        return False
    name = register.name
    if not name:
        # No name to inspect; avoids indexing into an empty string.
        return False
    if name in ("zero", "ra", "sp", "gp", "tp", "fp"):
        return True
    return name[0] in ("t", "a", "s")
|
|
|
|
def is_vector_register(self, register):
    """Return True exactly when the register's prefix is ``v`` (v0-v31)."""
    return register.prefix == "v"
|
|
|
|
def is_flag_dependend_of(self, flag_a, flag_b):
    """
    Check if ``flag_a`` is dependent on ``flag_b``.

    Two flags depend on each other exactly when their names are equal.
    """
    name_a, name_b = flag_a.name, flag_b.name
    return name_a == name_b
|
|
|
|
def is_reg_dependend_of(self, reg_a, reg_b):
    """
    Check if ``reg_a`` is dependent on ``reg_b``.

    Either argument may be an operand object or a mapping with a "name" and an
    optional "prefix" entry. Two registers depend on each other when their
    canonical names are equal.

    :returns: True if both registers canonicalise to the same name
    """
    canonical = []
    for reg in (reg_a, reg_b):
        if not isinstance(reg, Operand):
            # Keep the prefix: without it a numbered register such as
            # {"prefix": "x", "name": "5"} would lose its register file.
            fields = {"name": reg["name"]}
            if "prefix" in reg:
                fields["prefix"] = reg["prefix"]
            reg = RegisterOperand(**fields)
        canonical.append(self._get_canonical_reg_name(reg))

    # Same register type and number means dependency
    return canonical[0] == canonical[1]
|
|
|
|
def _get_canonical_reg_name(self, register):
|
|
"""Get the canonical form of a register (x-form for integer, f-form for FP)"""
|
|
# If already in canonical form (x# or f#)
|
|
if register.prefix in ["x", "f", "v"] and register.name.isdigit():
|
|
return f"{register.prefix}{register.name}"
|
|
|
|
# ABI name mapping for integer registers
|
|
abi_to_x = {
|
|
"zero": "x0", "ra": "x1", "sp": "x2", "gp": "x3", "tp": "x4",
|
|
"t0": "x5", "t1": "x6", "t2": "x7",
|
|
"s0": "x8", "s1": "x9",
|
|
"a0": "x10", "a1": "x11", "a2": "x12", "a3": "x13",
|
|
"a4": "x14", "a5": "x15", "a6": "x16", "a7": "x17",
|
|
"s2": "x18", "s3": "x19", "s4": "x20", "s5": "x21",
|
|
"s6": "x22", "s7": "x23", "s8": "x24", "s9": "x25",
|
|
"s10": "x26", "s11": "x27",
|
|
"t3": "x28", "t4": "x29", "t5": "x30", "t6": "x31"
|
|
}
|
|
|
|
# For integer register ABI names
|
|
name = register.name.lower()
|
|
if name in abi_to_x:
|
|
return abi_to_x[name]
|
|
|
|
# For FP register ABI names like fa0, fs1, etc.
|
|
if name.startswith("f") and len(name) > 1:
|
|
if name[1] == "a": # fa0-fa7
|
|
idx = int(name[2:])
|
|
return f"f{idx + 10}"
|
|
elif name[1] == "s": # fs0-fs11
|
|
idx = int(name[2:])
|
|
if idx <= 1:
|
|
return f"f{idx + 8}"
|
|
else:
|
|
return f"f{idx + 16}"
|
|
elif name[1] == "t": # ft0-ft11
|
|
idx = int(name[2:])
|
|
if idx <= 7:
|
|
return f"f{idx}"
|
|
else:
|
|
return f"f{idx + 20}"
|
|
|
|
# Return as is if no mapping found
|
|
return f"{register.prefix}{register.name}"
|
|
|
|
def get_reg_type(self, register):
    """
    Get register type.

    A non-empty prefix is the type. Otherwise the type is derived from the
    ABI name: ``x`` for integer registers (including ``fp``, the alias of
    s0/x8), ``f`` for floating point, ``csr`` for control and status
    registers, and ``unknown`` for everything else.

    :param register: register operand with ``prefix`` and ``name`` attributes
    :returns: register type as string
    """
    if register.prefix:
        return register.prefix

    name = (register.name or "").lower()
    if not name:
        # Nothing to classify; avoids indexing into an empty string.
        return "unknown"

    # "fp" must be tested before the generic f-prefix check below.
    if name in ("zero", "ra", "sp", "gp", "tp", "fp") or name[0] in ("t", "a", "s"):
        return "x"  # Integer register
    if name.startswith("f"):
        return "f"  # Floating point register
    if name.startswith("csr"):
        return "csr"  # Control and Status Register
    return "unknown"
|
|
|
|
def normalize_instruction_form(self, instruction_form, isa_model, arch_model):
    """
    Normalize instruction form for RISC-V instructions.

    Does nothing if the form is already normalized. Otherwise immediate
    operands are passed through ``normalize_imd`` (only when the form has a
    mnemonic) and the form is marked as normalized. Mnemonics and register
    operands are currently left untouched.

    :param instruction_form: instruction form to normalize
    :param isa_model: ISA model to use for normalization (not used here)
    :param arch_model: architecture model to use for normalization (not used here)
    """
    if instruction_form.normalized:
        return

    if instruction_form.mnemonic is not None:
        for position, operand in enumerate(instruction_form.operands):
            if isinstance(operand, ImmediateOperand):
                # NOTE(review): this stores the return value of normalize_imd
                # in place of the operand object; for an immediate with a
                # value that is the bare value, not an ImmediateOperand.
                # Confirm that callers expect this.
                instruction_form.operands[position] = self.normalize_imd(operand)

    instruction_form.normalized = True
|
|
|
|
def get_regular_source_operands(self, instruction_form):
    """
    Get source operands of given instruction form assuming regular src/dst behavior.

    A single operand is treated as the source; otherwise every operand after
    the first (the destination) is a source.
    """
    operands = instruction_form.operands
    if len(operands) == 1:
        return [operands[0]]
    return list(operands[1:])
|
|
|
|
def get_regular_destination_operands(self, instruction_form):
    """
    Get destination operand of given instruction form assuming regular src/dst behavior.

    A single operand is not a destination; otherwise the first operand is.
    """
    operands = instruction_form.operands
    return [] if len(operands) == 1 else operands[:1]
|
|
|
|
def process_immediate_operand(self, operand):
    """
    Convert a raw immediate description into an ``ImmediateOperand``.

    A dict with an "imd" entry supplies type, value, identifier and shift
    itself. Every other input becomes an "int" immediate whose value is the
    input as text (strings are passed through unchanged).
    """
    if isinstance(operand, dict) and "imd" in operand:
        # For immediate operands from instruction definitions
        return ImmediateOperand(
            imd_type=operand["imd"],
            value=operand.get("value"),
            identifier=operand.get("identifier"),
            shift=operand.get("shift"),
        )

    # Raw integers, strings and any other format
    text = operand if isinstance(operand, str) else str(operand)
    return ImmediateOperand(imd_type="int", value=text)