mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-15 16:40:05 +01:00
Apply selected improvements from 1ceac6e: enhanced RISC-V parser, ImmediateOperand enhancements, and rv6→rv64 file renames
- Enhanced ImmediateOperand with reloc_type and symbol attributes for better RISC-V support
- Updated RISC-V parser with relocation type support (%hi, %lo, %pcrel_hi, etc.)
- Renamed example files from rv6 to rv64 for consistency
- Updated related configuration and test files
- All 115 tests pass successfully
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,7 +1,5 @@
|
||||
# OSACA specific files and folders
|
||||
*.*.pickle
|
||||
osaca_testfront_venv/
|
||||
examples/riscy_asm_files/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
||||
@@ -10,6 +10,8 @@ class ImmediateOperand(Operand):
|
||||
imd_type=None,
|
||||
value=None,
|
||||
shift=None,
|
||||
reloc_type=None,
|
||||
symbol=None,
|
||||
source=False,
|
||||
destination=False,
|
||||
):
|
||||
@@ -18,6 +20,8 @@ class ImmediateOperand(Operand):
|
||||
self._imd_type = imd_type
|
||||
self._value = value
|
||||
self._shift = shift
|
||||
self._reloc_type = reloc_type
|
||||
self._symbol = symbol
|
||||
|
||||
@property
|
||||
def identifier(self):
|
||||
@@ -33,7 +37,15 @@ class ImmediateOperand(Operand):
|
||||
|
||||
@property
|
||||
def shift(self):
|
||||
return self._imd_type
|
||||
return self._shift
|
||||
|
||||
@property
|
||||
def reloc_type(self):
|
||||
return self._reloc_type
|
||||
|
||||
@property
|
||||
def symbol(self):
|
||||
return self._symbol
|
||||
|
||||
@imd_type.setter
|
||||
def imd_type(self, itype):
|
||||
@@ -51,10 +63,19 @@ class ImmediateOperand(Operand):
|
||||
def shift(self, shift):
|
||||
self._shift = shift
|
||||
|
||||
@reloc_type.setter
|
||||
def reloc_type(self, reloc_type):
|
||||
self._reloc_type = reloc_type
|
||||
|
||||
@symbol.setter
|
||||
def symbol(self, symbol):
|
||||
self._symbol = symbol
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
f"Immediate(identifier={self._identifier}, imd_type={self._imd_type}, "
|
||||
f"value={self._value}, shift={self._shift}, source={self._source}, destination={self._destination})"
|
||||
f"value={self._value}, shift={self._shift}, reloc_type={self._reloc_type}, "
|
||||
f"symbol={self._symbol}, source={self._source}, destination={self._destination})"
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
@@ -62,10 +83,18 @@ class ImmediateOperand(Operand):
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, ImmediateOperand):
|
||||
# Handle cases where old instances might not have the new attributes
|
||||
self_reloc_type = getattr(self, "_reloc_type", None)
|
||||
self_symbol = getattr(self, "_symbol", None)
|
||||
other_reloc_type = getattr(other, "_reloc_type", None)
|
||||
other_symbol = getattr(other, "_symbol", None)
|
||||
|
||||
return (
|
||||
self._identifier == other._identifier
|
||||
and self._imd_type == other._imd_type
|
||||
and self._value == other._value
|
||||
and self._shift == other._shift
|
||||
and self_reloc_type == other_reloc_type
|
||||
and self_symbol == other_symbol
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
import re
|
||||
import os
|
||||
from copy import deepcopy
|
||||
import pyparsing as pp
|
||||
|
||||
from osaca.parser import BaseParser
|
||||
@@ -13,7 +11,6 @@ from osaca.parser.label import LabelOperand
|
||||
from osaca.parser.register import RegisterOperand
|
||||
from osaca.parser.identifier import IdentifierOperand
|
||||
from osaca.parser.immediate import ImmediateOperand
|
||||
from osaca.parser.condition import ConditionOperand
|
||||
|
||||
|
||||
class ParserRISCV(BaseParser):
|
||||
@@ -70,9 +67,23 @@ class ParserRISCV(BaseParser):
|
||||
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
|
||||
).setResultsName("value")
|
||||
|
||||
# Additional identifiers used in vector instructions
|
||||
vector_identifier = pp.Word(pp.alphas, pp.alphanums)
|
||||
special_identifier = pp.Word(pp.alphas + "%")
|
||||
# RISC-V specific relocation attributes
|
||||
reloc_type = (
|
||||
pp.Literal("%hi")
|
||||
| pp.Literal("%lo")
|
||||
| pp.Literal("%pcrel_hi")
|
||||
| pp.Literal("%pcrel_lo")
|
||||
| pp.Literal("%tprel_hi")
|
||||
| pp.Literal("%tprel_lo")
|
||||
| pp.Literal("%tprel_add")
|
||||
).setResultsName("reloc_type")
|
||||
|
||||
reloc_expr = pp.Group(
|
||||
reloc_type
|
||||
+ pp.Suppress("(")
|
||||
+ pp.Word(pp.alphas + pp.nums + "_").setResultsName("symbol")
|
||||
+ pp.Suppress(")")
|
||||
).setResultsName("relocation")
|
||||
|
||||
# First character of an identifier
|
||||
first = pp.Word(pp.alphas + "_.", exact=1)
|
||||
@@ -89,9 +100,16 @@ class ParserRISCV(BaseParser):
|
||||
)
|
||||
).setResultsName(self.identifier)
|
||||
|
||||
# Immediate with optional relocation
|
||||
immediate = pp.Group(
|
||||
reloc_expr | (hex_number ^ decimal_number) | identifier
|
||||
).setResultsName(self.immediate_id)
|
||||
|
||||
# Label
|
||||
self.label = pp.Group(
|
||||
identifier.setResultsName("name") + pp.Literal(":") + pp.Optional(self.comment)
|
||||
identifier.setResultsName("name")
|
||||
+ pp.Literal(":")
|
||||
+ pp.Optional(self.comment)
|
||||
).setResultsName(self.label_id)
|
||||
|
||||
# Directive
|
||||
@@ -100,16 +118,22 @@ class ParserRISCV(BaseParser):
|
||||
+ pp.Optional(pp.Word(pp.printables + " ", excludeChars=","))
|
||||
)
|
||||
|
||||
# For vector instructions
|
||||
vector_parameter = pp.Word(pp.alphas)
|
||||
directive_parameter = (
|
||||
pp.quotedString | directive_option | identifier | hex_number | decimal_number
|
||||
pp.quotedString
|
||||
| directive_option
|
||||
| identifier
|
||||
| hex_number
|
||||
| decimal_number
|
||||
)
|
||||
commaSeparatedList = pp.delimitedList(
|
||||
pp.Optional(directive_parameter), delim=","
|
||||
)
|
||||
commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=",")
|
||||
self.directive = pp.Group(
|
||||
pp.Literal(".")
|
||||
+ pp.Word(pp.alphanums + "_").setResultsName("name")
|
||||
+ (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName("parameters")
|
||||
+ (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName(
|
||||
"parameters"
|
||||
)
|
||||
+ pp.Optional(self.comment)
|
||||
).setResultsName(self.directive_id)
|
||||
|
||||
@@ -128,13 +152,6 @@ class ParserRISCV(BaseParser):
|
||||
# Mnemonic
|
||||
mnemonic = pp.Word(pp.alphanums + ".").setResultsName("mnemonic")
|
||||
|
||||
# Immediate:
|
||||
# int: ^-?[0-9]+ | hex: ^0x[0-9a-fA-F]+
|
||||
immediate = pp.Group(
|
||||
(hex_number ^ decimal_number)
|
||||
| identifier
|
||||
).setResultsName(self.immediate_id)
|
||||
|
||||
# Register:
|
||||
# RISC-V has two main types of registers:
|
||||
# 1. Integer registers (x0-x31 or ABI names)
|
||||
@@ -142,27 +159,27 @@ class ParserRISCV(BaseParser):
|
||||
|
||||
# Integer register ABI names
|
||||
integer_reg_abi = (
|
||||
pp.CaselessLiteral("zero") |
|
||||
pp.CaselessLiteral("ra") |
|
||||
pp.CaselessLiteral("sp") |
|
||||
pp.CaselessLiteral("gp") |
|
||||
pp.CaselessLiteral("tp") |
|
||||
pp.Regex(r"[tas][0-9]+") # t0-t6, a0-a7, s0-s11
|
||||
pp.CaselessLiteral("zero")
|
||||
| pp.CaselessLiteral("ra")
|
||||
| pp.CaselessLiteral("sp")
|
||||
| pp.CaselessLiteral("gp")
|
||||
| pp.CaselessLiteral("tp")
|
||||
| pp.Regex(r"[tas][0-9]+") # t0-t6, a0-a7, s0-s11
|
||||
).setResultsName("name")
|
||||
|
||||
# Integer registers x0-x31
|
||||
integer_reg_x = (
|
||||
pp.CaselessLiteral("x").setResultsName("prefix") +
|
||||
pp.Word(pp.nums).setResultsName("name")
|
||||
)
|
||||
integer_reg_x = pp.CaselessLiteral("x").setResultsName("prefix") + pp.Word(
|
||||
pp.nums
|
||||
).setResultsName("name")
|
||||
|
||||
# Floating point registers
|
||||
fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName("name") # ft0-ft11, fa0-fa7, fs0-fs11
|
||||
fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName(
|
||||
"name"
|
||||
) # ft0-ft11, fa0-fa7, fs0-fs11
|
||||
|
||||
fp_reg_f = (
|
||||
pp.CaselessLiteral("f").setResultsName("prefix") +
|
||||
pp.Word(pp.nums).setResultsName("name")
|
||||
)
|
||||
fp_reg_f = pp.CaselessLiteral("f").setResultsName("prefix") + pp.Word(
|
||||
pp.nums
|
||||
).setResultsName("name")
|
||||
|
||||
# Control and status registers (CSRs)
|
||||
csr_reg = pp.Combine(
|
||||
@@ -170,14 +187,18 @@ class ParserRISCV(BaseParser):
|
||||
).setResultsName("name")
|
||||
|
||||
# Vector registers (for the "V" extension)
|
||||
vector_reg = (
|
||||
pp.CaselessLiteral("v").setResultsName("prefix") +
|
||||
pp.Word(pp.nums).setResultsName("name")
|
||||
)
|
||||
vector_reg = pp.CaselessLiteral("v").setResultsName("prefix") + pp.Word(
|
||||
pp.nums
|
||||
).setResultsName("name")
|
||||
|
||||
# Combined register definition
|
||||
register = pp.Group(
|
||||
integer_reg_x | integer_reg_abi | fp_reg_f | fp_reg_abi | vector_reg | csr_reg
|
||||
integer_reg_x
|
||||
| integer_reg_abi
|
||||
| fp_reg_f
|
||||
| fp_reg_abi
|
||||
| vector_reg
|
||||
| csr_reg
|
||||
).setResultsName(self.register_id)
|
||||
|
||||
self.register = register
|
||||
@@ -191,20 +212,15 @@ class ParserRISCV(BaseParser):
|
||||
).setResultsName(self.memory_id)
|
||||
|
||||
# Combine to instruction form
|
||||
operand_first = pp.Group(
|
||||
register ^ immediate ^ memory ^ identifier
|
||||
)
|
||||
operand_rest = pp.Group(
|
||||
register ^ immediate ^ memory ^ identifier
|
||||
)
|
||||
|
||||
# Vector instruction special parameters (e.g., e32, m4, ta, ma)
|
||||
vector_param = pp.Word(pp.alphas + pp.nums)
|
||||
operand_first = pp.Group(register ^ immediate ^ memory ^ identifier)
|
||||
operand_rest = pp.Group(register ^ immediate ^ memory ^ identifier)
|
||||
|
||||
# Handle additional vector parameters
|
||||
additional_params = pp.ZeroOrMore(
|
||||
pp.Suppress(pp.Literal(",")) +
|
||||
vector_param.setResultsName("vector_param", listAllMatches=True)
|
||||
pp.Suppress(pp.Literal(","))
|
||||
+ pp.Word(pp.alphas + pp.nums).setResultsName(
|
||||
"vector_param", listAllMatches=True
|
||||
)
|
||||
)
|
||||
|
||||
# Main instruction parser
|
||||
@@ -217,7 +233,7 @@ class ParserRISCV(BaseParser):
|
||||
+ pp.Optional(operand_rest.setResultsName("operand3"))
|
||||
+ pp.Optional(pp.Suppress(pp.Literal(",")))
|
||||
+ pp.Optional(operand_rest.setResultsName("operand4"))
|
||||
+ pp.Optional(additional_params) # For vector instructions with more params
|
||||
+ pp.Optional(additional_params)
|
||||
+ pp.Optional(self.comment)
|
||||
)
|
||||
|
||||
@@ -228,7 +244,8 @@ class ParserRISCV(BaseParser):
|
||||
:param str line: line of assembly code
|
||||
:param line_number: identifier of instruction form, defaults to None
|
||||
:type line_number: int, optional
|
||||
:return: `dict` -- parsed asm line (comment, label, directive or instruction form)
|
||||
:return: `dict` -- parsed asm line (comment, label, directive or
|
||||
instruction form)
|
||||
"""
|
||||
instruction_form = InstructionForm(
|
||||
mnemonic=None,
|
||||
@@ -243,7 +260,9 @@ class ParserRISCV(BaseParser):
|
||||
|
||||
# 1. Parse comment
|
||||
try:
|
||||
result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())
|
||||
result = self.process_operand(
|
||||
self.comment.parseString(line, parseAll=True).asDict()
|
||||
)
|
||||
instruction_form.comment = " ".join(result[self.comment_id])
|
||||
except pp.ParseException:
|
||||
pass
|
||||
@@ -261,7 +280,9 @@ class ParserRISCV(BaseParser):
|
||||
if result is None:
|
||||
try:
|
||||
# returns tuple with label operand and comment, if any
|
||||
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
|
||||
result = self.process_operand(
|
||||
self.label.parseString(line, parseAll=True).asDict()
|
||||
)
|
||||
instruction_form.label = result[0].name
|
||||
if result[1] is not None:
|
||||
instruction_form.comment = " ".join(result[1])
|
||||
@@ -304,9 +325,20 @@ class ParserRISCV(BaseParser):
|
||||
:param str instruction: Assembly line string.
|
||||
:returns: `dict` -- parsed instruction form
|
||||
"""
|
||||
# Store current instruction for context in operand processing
|
||||
if instruction.startswith("vsetvli"):
|
||||
self.current_instruction = "vsetvli"
|
||||
else:
|
||||
# Extract mnemonic for context
|
||||
parts = instruction.split("#")[0].strip().split()
|
||||
self.current_instruction = parts[0] if parts else None
|
||||
|
||||
# Special handling for vector instructions like vsetvli with many parameters
|
||||
if instruction.startswith("vsetvli"):
|
||||
parts = instruction.split("#")[0].strip().split()
|
||||
# Split into mnemonic and operands part
|
||||
parts = (
|
||||
instruction.split("#")[0].strip().split(None, 1)
|
||||
) # Split on first whitespace only
|
||||
mnemonic = parts[0]
|
||||
|
||||
# Split operands by commas
|
||||
@@ -317,9 +349,19 @@ class ParserRISCV(BaseParser):
|
||||
# Process each operand
|
||||
operands = []
|
||||
for op in operands_list:
|
||||
if op.startswith("x") or op in ["zero", "ra", "sp", "gp", "tp"] or re.match(r"[tas][0-9]+", op):
|
||||
if (
|
||||
op.startswith("x")
|
||||
or op in ["zero", "ra", "sp", "gp", "tp"]
|
||||
or re.match(r"[tas][0-9]+", op)
|
||||
):
|
||||
operands.append(RegisterOperand(name=op))
|
||||
elif op in ["e8", "e16", "e32", "e64", "m1", "m2", "m4", "m8", "ta", "tu", "ma", "mu"]:
|
||||
else:
|
||||
# Vector parameters get appropriate attributes
|
||||
if op.startswith("e"): # Element width
|
||||
operands.append(IdentifierOperand(name=op))
|
||||
elif op.startswith("m"): # LMUL setting
|
||||
operands.append(IdentifierOperand(name=op))
|
||||
elif op in ["ta", "tu", "ma", "mu"]: # Tail/mask policies
|
||||
operands.append(IdentifierOperand(name=op))
|
||||
else:
|
||||
operands.append(IdentifierOperand(name=op))
|
||||
@@ -330,37 +372,36 @@ class ParserRISCV(BaseParser):
|
||||
comment = instruction.split("#", 1)[1].strip()
|
||||
|
||||
return InstructionForm(
|
||||
mnemonic=mnemonic,
|
||||
operands=operands,
|
||||
comment_id=comment
|
||||
mnemonic=mnemonic, operands=operands, comment_id=comment
|
||||
)
|
||||
|
||||
# Regular instruction parsing
|
||||
try:
|
||||
result = self.instruction_parser.parseString(instruction, parseAll=True).asDict()
|
||||
result = self.instruction_parser.parseString(
|
||||
instruction, parseAll=True
|
||||
).asDict()
|
||||
operands = []
|
||||
# Add operands to list
|
||||
# Check first operand
|
||||
if "operand1" in result:
|
||||
operand = self.process_operand(result["operand1"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check second operand
|
||||
if "operand2" in result:
|
||||
operand = self.process_operand(result["operand2"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check third operand
|
||||
if "operand3" in result:
|
||||
operand = self.process_operand(result["operand3"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check fourth operand
|
||||
if "operand4" in result:
|
||||
operand = self.process_operand(result["operand4"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
|
||||
# Handle vector_param for vector instructions
|
||||
# Process operands
|
||||
for i in range(1, 5):
|
||||
operand_key = f"operand{i}"
|
||||
if operand_key in result:
|
||||
operand = self.process_operand(result[operand_key])
|
||||
(
|
||||
operands.extend(operand)
|
||||
if isinstance(operand, list)
|
||||
else operands.append(operand)
|
||||
)
|
||||
|
||||
# Handle vector parameters as identifiers with appropriate attributes
|
||||
if "vector_param" in result:
|
||||
if isinstance(result["vector_param"], list):
|
||||
for param in result["vector_param"]:
|
||||
if param.startswith("e"): # Element width
|
||||
operands.append(IdentifierOperand(name=param))
|
||||
elif param.startswith("m"): # LMUL setting
|
||||
operands.append(IdentifierOperand(name=param))
|
||||
else:
|
||||
operands.append(IdentifierOperand(name=param))
|
||||
else:
|
||||
operands.append(IdentifierOperand(name=result["vector_param"]))
|
||||
@@ -368,11 +409,15 @@ class ParserRISCV(BaseParser):
|
||||
return_dict = InstructionForm(
|
||||
mnemonic=result["mnemonic"],
|
||||
operands=operands,
|
||||
comment_id=" ".join(result[self.comment_id]) if self.comment_id in result else None,
|
||||
comment_id=(
|
||||
" ".join(result[self.comment_id])
|
||||
if self.comment_id in result
|
||||
else None
|
||||
),
|
||||
)
|
||||
return return_dict
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
# For special vector instructions or ones with % in them
|
||||
if "%" in instruction or instruction.startswith("v"):
|
||||
parts = instruction.split("#")[0].strip().split(None, 1)
|
||||
@@ -383,10 +428,17 @@ class ParserRISCV(BaseParser):
|
||||
operands_list = [op.strip() for op in operand_part.split(",")]
|
||||
for op in operands_list:
|
||||
# Process '%hi(data)' to 'data' for certain operands
|
||||
if op.startswith("%") and '(' in op and ')' in op:
|
||||
# Extract data from %hi(data) format
|
||||
data = op[op.index('(')+1:op.index(')')]
|
||||
operands.append(IdentifierOperand(name=data))
|
||||
if op.startswith("%") and "(" in op and ")" in op:
|
||||
reloc_type = op[: op.index("(")]
|
||||
symbol = op[op.index("(") + 1 : op.index(")")]
|
||||
operands.append(
|
||||
ImmediateOperand(
|
||||
imd_type="reloc",
|
||||
value=None,
|
||||
reloc_type=reloc_type,
|
||||
symbol=symbol,
|
||||
)
|
||||
)
|
||||
else:
|
||||
operands.append(IdentifierOperand(name=op))
|
||||
|
||||
@@ -395,9 +447,7 @@ class ParserRISCV(BaseParser):
|
||||
comment = instruction.split("#", 1)[1].strip()
|
||||
|
||||
return InstructionForm(
|
||||
mnemonic=mnemonic,
|
||||
operands=operands,
|
||||
comment_id=comment
|
||||
mnemonic=mnemonic, operands=operands, comment_id=comment
|
||||
)
|
||||
else:
|
||||
raise
|
||||
@@ -430,12 +480,42 @@ class ParserRISCV(BaseParser):
|
||||
)
|
||||
|
||||
def process_register_operand(self, operand):
|
||||
"""Process register operands, including ABI name to x-register mapping"""
|
||||
# If already has prefix (x#, f#, v#), just return as is
|
||||
"""Process register operands, including ABI name to x-register mapping
|
||||
and vector attributes"""
|
||||
# If already has prefix (x#, f#, v#), process with appropriate attributes
|
||||
if "prefix" in operand:
|
||||
prefix = operand["prefix"].lower()
|
||||
|
||||
# Special handling for vector registers
|
||||
if prefix == "v":
|
||||
return RegisterOperand(
|
||||
prefix=operand["prefix"].lower(),
|
||||
name=operand["name"]
|
||||
prefix=prefix,
|
||||
name=operand["name"],
|
||||
regtype="vector",
|
||||
# Vector registers can have different element widths (e8,e16,e32,e64)
|
||||
width=operand.get("width", None),
|
||||
# Number of elements (m1,m2,m4,m8)
|
||||
lanes=operand.get("lanes", None),
|
||||
# For vector mask registers
|
||||
mask=operand.get("mask", False),
|
||||
# For tail agnostic/undisturbed policies
|
||||
zeroing=operand.get("zeroing", False),
|
||||
)
|
||||
# For floating point registers
|
||||
elif prefix == "f":
|
||||
return RegisterOperand(
|
||||
prefix=prefix,
|
||||
name=operand["name"],
|
||||
regtype="float",
|
||||
width=64, # RISC-V typically uses 64-bit float registers
|
||||
)
|
||||
# For integer registers
|
||||
elif prefix == "x":
|
||||
return RegisterOperand(
|
||||
prefix=prefix,
|
||||
name=operand["name"],
|
||||
regtype="int",
|
||||
width=64, # RV64 uses 64-bit registers
|
||||
)
|
||||
|
||||
# Handle ABI names by converting to x-register numbers
|
||||
@@ -443,49 +523,84 @@ class ParserRISCV(BaseParser):
|
||||
|
||||
# ABI name mapping for integer registers
|
||||
abi_to_x = {
|
||||
"zero": "0", "ra": "1", "sp": "2", "gp": "3", "tp": "4",
|
||||
"t0": "5", "t1": "6", "t2": "7",
|
||||
"s0": "8", "fp": "8", "s1": "9",
|
||||
"a0": "10", "a1": "11", "a2": "12", "a3": "13",
|
||||
"a4": "14", "a5": "15", "a6": "16", "a7": "17",
|
||||
"s2": "18", "s3": "19", "s4": "20", "s5": "21",
|
||||
"s6": "22", "s7": "23", "s8": "24", "s9": "25",
|
||||
"s10": "26", "s11": "27",
|
||||
"t3": "28", "t4": "29", "t5": "30", "t6": "31"
|
||||
"zero": "x0",
|
||||
"ra": "x1",
|
||||
"sp": "x2",
|
||||
"gp": "x3",
|
||||
"tp": "x4",
|
||||
"t0": "x5",
|
||||
"t1": "x6",
|
||||
"t2": "x7",
|
||||
"s0": "x8",
|
||||
"s1": "x9",
|
||||
"a0": "x10",
|
||||
"a1": "x11",
|
||||
"a2": "x12",
|
||||
"a3": "x13",
|
||||
"a4": "x14",
|
||||
"a5": "x15",
|
||||
"a6": "x16",
|
||||
"a7": "x17",
|
||||
"s2": "x18",
|
||||
"s3": "x19",
|
||||
"s4": "x20",
|
||||
"s5": "x21",
|
||||
"s6": "x22",
|
||||
"s7": "x23",
|
||||
"s8": "x24",
|
||||
"s9": "x25",
|
||||
"s10": "x26",
|
||||
"s11": "x27",
|
||||
"t3": "x28",
|
||||
"t4": "x29",
|
||||
"t5": "x30",
|
||||
"t6": "x31",
|
||||
}
|
||||
|
||||
# Integer register ABI names
|
||||
if name in abi_to_x:
|
||||
return RegisterOperand(
|
||||
prefix="x",
|
||||
name=abi_to_x[name]
|
||||
name=abi_to_x[name],
|
||||
regtype="int",
|
||||
width=64, # RV64 uses 64-bit registers
|
||||
)
|
||||
# Floating point register ABI names
|
||||
elif name.startswith("f") and name[1] in ["t", "a", "s"]:
|
||||
if name[1] == "a": # fa0-fa7
|
||||
idx = int(name[2:])
|
||||
return RegisterOperand(prefix="f", name=str(idx + 10))
|
||||
return RegisterOperand(
|
||||
prefix="f", name=str(idx + 10), regtype="float", width=64
|
||||
)
|
||||
elif name[1] == "s": # fs0-fs11
|
||||
idx = int(name[2:])
|
||||
if idx <= 1:
|
||||
return RegisterOperand(prefix="f", name=str(idx + 8))
|
||||
return RegisterOperand(
|
||||
prefix="f", name=str(idx + 8), regtype="float", width=64
|
||||
)
|
||||
else:
|
||||
return RegisterOperand(prefix="f", name=str(idx + 16))
|
||||
return RegisterOperand(
|
||||
prefix="f", name=str(idx + 16), regtype="float", width=64
|
||||
)
|
||||
elif name[1] == "t": # ft0-ft11
|
||||
idx = int(name[2:])
|
||||
if idx <= 7:
|
||||
return RegisterOperand(prefix="f", name=str(idx))
|
||||
return RegisterOperand(
|
||||
prefix="f", name=str(idx), regtype="float", width=64
|
||||
)
|
||||
else:
|
||||
return RegisterOperand(prefix="f", name=str(idx + 20))
|
||||
return RegisterOperand(
|
||||
prefix="f", name=str(idx + 20), regtype="float", width=64
|
||||
)
|
||||
# CSR registers
|
||||
elif name.startswith("csr"):
|
||||
return RegisterOperand(prefix="", name=name)
|
||||
return RegisterOperand(prefix="", name=name, regtype="csr")
|
||||
|
||||
# If no mapping found, return as is
|
||||
return RegisterOperand(prefix="", name=name)
|
||||
|
||||
def process_memory_address(self, memory_address):
|
||||
"""Post-process memory address operand"""
|
||||
"""Post-process memory address operand with RISC-V specific attributes"""
|
||||
# Process offset
|
||||
offset = memory_address.get("offset", None)
|
||||
if isinstance(offset, list) and len(offset) == 1:
|
||||
@@ -500,12 +615,32 @@ class ParserRISCV(BaseParser):
|
||||
if base is not None:
|
||||
base = self.process_register_operand(base)
|
||||
|
||||
# Create memory operand
|
||||
# Determine data type from instruction context if available
|
||||
# RISC-V load/store instructions encode the data width in the mnemonic
|
||||
# e.g., lw (word), lh (half), lb (byte), etc.
|
||||
data_type = None
|
||||
if hasattr(self, "current_instruction"):
|
||||
mnemonic = self.current_instruction.lower()
|
||||
if any(x in mnemonic for x in ["b", "bu"]): # byte operations
|
||||
data_type = "byte"
|
||||
elif any(x in mnemonic for x in ["h", "hu"]): # halfword operations
|
||||
data_type = "halfword"
|
||||
elif any(x in mnemonic for x in ["w", "wu"]): # word operations
|
||||
data_type = "word"
|
||||
elif "d" in mnemonic: # doubleword operations
|
||||
data_type = "doubleword"
|
||||
|
||||
# Create memory operand with enhanced attributes
|
||||
return MemoryOperand(
|
||||
offset=offset,
|
||||
base=base,
|
||||
index=None,
|
||||
scale=1
|
||||
index=None, # RISC-V doesn't use index registers
|
||||
scale=1, # RISC-V doesn't use scaling
|
||||
data_type=data_type,
|
||||
# Handle vector memory operations
|
||||
mask=memory_address.get("mask", None), # For vector masked loads/stores
|
||||
src=memory_address.get("src", None), # Source register type for stores
|
||||
dst=memory_address.get("dst", None), # Destination register type for loads
|
||||
)
|
||||
|
||||
def process_label(self, label):
|
||||
@@ -519,21 +654,102 @@ class ParserRISCV(BaseParser):
|
||||
"""Post-process identifier operand"""
|
||||
return IdentifierOperand(
|
||||
name=identifier["name"] if "name" in identifier else None,
|
||||
offset=identifier["offset"] if "offset" in identifier else None
|
||||
offset=identifier["offset"] if "offset" in identifier else None,
|
||||
)
|
||||
|
||||
def process_immediate(self, immediate):
|
||||
"""Post-process immediate operand"""
|
||||
"""Post-process immediate operand with RISC-V specific handling"""
|
||||
# Handle relocations
|
||||
if "relocation" in immediate:
|
||||
reloc = immediate["relocation"]
|
||||
return ImmediateOperand(
|
||||
imd_type="reloc",
|
||||
value=None,
|
||||
reloc_type=reloc["reloc_type"],
|
||||
symbol=reloc["symbol"],
|
||||
)
|
||||
|
||||
# Handle identifiers
|
||||
if "identifier" in immediate:
|
||||
# actually an identifier, change declaration
|
||||
return self.process_identifier(immediate["identifier"])
|
||||
|
||||
# Handle numeric values with validation
|
||||
if "value" in immediate:
|
||||
# normal integer value
|
||||
immediate["type"] = "int"
|
||||
# convert hex/bin immediates to dec
|
||||
new_immediate = ImmediateOperand(imd_type=immediate["type"], value=immediate["value"])
|
||||
new_immediate.value = self.normalize_imd(new_immediate)
|
||||
return new_immediate
|
||||
value = int(
|
||||
immediate["value"], 0
|
||||
) # Convert to integer, handling hex/decimal
|
||||
|
||||
# Determine immediate type and validate range based on instruction type
|
||||
if hasattr(self, "current_instruction"):
|
||||
mnemonic = self.current_instruction.lower()
|
||||
|
||||
# I-type instructions (12-bit signed immediate)
|
||||
if any(
|
||||
x in mnemonic
|
||||
for x in [
|
||||
"addi",
|
||||
"slti",
|
||||
"xori",
|
||||
"ori",
|
||||
"andi",
|
||||
"slli",
|
||||
"srli",
|
||||
"srai",
|
||||
]
|
||||
):
|
||||
if not -2048 <= value <= 2047:
|
||||
raise ValueError(
|
||||
f"Immediate value {value} out of range for I-type "
|
||||
f"instruction (-2048 to 2047)"
|
||||
)
|
||||
return ImmediateOperand(imd_type="I", value=value)
|
||||
|
||||
# S-type instructions (12-bit signed immediate for store)
|
||||
elif any(x in mnemonic for x in ["sb", "sh", "sw", "sd"]):
|
||||
if not -2048 <= value <= 2047:
|
||||
raise ValueError(
|
||||
f"Immediate value {value} out of range for S-type "
|
||||
f"instruction (-2048 to 2047)"
|
||||
)
|
||||
return ImmediateOperand(imd_type="S", value=value)
|
||||
|
||||
# B-type instructions (13-bit signed immediate for branches, must be even)
|
||||
elif any(
|
||||
x in mnemonic for x in ["beq", "bne", "blt", "bge", "bltu", "bgeu"]
|
||||
):
|
||||
if not -4096 <= value <= 4095 or value % 2 != 0:
|
||||
raise ValueError(
|
||||
f"Immediate value {value} out of range or not even "
|
||||
f"for B-type instruction (-4096 to 4095, must be even)"
|
||||
)
|
||||
return ImmediateOperand(imd_type="B", value=value)
|
||||
|
||||
# U-type instructions (20-bit upper immediate)
|
||||
elif any(x in mnemonic for x in ["lui", "auipc"]):
|
||||
if not 0 <= value <= 1048575:
|
||||
raise ValueError(
|
||||
f"Immediate value {value} out of range for U-type "
|
||||
f"instruction (0 to 1048575)"
|
||||
)
|
||||
return ImmediateOperand(imd_type="U", value=value)
|
||||
|
||||
# J-type instructions (21-bit signed immediate for jumps, must be even)
|
||||
elif any(x in mnemonic for x in ["jal"]):
|
||||
if not -1048576 <= value <= 1048575 or value % 2 != 0:
|
||||
raise ValueError(
|
||||
f"Immediate value {value} out of range or not even "
|
||||
f"for J-type instruction (-1048576 to 1048575, must be even)"
|
||||
)
|
||||
return ImmediateOperand(imd_type="J", value=value)
|
||||
|
||||
# Vector instructions might have specific immediate ranges
|
||||
elif mnemonic.startswith("v"):
|
||||
# Handle vector specific immediates (implementation specific)
|
||||
return ImmediateOperand(imd_type="V", value=value)
|
||||
|
||||
# Default case - no specific validation
|
||||
return ImmediateOperand(imd_type="int", value=value)
|
||||
|
||||
return immediate
|
||||
|
||||
def get_full_reg_name(self, register):
|
||||
@@ -566,35 +782,74 @@ class ParserRISCV(BaseParser):
|
||||
register_string = register_string.strip()
|
||||
|
||||
# Check for integer registers (x0-x31)
|
||||
x_match = re.match(r'^x([0-9]|[1-2][0-9]|3[0-1])$', register_string)
|
||||
x_match = re.match(r"^x([0-9]|[1-2][0-9]|3[0-1])$", register_string)
|
||||
if x_match:
|
||||
reg_num = int(x_match.group(1))
|
||||
return {"class": "register", "register": {"prefix": "x", "name": str(reg_num)}}
|
||||
return {
|
||||
"class": "register",
|
||||
"register": {"prefix": "x", "name": str(reg_num)},
|
||||
}
|
||||
|
||||
# Check for floating-point registers (f0-f31)
|
||||
f_match = re.match(r'^f([0-9]|[1-2][0-9]|3[0-1])$', register_string)
|
||||
f_match = re.match(r"^f([0-9]|[1-2][0-9]|3[0-1])$", register_string)
|
||||
if f_match:
|
||||
reg_num = int(f_match.group(1))
|
||||
return {"class": "register", "register": {"prefix": "f", "name": str(reg_num)}}
|
||||
return {
|
||||
"class": "register",
|
||||
"register": {"prefix": "f", "name": str(reg_num)},
|
||||
}
|
||||
|
||||
# Check for vector registers (v0-v31)
|
||||
v_match = re.match(r'^v([0-9]|[1-2][0-9]|3[0-1])$', register_string)
|
||||
v_match = re.match(r"^v([0-9]|[1-2][0-9]|3[0-1])$", register_string)
|
||||
if v_match:
|
||||
reg_num = int(v_match.group(1))
|
||||
return {"class": "register", "register": {"prefix": "v", "name": str(reg_num)}}
|
||||
return {
|
||||
"class": "register",
|
||||
"register": {"prefix": "v", "name": str(reg_num)},
|
||||
}
|
||||
|
||||
# Check for ABI names
|
||||
abi_names = {
|
||||
"zero": 0, "ra": 1, "sp": 2, "gp": 3, "tp": 4,
|
||||
"t0": 5, "t1": 6, "t2": 7,
|
||||
"s0": 8, "fp": 8, "s1": 9,
|
||||
"a0": 10, "a1": 11, "a2": 12, "a3": 13, "a4": 14, "a5": 15, "a6": 16, "a7": 17,
|
||||
"s2": 18, "s3": 19, "s4": 20, "s5": 21, "s6": 22, "s7": 23, "s8": 24, "s9": 25, "s10": 26, "s11": 27,
|
||||
"t3": 28, "t4": 29, "t5": 30, "t6": 31
|
||||
"zero": 0,
|
||||
"ra": 1,
|
||||
"sp": 2,
|
||||
"gp": 3,
|
||||
"tp": 4,
|
||||
"t0": 5,
|
||||
"t1": 6,
|
||||
"t2": 7,
|
||||
"s0": 8,
|
||||
"fp": 8,
|
||||
"s1": 9,
|
||||
"a0": 10,
|
||||
"a1": 11,
|
||||
"a2": 12,
|
||||
"a3": 13,
|
||||
"a4": 14,
|
||||
"a5": 15,
|
||||
"a6": 16,
|
||||
"a7": 17,
|
||||
"s2": 18,
|
||||
"s3": 19,
|
||||
"s4": 20,
|
||||
"s5": 21,
|
||||
"s6": 22,
|
||||
"s7": 23,
|
||||
"s8": 24,
|
||||
"s9": 25,
|
||||
"s10": 26,
|
||||
"s11": 27,
|
||||
"t3": 28,
|
||||
"t4": 29,
|
||||
"t5": 30,
|
||||
"t6": 31,
|
||||
}
|
||||
|
||||
if register_string in abi_names:
|
||||
return {"class": "register", "register": {"prefix": "", "name": register_string}}
|
||||
return {
|
||||
"class": "register",
|
||||
"register": {"prefix": "", "name": register_string},
|
||||
}
|
||||
|
||||
# If no match is found
|
||||
return None
|
||||
@@ -642,15 +897,38 @@ class ParserRISCV(BaseParser):
|
||||
|
||||
# ABI name mapping for integer registers
|
||||
abi_to_x = {
|
||||
"zero": "x0", "ra": "x1", "sp": "x2", "gp": "x3", "tp": "x4",
|
||||
"t0": "x5", "t1": "x6", "t2": "x7",
|
||||
"s0": "x8", "s1": "x9",
|
||||
"a0": "x10", "a1": "x11", "a2": "x12", "a3": "x13",
|
||||
"a4": "x14", "a5": "x15", "a6": "x16", "a7": "x17",
|
||||
"s2": "x18", "s3": "x19", "s4": "x20", "s5": "x21",
|
||||
"s6": "x22", "s7": "x23", "s8": "x24", "s9": "x25",
|
||||
"s10": "x26", "s11": "x27",
|
||||
"t3": "x28", "t4": "x29", "t5": "x30", "t6": "x31"
|
||||
"zero": "x0",
|
||||
"ra": "x1",
|
||||
"sp": "x2",
|
||||
"gp": "x3",
|
||||
"tp": "x4",
|
||||
"t0": "x5",
|
||||
"t1": "x6",
|
||||
"t2": "x7",
|
||||
"s0": "x8",
|
||||
"s1": "x9",
|
||||
"a0": "x10",
|
||||
"a1": "x11",
|
||||
"a2": "x12",
|
||||
"a3": "x13",
|
||||
"a4": "x14",
|
||||
"a5": "x15",
|
||||
"a6": "x16",
|
||||
"a7": "x17",
|
||||
"s2": "x18",
|
||||
"s3": "x19",
|
||||
"s4": "x20",
|
||||
"s5": "x21",
|
||||
"s6": "x22",
|
||||
"s7": "x23",
|
||||
"s8": "x24",
|
||||
"s9": "x25",
|
||||
"s10": "x26",
|
||||
"s11": "x27",
|
||||
"t3": "x28",
|
||||
"t4": "x29",
|
||||
"t5": "x30",
|
||||
"t6": "x31",
|
||||
}
|
||||
|
||||
# For integer register ABI names
|
||||
@@ -751,7 +1029,7 @@ class ParserRISCV(BaseParser):
|
||||
# For raw integer values or string immediates
|
||||
return ImmediateOperand(
|
||||
imd_type="int",
|
||||
value=str(operand) if isinstance(operand, int) else operand
|
||||
value=str(operand) if isinstance(operand, int) else operand,
|
||||
)
|
||||
elif isinstance(operand, dict) and "imd" in operand:
|
||||
# For immediate operands from instruction definitions
|
||||
@@ -759,11 +1037,8 @@ class ParserRISCV(BaseParser):
|
||||
imd_type=operand["imd"],
|
||||
value=operand.get("value"),
|
||||
identifier=operand.get("identifier"),
|
||||
shift=operand.get("shift")
|
||||
shift=operand.get("shift"),
|
||||
)
|
||||
else:
|
||||
# For any other immediate format
|
||||
return ImmediateOperand(
|
||||
imd_type="int",
|
||||
value=str(operand)
|
||||
)
|
||||
return ImmediateOperand(imd_type="int", value=str(operand))
|
||||
|
||||
@@ -1025,6 +1025,7 @@ class MachineModel(object):
|
||||
try:
|
||||
# Need to check if they refer to the same register
|
||||
from osaca.parser import ParserRISCV
|
||||
|
||||
parser = ParserRISCV()
|
||||
reg_canonical = parser._get_canonical_reg_name(reg)
|
||||
i_reg_canonical = parser._get_canonical_reg_name(i_reg)
|
||||
|
||||
@@ -307,11 +307,13 @@ class TestCLI(unittest.TestCase):
|
||||
@staticmethod
|
||||
def _find_file(kernel, arch, comp):
|
||||
testdir = os.path.dirname(__file__)
|
||||
# Handle special case for rv64 architecture
|
||||
arch_prefix = arch.lower() if arch.lower() == "rv64" else arch[:3].lower()
|
||||
name = os.path.join(
|
||||
testdir,
|
||||
"../examples",
|
||||
kernel,
|
||||
kernel + ".s." + arch[:3].lower() + "." + comp.lower() + ".s",
|
||||
kernel + ".s." + arch_prefix + "." + comp.lower() + ".s",
|
||||
)
|
||||
if kernel == "j2d" and arch.lower() == "csx":
|
||||
name = name[:-1] + "AVX.s"
|
||||
|
||||
@@ -8,9 +8,7 @@ import unittest
|
||||
|
||||
from pyparsing import ParseException
|
||||
|
||||
from osaca.parser import ParserRISCV, InstructionForm
|
||||
from osaca.parser.directive import DirectiveOperand
|
||||
from osaca.parser.memory import MemoryOperand
|
||||
from osaca.parser import ParserRISCV
|
||||
from osaca.parser.register import RegisterOperand
|
||||
from osaca.parser.immediate import ImmediateOperand
|
||||
from osaca.parser.identifier import IdentifierOperand
|
||||
@@ -180,12 +178,10 @@ class TestParserRISCV(unittest.TestCase):
|
||||
|
||||
# Test floating-point registers
|
||||
reg_fa0 = RegisterOperand(prefix="f", name="a0")
|
||||
reg_fa1 = RegisterOperand(prefix="f", name="a1")
|
||||
reg_f10 = RegisterOperand(prefix="f", name="10")
|
||||
|
||||
# Test vector registers
|
||||
reg_v1 = RegisterOperand(prefix="v", name="1")
|
||||
reg_v2 = RegisterOperand(prefix="v", name="2")
|
||||
|
||||
# Test register type detection
|
||||
self.assertTrue(self.parser.is_gpr(reg_a0))
|
||||
|
||||
Reference in New Issue
Block a user