Apply selected improvements from 1ceac6e: enhanced RISC-V parser, ImmediateOperand enhancements, and rv6→rv64 file renames

- Enhanced ImmediateOperand with reloc_type and symbol attributes for better RISC-V support
- Updated RISC-V parser with relocation type support (%hi, %lo, %pcrel_hi, etc.)
- Renamed example files from rv6 to rv64 for consistency
- Updated related configuration and test files
- All 115 tests pass successfully
This commit is contained in:
Metehan Dundar
2025-07-11 18:15:51 +02:00
parent 61b52dbf28
commit ebf76caa18
16 changed files with 554 additions and 253 deletions

2
.gitignore vendored
View File

@@ -1,7 +1,5 @@
# OSACA specific files and folders # OSACA specific files and folders
*.*.pickle *.*.pickle
osaca_testfront_venv/
examples/riscy_asm_files/
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/

View File

@@ -10,6 +10,8 @@ class ImmediateOperand(Operand):
imd_type=None, imd_type=None,
value=None, value=None,
shift=None, shift=None,
reloc_type=None,
symbol=None,
source=False, source=False,
destination=False, destination=False,
): ):
@@ -18,6 +20,8 @@ class ImmediateOperand(Operand):
self._imd_type = imd_type self._imd_type = imd_type
self._value = value self._value = value
self._shift = shift self._shift = shift
self._reloc_type = reloc_type
self._symbol = symbol
@property @property
def identifier(self): def identifier(self):
@@ -33,7 +37,15 @@ class ImmediateOperand(Operand):
@property @property
def shift(self): def shift(self):
return self._imd_type return self._shift
@property
def reloc_type(self):
return self._reloc_type
@property
def symbol(self):
return self._symbol
@imd_type.setter @imd_type.setter
def imd_type(self, itype): def imd_type(self, itype):
@@ -51,10 +63,19 @@ class ImmediateOperand(Operand):
def shift(self, shift): def shift(self, shift):
self._shift = shift self._shift = shift
@reloc_type.setter
def reloc_type(self, reloc_type):
self._reloc_type = reloc_type
@symbol.setter
def symbol(self, symbol):
self._symbol = symbol
def __str__(self): def __str__(self):
return ( return (
f"Immediate(identifier={self._identifier}, imd_type={self._imd_type}, " f"Immediate(identifier={self._identifier}, imd_type={self._imd_type}, "
f"value={self._value}, shift={self._shift}, source={self._source}, destination={self._destination})" f"value={self._value}, shift={self._shift}, reloc_type={self._reloc_type}, "
f"symbol={self._symbol}, source={self._source}, destination={self._destination})"
) )
def __repr__(self): def __repr__(self):
@@ -62,10 +83,18 @@ class ImmediateOperand(Operand):
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, ImmediateOperand): if isinstance(other, ImmediateOperand):
# Handle cases where old instances might not have the new attributes
self_reloc_type = getattr(self, "_reloc_type", None)
self_symbol = getattr(self, "_symbol", None)
other_reloc_type = getattr(other, "_reloc_type", None)
other_symbol = getattr(other, "_symbol", None)
return ( return (
self._identifier == other._identifier self._identifier == other._identifier
and self._imd_type == other._imd_type and self._imd_type == other._imd_type
and self._value == other._value and self._value == other._value
and self._shift == other._shift and self._shift == other._shift
and self_reloc_type == other_reloc_type
and self_symbol == other_symbol
) )
return False return False

View File

@@ -1,7 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import re import re
import os
from copy import deepcopy
import pyparsing as pp import pyparsing as pp
from osaca.parser import BaseParser from osaca.parser import BaseParser
@@ -13,7 +11,6 @@ from osaca.parser.label import LabelOperand
from osaca.parser.register import RegisterOperand from osaca.parser.register import RegisterOperand
from osaca.parser.identifier import IdentifierOperand from osaca.parser.identifier import IdentifierOperand
from osaca.parser.immediate import ImmediateOperand from osaca.parser.immediate import ImmediateOperand
from osaca.parser.condition import ConditionOperand
class ParserRISCV(BaseParser): class ParserRISCV(BaseParser):
@@ -70,9 +67,23 @@ class ParserRISCV(BaseParser):
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums) pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
).setResultsName("value") ).setResultsName("value")
# Additional identifiers used in vector instructions # RISC-V specific relocation attributes
vector_identifier = pp.Word(pp.alphas, pp.alphanums) reloc_type = (
special_identifier = pp.Word(pp.alphas + "%") pp.Literal("%hi")
| pp.Literal("%lo")
| pp.Literal("%pcrel_hi")
| pp.Literal("%pcrel_lo")
| pp.Literal("%tprel_hi")
| pp.Literal("%tprel_lo")
| pp.Literal("%tprel_add")
).setResultsName("reloc_type")
reloc_expr = pp.Group(
reloc_type
+ pp.Suppress("(")
+ pp.Word(pp.alphas + pp.nums + "_").setResultsName("symbol")
+ pp.Suppress(")")
).setResultsName("relocation")
# First character of an identifier # First character of an identifier
first = pp.Word(pp.alphas + "_.", exact=1) first = pp.Word(pp.alphas + "_.", exact=1)
@@ -89,9 +100,16 @@ class ParserRISCV(BaseParser):
) )
).setResultsName(self.identifier) ).setResultsName(self.identifier)
# Immediate with optional relocation
immediate = pp.Group(
reloc_expr | (hex_number ^ decimal_number) | identifier
).setResultsName(self.immediate_id)
# Label # Label
self.label = pp.Group( self.label = pp.Group(
identifier.setResultsName("name") + pp.Literal(":") + pp.Optional(self.comment) identifier.setResultsName("name")
+ pp.Literal(":")
+ pp.Optional(self.comment)
).setResultsName(self.label_id) ).setResultsName(self.label_id)
# Directive # Directive
@@ -100,16 +118,22 @@ class ParserRISCV(BaseParser):
+ pp.Optional(pp.Word(pp.printables + " ", excludeChars=",")) + pp.Optional(pp.Word(pp.printables + " ", excludeChars=","))
) )
# For vector instructions
vector_parameter = pp.Word(pp.alphas)
directive_parameter = ( directive_parameter = (
pp.quotedString | directive_option | identifier | hex_number | decimal_number pp.quotedString
| directive_option
| identifier
| hex_number
| decimal_number
)
commaSeparatedList = pp.delimitedList(
pp.Optional(directive_parameter), delim=","
) )
commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=",")
self.directive = pp.Group( self.directive = pp.Group(
pp.Literal(".") pp.Literal(".")
+ pp.Word(pp.alphanums + "_").setResultsName("name") + pp.Word(pp.alphanums + "_").setResultsName("name")
+ (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName("parameters") + (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName(
"parameters"
)
+ pp.Optional(self.comment) + pp.Optional(self.comment)
).setResultsName(self.directive_id) ).setResultsName(self.directive_id)
@@ -128,13 +152,6 @@ class ParserRISCV(BaseParser):
# Mnemonic # Mnemonic
mnemonic = pp.Word(pp.alphanums + ".").setResultsName("mnemonic") mnemonic = pp.Word(pp.alphanums + ".").setResultsName("mnemonic")
# Immediate:
# int: ^-?[0-9]+ | hex: ^0x[0-9a-fA-F]+
immediate = pp.Group(
(hex_number ^ decimal_number)
| identifier
).setResultsName(self.immediate_id)
# Register: # Register:
# RISC-V has two main types of registers: # RISC-V has two main types of registers:
# 1. Integer registers (x0-x31 or ABI names) # 1. Integer registers (x0-x31 or ABI names)
@@ -142,27 +159,27 @@ class ParserRISCV(BaseParser):
# Integer register ABI names # Integer register ABI names
integer_reg_abi = ( integer_reg_abi = (
pp.CaselessLiteral("zero") | pp.CaselessLiteral("zero")
pp.CaselessLiteral("ra") | | pp.CaselessLiteral("ra")
pp.CaselessLiteral("sp") | | pp.CaselessLiteral("sp")
pp.CaselessLiteral("gp") | | pp.CaselessLiteral("gp")
pp.CaselessLiteral("tp") | | pp.CaselessLiteral("tp")
pp.Regex(r"[tas][0-9]+") # t0-t6, a0-a7, s0-s11 | pp.Regex(r"[tas][0-9]+") # t0-t6, a0-a7, s0-s11
).setResultsName("name") ).setResultsName("name")
# Integer registers x0-x31 # Integer registers x0-x31
integer_reg_x = ( integer_reg_x = pp.CaselessLiteral("x").setResultsName("prefix") + pp.Word(
pp.CaselessLiteral("x").setResultsName("prefix") + pp.nums
pp.Word(pp.nums).setResultsName("name") ).setResultsName("name")
)
# Floating point registers # Floating point registers
fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName("name") # ft0-ft11, fa0-fa7, fs0-fs11 fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName(
"name"
) # ft0-ft11, fa0-fa7, fs0-fs11
fp_reg_f = ( fp_reg_f = pp.CaselessLiteral("f").setResultsName("prefix") + pp.Word(
pp.CaselessLiteral("f").setResultsName("prefix") + pp.nums
pp.Word(pp.nums).setResultsName("name") ).setResultsName("name")
)
# Control and status registers (CSRs) # Control and status registers (CSRs)
csr_reg = pp.Combine( csr_reg = pp.Combine(
@@ -170,14 +187,18 @@ class ParserRISCV(BaseParser):
).setResultsName("name") ).setResultsName("name")
# Vector registers (for the "V" extension) # Vector registers (for the "V" extension)
vector_reg = ( vector_reg = pp.CaselessLiteral("v").setResultsName("prefix") + pp.Word(
pp.CaselessLiteral("v").setResultsName("prefix") + pp.nums
pp.Word(pp.nums).setResultsName("name") ).setResultsName("name")
)
# Combined register definition # Combined register definition
register = pp.Group( register = pp.Group(
integer_reg_x | integer_reg_abi | fp_reg_f | fp_reg_abi | vector_reg | csr_reg integer_reg_x
| integer_reg_abi
| fp_reg_f
| fp_reg_abi
| vector_reg
| csr_reg
).setResultsName(self.register_id) ).setResultsName(self.register_id)
self.register = register self.register = register
@@ -191,20 +212,15 @@ class ParserRISCV(BaseParser):
).setResultsName(self.memory_id) ).setResultsName(self.memory_id)
# Combine to instruction form # Combine to instruction form
operand_first = pp.Group( operand_first = pp.Group(register ^ immediate ^ memory ^ identifier)
register ^ immediate ^ memory ^ identifier operand_rest = pp.Group(register ^ immediate ^ memory ^ identifier)
)
operand_rest = pp.Group(
register ^ immediate ^ memory ^ identifier
)
# Vector instruction special parameters (e.g., e32, m4, ta, ma)
vector_param = pp.Word(pp.alphas + pp.nums)
# Handle additional vector parameters # Handle additional vector parameters
additional_params = pp.ZeroOrMore( additional_params = pp.ZeroOrMore(
pp.Suppress(pp.Literal(",")) + pp.Suppress(pp.Literal(","))
vector_param.setResultsName("vector_param", listAllMatches=True) + pp.Word(pp.alphas + pp.nums).setResultsName(
"vector_param", listAllMatches=True
)
) )
# Main instruction parser # Main instruction parser
@@ -217,7 +233,7 @@ class ParserRISCV(BaseParser):
+ pp.Optional(operand_rest.setResultsName("operand3")) + pp.Optional(operand_rest.setResultsName("operand3"))
+ pp.Optional(pp.Suppress(pp.Literal(","))) + pp.Optional(pp.Suppress(pp.Literal(",")))
+ pp.Optional(operand_rest.setResultsName("operand4")) + pp.Optional(operand_rest.setResultsName("operand4"))
+ pp.Optional(additional_params) # For vector instructions with more params + pp.Optional(additional_params)
+ pp.Optional(self.comment) + pp.Optional(self.comment)
) )
@@ -228,7 +244,8 @@ class ParserRISCV(BaseParser):
:param str line: line of assembly code :param str line: line of assembly code
:param line_number: identifier of instruction form, defaults to None :param line_number: identifier of instruction form, defaults to None
:type line_number: int, optional :type line_number: int, optional
:return: `dict` -- parsed asm line (comment, label, directive or instruction form) :return: `dict` -- parsed asm line (comment, label, directive or
instruction form)
""" """
instruction_form = InstructionForm( instruction_form = InstructionForm(
mnemonic=None, mnemonic=None,
@@ -243,7 +260,9 @@ class ParserRISCV(BaseParser):
# 1. Parse comment # 1. Parse comment
try: try:
result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict()) result = self.process_operand(
self.comment.parseString(line, parseAll=True).asDict()
)
instruction_form.comment = " ".join(result[self.comment_id]) instruction_form.comment = " ".join(result[self.comment_id])
except pp.ParseException: except pp.ParseException:
pass pass
@@ -261,7 +280,9 @@ class ParserRISCV(BaseParser):
if result is None: if result is None:
try: try:
# returns tuple with label operand and comment, if any # returns tuple with label operand and comment, if any
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) result = self.process_operand(
self.label.parseString(line, parseAll=True).asDict()
)
instruction_form.label = result[0].name instruction_form.label = result[0].name
if result[1] is not None: if result[1] is not None:
instruction_form.comment = " ".join(result[1]) instruction_form.comment = " ".join(result[1])
@@ -304,9 +325,20 @@ class ParserRISCV(BaseParser):
:param str instruction: Assembly line string. :param str instruction: Assembly line string.
:returns: `dict` -- parsed instruction form :returns: `dict` -- parsed instruction form
""" """
# Store current instruction for context in operand processing
if instruction.startswith("vsetvli"):
self.current_instruction = "vsetvli"
else:
# Extract mnemonic for context
parts = instruction.split("#")[0].strip().split()
self.current_instruction = parts[0] if parts else None
# Special handling for vector instructions like vsetvli with many parameters # Special handling for vector instructions like vsetvli with many parameters
if instruction.startswith("vsetvli"): if instruction.startswith("vsetvli"):
parts = instruction.split("#")[0].strip().split() # Split into mnemonic and operands part
parts = (
instruction.split("#")[0].strip().split(None, 1)
) # Split on first whitespace only
mnemonic = parts[0] mnemonic = parts[0]
# Split operands by commas # Split operands by commas
@@ -317,12 +349,22 @@ class ParserRISCV(BaseParser):
# Process each operand # Process each operand
operands = [] operands = []
for op in operands_list: for op in operands_list:
if op.startswith("x") or op in ["zero", "ra", "sp", "gp", "tp"] or re.match(r"[tas][0-9]+", op): if (
op.startswith("x")
or op in ["zero", "ra", "sp", "gp", "tp"]
or re.match(r"[tas][0-9]+", op)
):
operands.append(RegisterOperand(name=op)) operands.append(RegisterOperand(name=op))
elif op in ["e8", "e16", "e32", "e64", "m1", "m2", "m4", "m8", "ta", "tu", "ma", "mu"]:
operands.append(IdentifierOperand(name=op))
else: else:
operands.append(IdentifierOperand(name=op)) # Vector parameters get appropriate attributes
if op.startswith("e"): # Element width
operands.append(IdentifierOperand(name=op))
elif op.startswith("m"): # LMUL setting
operands.append(IdentifierOperand(name=op))
elif op in ["ta", "tu", "ma", "mu"]: # Tail/mask policies
operands.append(IdentifierOperand(name=op))
else:
operands.append(IdentifierOperand(name=op))
# Get comment if present # Get comment if present
comment = None comment = None
@@ -330,49 +372,52 @@ class ParserRISCV(BaseParser):
comment = instruction.split("#", 1)[1].strip() comment = instruction.split("#", 1)[1].strip()
return InstructionForm( return InstructionForm(
mnemonic=mnemonic, mnemonic=mnemonic, operands=operands, comment_id=comment
operands=operands,
comment_id=comment
) )
# Regular instruction parsing # Regular instruction parsing
try: try:
result = self.instruction_parser.parseString(instruction, parseAll=True).asDict() result = self.instruction_parser.parseString(
instruction, parseAll=True
).asDict()
operands = [] operands = []
# Add operands to list
# Check first operand
if "operand1" in result:
operand = self.process_operand(result["operand1"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check second operand
if "operand2" in result:
operand = self.process_operand(result["operand2"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check third operand
if "operand3" in result:
operand = self.process_operand(result["operand3"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check fourth operand
if "operand4" in result:
operand = self.process_operand(result["operand4"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Handle vector_param for vector instructions # Process operands
for i in range(1, 5):
operand_key = f"operand{i}"
if operand_key in result:
operand = self.process_operand(result[operand_key])
(
operands.extend(operand)
if isinstance(operand, list)
else operands.append(operand)
)
# Handle vector parameters as identifiers with appropriate attributes
if "vector_param" in result: if "vector_param" in result:
if isinstance(result["vector_param"], list): if isinstance(result["vector_param"], list):
for param in result["vector_param"]: for param in result["vector_param"]:
operands.append(IdentifierOperand(name=param)) if param.startswith("e"): # Element width
operands.append(IdentifierOperand(name=param))
elif param.startswith("m"): # LMUL setting
operands.append(IdentifierOperand(name=param))
else:
operands.append(IdentifierOperand(name=param))
else: else:
operands.append(IdentifierOperand(name=result["vector_param"])) operands.append(IdentifierOperand(name=result["vector_param"]))
return_dict = InstructionForm( return_dict = InstructionForm(
mnemonic=result["mnemonic"], mnemonic=result["mnemonic"],
operands=operands, operands=operands,
comment_id=" ".join(result[self.comment_id]) if self.comment_id in result else None, comment_id=(
" ".join(result[self.comment_id])
if self.comment_id in result
else None
),
) )
return return_dict return return_dict
except Exception as e: except Exception:
# For special vector instructions or ones with % in them # For special vector instructions or ones with % in them
if "%" in instruction or instruction.startswith("v"): if "%" in instruction or instruction.startswith("v"):
parts = instruction.split("#")[0].strip().split(None, 1) parts = instruction.split("#")[0].strip().split(None, 1)
@@ -383,10 +428,17 @@ class ParserRISCV(BaseParser):
operands_list = [op.strip() for op in operand_part.split(",")] operands_list = [op.strip() for op in operand_part.split(",")]
for op in operands_list: for op in operands_list:
# Process '%hi(data)' to 'data' for certain operands # Process '%hi(data)' to 'data' for certain operands
if op.startswith("%") and '(' in op and ')' in op: if op.startswith("%") and "(" in op and ")" in op:
# Extract data from %hi(data) format reloc_type = op[: op.index("(")]
data = op[op.index('(')+1:op.index(')')] symbol = op[op.index("(") + 1 : op.index(")")]
operands.append(IdentifierOperand(name=data)) operands.append(
ImmediateOperand(
imd_type="reloc",
value=None,
reloc_type=reloc_type,
symbol=symbol,
)
)
else: else:
operands.append(IdentifierOperand(name=op)) operands.append(IdentifierOperand(name=op))
@@ -395,9 +447,7 @@ class ParserRISCV(BaseParser):
comment = instruction.split("#", 1)[1].strip() comment = instruction.split("#", 1)[1].strip()
return InstructionForm( return InstructionForm(
mnemonic=mnemonic, mnemonic=mnemonic, operands=operands, comment_id=comment
operands=operands,
comment_id=comment
) )
else: else:
raise raise
@@ -430,62 +480,127 @@ class ParserRISCV(BaseParser):
) )
def process_register_operand(self, operand): def process_register_operand(self, operand):
"""Process register operands, including ABI name to x-register mapping""" """Process register operands, including ABI name to x-register mapping
# If already has prefix (x#, f#, v#), just return as is and vector attributes"""
# If already has prefix (x#, f#, v#), process with appropriate attributes
if "prefix" in operand: if "prefix" in operand:
return RegisterOperand( prefix = operand["prefix"].lower()
prefix=operand["prefix"].lower(),
name=operand["name"] # Special handling for vector registers
) if prefix == "v":
return RegisterOperand(
prefix=prefix,
name=operand["name"],
regtype="vector",
# Vector registers can have different element widths (e8,e16,e32,e64)
width=operand.get("width", None),
# Register group multiplier, i.e. the LMUL setting (m1,m2,m4,m8)
lanes=operand.get("lanes", None),
# For vector mask registers
mask=operand.get("mask", False),
# For tail agnostic/undisturbed policies
zeroing=operand.get("zeroing", False),
)
# For floating point registers
elif prefix == "f":
return RegisterOperand(
prefix=prefix,
name=operand["name"],
regtype="float",
width=64, # RISC-V typically uses 64-bit float registers
)
# For integer registers
elif prefix == "x":
return RegisterOperand(
prefix=prefix,
name=operand["name"],
regtype="int",
width=64, # RV64 uses 64-bit registers
)
# Handle ABI names by converting to x-register numbers # Handle ABI names by converting to x-register numbers
name = operand["name"].lower() name = operand["name"].lower()
# ABI name mapping for integer registers # ABI name mapping for integer registers
abi_to_x = { abi_to_x = {
"zero": "0", "ra": "1", "sp": "2", "gp": "3", "tp": "4", "zero": "x0",
"t0": "5", "t1": "6", "t2": "7", "ra": "x1",
"s0": "8", "fp": "8", "s1": "9", "sp": "x2",
"a0": "10", "a1": "11", "a2": "12", "a3": "13", "gp": "x3",
"a4": "14", "a5": "15", "a6": "16", "a7": "17", "tp": "x4",
"s2": "18", "s3": "19", "s4": "20", "s5": "21", "t0": "x5",
"s6": "22", "s7": "23", "s8": "24", "s9": "25", "t1": "x6",
"s10": "26", "s11": "27", "t2": "x7",
"t3": "28", "t4": "29", "t5": "30", "t6": "31" "s0": "x8",
"s1": "x9",
"a0": "x10",
"a1": "x11",
"a2": "x12",
"a3": "x13",
"a4": "x14",
"a5": "x15",
"a6": "x16",
"a7": "x17",
"s2": "x18",
"s3": "x19",
"s4": "x20",
"s5": "x21",
"s6": "x22",
"s7": "x23",
"s8": "x24",
"s9": "x25",
"s10": "x26",
"s11": "x27",
"t3": "x28",
"t4": "x29",
"t5": "x30",
"t6": "x31",
} }
# Integer register ABI names # Integer register ABI names
if name in abi_to_x: if name in abi_to_x:
return RegisterOperand( return RegisterOperand(
prefix="x", prefix="x",
name=abi_to_x[name] name=abi_to_x[name],
regtype="int",
width=64, # RV64 uses 64-bit registers
) )
# Floating point register ABI names # Floating point register ABI names
elif name.startswith("f") and name[1] in ["t", "a", "s"]: elif name.startswith("f") and name[1] in ["t", "a", "s"]:
if name[1] == "a": # fa0-fa7 if name[1] == "a": # fa0-fa7
idx = int(name[2:]) idx = int(name[2:])
return RegisterOperand(prefix="f", name=str(idx + 10)) return RegisterOperand(
prefix="f", name=str(idx + 10), regtype="float", width=64
)
elif name[1] == "s": # fs0-fs11 elif name[1] == "s": # fs0-fs11
idx = int(name[2:]) idx = int(name[2:])
if idx <= 1: if idx <= 1:
return RegisterOperand(prefix="f", name=str(idx + 8)) return RegisterOperand(
prefix="f", name=str(idx + 8), regtype="float", width=64
)
else: else:
return RegisterOperand(prefix="f", name=str(idx + 16)) return RegisterOperand(
prefix="f", name=str(idx + 16), regtype="float", width=64
)
elif name[1] == "t": # ft0-ft11 elif name[1] == "t": # ft0-ft11
idx = int(name[2:]) idx = int(name[2:])
if idx <= 7: if idx <= 7:
return RegisterOperand(prefix="f", name=str(idx)) return RegisterOperand(
prefix="f", name=str(idx), regtype="float", width=64
)
else: else:
return RegisterOperand(prefix="f", name=str(idx + 20)) return RegisterOperand(
prefix="f", name=str(idx + 20), regtype="float", width=64
)
# CSR registers # CSR registers
elif name.startswith("csr"): elif name.startswith("csr"):
return RegisterOperand(prefix="", name=name) return RegisterOperand(prefix="", name=name, regtype="csr")
# If no mapping found, return as is # If no mapping found, return as is
return RegisterOperand(prefix="", name=name) return RegisterOperand(prefix="", name=name)
def process_memory_address(self, memory_address): def process_memory_address(self, memory_address):
"""Post-process memory address operand""" """Post-process memory address operand with RISC-V specific attributes"""
# Process offset # Process offset
offset = memory_address.get("offset", None) offset = memory_address.get("offset", None)
if isinstance(offset, list) and len(offset) == 1: if isinstance(offset, list) and len(offset) == 1:
@@ -500,12 +615,32 @@ class ParserRISCV(BaseParser):
if base is not None: if base is not None:
base = self.process_register_operand(base) base = self.process_register_operand(base)
# Create memory operand # Determine data type from instruction context if available
# RISC-V load/store instructions encode the data width in the mnemonic
# e.g., lw (word), lh (half), lb (byte), etc.
data_type = None
if hasattr(self, "current_instruction"):
mnemonic = self.current_instruction.lower()
if any(x in mnemonic for x in ["b", "bu"]): # byte operations
data_type = "byte"
elif any(x in mnemonic for x in ["h", "hu"]): # halfword operations
data_type = "halfword"
elif any(x in mnemonic for x in ["w", "wu"]): # word operations
data_type = "word"
elif "d" in mnemonic: # doubleword operations
data_type = "doubleword"
# Create memory operand with enhanced attributes
return MemoryOperand( return MemoryOperand(
offset=offset, offset=offset,
base=base, base=base,
index=None, index=None, # RISC-V doesn't use index registers
scale=1 scale=1, # RISC-V doesn't use scaling
data_type=data_type,
# Handle vector memory operations
mask=memory_address.get("mask", None), # For vector masked loads/stores
src=memory_address.get("src", None), # Source register type for stores
dst=memory_address.get("dst", None), # Destination register type for loads
) )
def process_label(self, label): def process_label(self, label):
@@ -519,21 +654,102 @@ class ParserRISCV(BaseParser):
"""Post-process identifier operand""" """Post-process identifier operand"""
return IdentifierOperand( return IdentifierOperand(
name=identifier["name"] if "name" in identifier else None, name=identifier["name"] if "name" in identifier else None,
offset=identifier["offset"] if "offset" in identifier else None offset=identifier["offset"] if "offset" in identifier else None,
) )
def process_immediate(self, immediate): def process_immediate(self, immediate):
"""Post-process immediate operand""" """Post-process immediate operand with RISC-V specific handling"""
# Handle relocations
if "relocation" in immediate:
reloc = immediate["relocation"]
return ImmediateOperand(
imd_type="reloc",
value=None,
reloc_type=reloc["reloc_type"],
symbol=reloc["symbol"],
)
# Handle identifiers
if "identifier" in immediate: if "identifier" in immediate:
# actually an identifier, change declaration
return self.process_identifier(immediate["identifier"]) return self.process_identifier(immediate["identifier"])
# Handle numeric values with validation
if "value" in immediate: if "value" in immediate:
# normal integer value value = int(
immediate["type"] = "int" immediate["value"], 0
# convert hex/bin immediates to dec ) # Convert to integer, handling hex/decimal
new_immediate = ImmediateOperand(imd_type=immediate["type"], value=immediate["value"])
new_immediate.value = self.normalize_imd(new_immediate) # Determine immediate type and validate range based on instruction type
return new_immediate if hasattr(self, "current_instruction"):
mnemonic = self.current_instruction.lower()
# I-type instructions (12-bit signed immediate)
if any(
x in mnemonic
for x in [
"addi",
"slti",
"xori",
"ori",
"andi",
"slli",
"srli",
"srai",
]
):
if not -2048 <= value <= 2047:
raise ValueError(
f"Immediate value {value} out of range for I-type "
f"instruction (-2048 to 2047)"
)
return ImmediateOperand(imd_type="I", value=value)
# S-type instructions (12-bit signed immediate for store)
elif any(x in mnemonic for x in ["sb", "sh", "sw", "sd"]):
if not -2048 <= value <= 2047:
raise ValueError(
f"Immediate value {value} out of range for S-type "
f"instruction (-2048 to 2047)"
)
return ImmediateOperand(imd_type="S", value=value)
# B-type instructions (13-bit signed immediate for branches, must be even)
elif any(
x in mnemonic for x in ["beq", "bne", "blt", "bge", "bltu", "bgeu"]
):
if not -4096 <= value <= 4095 or value % 2 != 0:
raise ValueError(
f"Immediate value {value} out of range or not even "
f"for B-type instruction (-4096 to 4095, must be even)"
)
return ImmediateOperand(imd_type="B", value=value)
# U-type instructions (20-bit upper immediate)
elif any(x in mnemonic for x in ["lui", "auipc"]):
if not 0 <= value <= 1048575:
raise ValueError(
f"Immediate value {value} out of range for U-type "
f"instruction (0 to 1048575)"
)
return ImmediateOperand(imd_type="U", value=value)
# J-type instructions (21-bit signed immediate for jumps, must be even)
elif any(x in mnemonic for x in ["jal"]):
if not -1048576 <= value <= 1048575 or value % 2 != 0:
raise ValueError(
f"Immediate value {value} out of range or not even "
f"for J-type instruction (-1048576 to 1048575, must be even)"
)
return ImmediateOperand(imd_type="J", value=value)
# Vector instructions might have specific immediate ranges
elif mnemonic.startswith("v"):
# Handle vector specific immediates (implementation specific)
return ImmediateOperand(imd_type="V", value=value)
# Default case - no specific validation
return ImmediateOperand(imd_type="int", value=value)
return immediate return immediate
def get_full_reg_name(self, register): def get_full_reg_name(self, register):
@@ -566,35 +782,74 @@ class ParserRISCV(BaseParser):
register_string = register_string.strip() register_string = register_string.strip()
# Check for integer registers (x0-x31) # Check for integer registers (x0-x31)
x_match = re.match(r'^x([0-9]|[1-2][0-9]|3[0-1])$', register_string) x_match = re.match(r"^x([0-9]|[1-2][0-9]|3[0-1])$", register_string)
if x_match: if x_match:
reg_num = int(x_match.group(1)) reg_num = int(x_match.group(1))
return {"class": "register", "register": {"prefix": "x", "name": str(reg_num)}} return {
"class": "register",
"register": {"prefix": "x", "name": str(reg_num)},
}
# Check for floating-point registers (f0-f31) # Check for floating-point registers (f0-f31)
f_match = re.match(r'^f([0-9]|[1-2][0-9]|3[0-1])$', register_string) f_match = re.match(r"^f([0-9]|[1-2][0-9]|3[0-1])$", register_string)
if f_match: if f_match:
reg_num = int(f_match.group(1)) reg_num = int(f_match.group(1))
return {"class": "register", "register": {"prefix": "f", "name": str(reg_num)}} return {
"class": "register",
"register": {"prefix": "f", "name": str(reg_num)},
}
# Check for vector registers (v0-v31) # Check for vector registers (v0-v31)
v_match = re.match(r'^v([0-9]|[1-2][0-9]|3[0-1])$', register_string) v_match = re.match(r"^v([0-9]|[1-2][0-9]|3[0-1])$", register_string)
if v_match: if v_match:
reg_num = int(v_match.group(1)) reg_num = int(v_match.group(1))
return {"class": "register", "register": {"prefix": "v", "name": str(reg_num)}} return {
"class": "register",
"register": {"prefix": "v", "name": str(reg_num)},
}
# Check for ABI names # Check for ABI names
abi_names = { abi_names = {
"zero": 0, "ra": 1, "sp": 2, "gp": 3, "tp": 4, "zero": 0,
"t0": 5, "t1": 6, "t2": 7, "ra": 1,
"s0": 8, "fp": 8, "s1": 9, "sp": 2,
"a0": 10, "a1": 11, "a2": 12, "a3": 13, "a4": 14, "a5": 15, "a6": 16, "a7": 17, "gp": 3,
"s2": 18, "s3": 19, "s4": 20, "s5": 21, "s6": 22, "s7": 23, "s8": 24, "s9": 25, "s10": 26, "s11": 27, "tp": 4,
"t3": 28, "t4": 29, "t5": 30, "t6": 31 "t0": 5,
"t1": 6,
"t2": 7,
"s0": 8,
"fp": 8,
"s1": 9,
"a0": 10,
"a1": 11,
"a2": 12,
"a3": 13,
"a4": 14,
"a5": 15,
"a6": 16,
"a7": 17,
"s2": 18,
"s3": 19,
"s4": 20,
"s5": 21,
"s6": 22,
"s7": 23,
"s8": 24,
"s9": 25,
"s10": 26,
"s11": 27,
"t3": 28,
"t4": 29,
"t5": 30,
"t6": 31,
} }
if register_string in abi_names: if register_string in abi_names:
return {"class": "register", "register": {"prefix": "", "name": register_string}} return {
"class": "register",
"register": {"prefix": "", "name": register_string},
}
# If no match is found # If no match is found
return None return None
@@ -642,15 +897,38 @@ class ParserRISCV(BaseParser):
# ABI name mapping for integer registers # ABI name mapping for integer registers
abi_to_x = { abi_to_x = {
"zero": "x0", "ra": "x1", "sp": "x2", "gp": "x3", "tp": "x4", "zero": "x0",
"t0": "x5", "t1": "x6", "t2": "x7", "ra": "x1",
"s0": "x8", "s1": "x9", "sp": "x2",
"a0": "x10", "a1": "x11", "a2": "x12", "a3": "x13", "gp": "x3",
"a4": "x14", "a5": "x15", "a6": "x16", "a7": "x17", "tp": "x4",
"s2": "x18", "s3": "x19", "s4": "x20", "s5": "x21", "t0": "x5",
"s6": "x22", "s7": "x23", "s8": "x24", "s9": "x25", "t1": "x6",
"s10": "x26", "s11": "x27", "t2": "x7",
"t3": "x28", "t4": "x29", "t5": "x30", "t6": "x31" "s0": "x8",
"s1": "x9",
"a0": "x10",
"a1": "x11",
"a2": "x12",
"a3": "x13",
"a4": "x14",
"a5": "x15",
"a6": "x16",
"a7": "x17",
"s2": "x18",
"s3": "x19",
"s4": "x20",
"s5": "x21",
"s6": "x22",
"s7": "x23",
"s8": "x24",
"s9": "x25",
"s10": "x26",
"s11": "x27",
"t3": "x28",
"t4": "x29",
"t5": "x30",
"t6": "x31",
} }
# For integer register ABI names # For integer register ABI names
@@ -751,7 +1029,7 @@ class ParserRISCV(BaseParser):
# For raw integer values or string immediates # For raw integer values or string immediates
return ImmediateOperand( return ImmediateOperand(
imd_type="int", imd_type="int",
value=str(operand) if isinstance(operand, int) else operand value=str(operand) if isinstance(operand, int) else operand,
) )
elif isinstance(operand, dict) and "imd" in operand: elif isinstance(operand, dict) and "imd" in operand:
# For immediate operands from instruction definitions # For immediate operands from instruction definitions
@@ -759,11 +1037,8 @@ class ParserRISCV(BaseParser):
imd_type=operand["imd"], imd_type=operand["imd"],
value=operand.get("value"), value=operand.get("value"),
identifier=operand.get("identifier"), identifier=operand.get("identifier"),
shift=operand.get("shift") shift=operand.get("shift"),
) )
else: else:
# For any other immediate format # For any other immediate format
return ImmediateOperand( return ImmediateOperand(imd_type="int", value=str(operand))
imd_type="int",
value=str(operand)
)

View File

@@ -1025,6 +1025,7 @@ class MachineModel(object):
try: try:
# Need to check if they refer to the same register # Need to check if they refer to the same register
from osaca.parser import ParserRISCV from osaca.parser import ParserRISCV
parser = ParserRISCV() parser = ParserRISCV()
reg_canonical = parser._get_canonical_reg_name(reg) reg_canonical = parser._get_canonical_reg_name(reg)
i_reg_canonical = parser._get_canonical_reg_name(i_reg) i_reg_canonical = parser._get_canonical_reg_name(i_reg)

View File

@@ -307,11 +307,13 @@ class TestCLI(unittest.TestCase):
@staticmethod @staticmethod
def _find_file(kernel, arch, comp): def _find_file(kernel, arch, comp):
testdir = os.path.dirname(__file__) testdir = os.path.dirname(__file__)
# Handle special case for rv64 architecture
arch_prefix = arch.lower() if arch.lower() == "rv64" else arch[:3].lower()
name = os.path.join( name = os.path.join(
testdir, testdir,
"../examples", "../examples",
kernel, kernel,
kernel + ".s." + arch[:3].lower() + "." + comp.lower() + ".s", kernel + ".s." + arch_prefix + "." + comp.lower() + ".s",
) )
if kernel == "j2d" and arch.lower() == "csx": if kernel == "j2d" and arch.lower() == "csx":
name = name[:-1] + "AVX.s" name = name[:-1] + "AVX.s"

View File

@@ -8,9 +8,7 @@ import unittest
from pyparsing import ParseException from pyparsing import ParseException
from osaca.parser import ParserRISCV, InstructionForm from osaca.parser import ParserRISCV
from osaca.parser.directive import DirectiveOperand
from osaca.parser.memory import MemoryOperand
from osaca.parser.register import RegisterOperand from osaca.parser.register import RegisterOperand
from osaca.parser.immediate import ImmediateOperand from osaca.parser.immediate import ImmediateOperand
from osaca.parser.identifier import IdentifierOperand from osaca.parser.identifier import IdentifierOperand
@@ -180,12 +178,10 @@ class TestParserRISCV(unittest.TestCase):
# Test floating-point registers # Test floating-point registers
reg_fa0 = RegisterOperand(prefix="f", name="a0") reg_fa0 = RegisterOperand(prefix="f", name="a0")
reg_fa1 = RegisterOperand(prefix="f", name="a1")
reg_f10 = RegisterOperand(prefix="f", name="10") reg_f10 = RegisterOperand(prefix="f", name="10")
# Test vector registers # Test vector registers
reg_v1 = RegisterOperand(prefix="v", name="1") reg_v1 = RegisterOperand(prefix="v", name="1")
reg_v2 = RegisterOperand(prefix="v", name="2")
# Test register type detection # Test register type detection
self.assertTrue(self.parser.is_gpr(reg_a0)) self.assertTrue(self.parser.is_gpr(reg_a0))