Apply selected improvements from 1ceac6e: enhanced RISC-V parser, ImmediateOperand enhancements, and rv6→rv64 file renames

- Enhanced ImmediateOperand with reloc_type and symbol attributes for better RISC-V support
- Updated RISC-V parser with relocation type support (%hi, %lo, %pcrel_hi, etc.)
- Renamed example files from rv6 to rv64 for consistency
- Updated related configuration and test files
- All 115 tests pass successfully
This commit is contained in:
Metehan Dundar
2025-07-11 18:15:51 +02:00
parent 61b52dbf28
commit ebf76caa18
16 changed files with 554 additions and 253 deletions

2
.gitignore vendored
View File

@@ -1,7 +1,5 @@
# OSACA specific files and folders
*.*.pickle
osaca_testfront_venv/
examples/riscy_asm_files/
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@@ -10,6 +10,8 @@ class ImmediateOperand(Operand):
imd_type=None,
value=None,
shift=None,
reloc_type=None,
symbol=None,
source=False,
destination=False,
):
@@ -18,6 +20,8 @@ class ImmediateOperand(Operand):
self._imd_type = imd_type
self._value = value
self._shift = shift
self._reloc_type = reloc_type
self._symbol = symbol
@property
def identifier(self):
@@ -33,7 +37,15 @@ class ImmediateOperand(Operand):
@property
def shift(self):
return self._imd_type
return self._shift
@property
def reloc_type(self):
return self._reloc_type
@property
def symbol(self):
return self._symbol
@imd_type.setter
def imd_type(self, itype):
@@ -51,10 +63,19 @@ class ImmediateOperand(Operand):
def shift(self, shift):
self._shift = shift
@reloc_type.setter
def reloc_type(self, reloc_type):
self._reloc_type = reloc_type
@symbol.setter
def symbol(self, symbol):
self._symbol = symbol
def __str__(self):
return (
f"Immediate(identifier={self._identifier}, imd_type={self._imd_type}, "
f"value={self._value}, shift={self._shift}, source={self._source}, destination={self._destination})"
f"value={self._value}, shift={self._shift}, reloc_type={self._reloc_type}, "
f"symbol={self._symbol}, source={self._source}, destination={self._destination})"
)
def __repr__(self):
@@ -62,10 +83,18 @@ class ImmediateOperand(Operand):
def __eq__(self, other):
if isinstance(other, ImmediateOperand):
# Handle cases where old instances might not have the new attributes
self_reloc_type = getattr(self, "_reloc_type", None)
self_symbol = getattr(self, "_symbol", None)
other_reloc_type = getattr(other, "_reloc_type", None)
other_symbol = getattr(other, "_symbol", None)
return (
self._identifier == other._identifier
and self._imd_type == other._imd_type
and self._value == other._value
and self._shift == other._shift
and self_reloc_type == other_reloc_type
and self_symbol == other_symbol
)
return False

View File

@@ -1,7 +1,5 @@
#!/usr/bin/env python3
import re
import os
from copy import deepcopy
import pyparsing as pp
from osaca.parser import BaseParser
@@ -13,7 +11,6 @@ from osaca.parser.label import LabelOperand
from osaca.parser.register import RegisterOperand
from osaca.parser.identifier import IdentifierOperand
from osaca.parser.immediate import ImmediateOperand
from osaca.parser.condition import ConditionOperand
class ParserRISCV(BaseParser):
@@ -70,9 +67,23 @@ class ParserRISCV(BaseParser):
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
).setResultsName("value")
# Additional identifiers used in vector instructions
vector_identifier = pp.Word(pp.alphas, pp.alphanums)
special_identifier = pp.Word(pp.alphas + "%")
# RISC-V specific relocation attributes
reloc_type = (
pp.Literal("%hi")
| pp.Literal("%lo")
| pp.Literal("%pcrel_hi")
| pp.Literal("%pcrel_lo")
| pp.Literal("%tprel_hi")
| pp.Literal("%tprel_lo")
| pp.Literal("%tprel_add")
).setResultsName("reloc_type")
reloc_expr = pp.Group(
reloc_type
+ pp.Suppress("(")
+ pp.Word(pp.alphas + pp.nums + "_").setResultsName("symbol")
+ pp.Suppress(")")
).setResultsName("relocation")
# First character of an identifier
first = pp.Word(pp.alphas + "_.", exact=1)
@@ -89,9 +100,16 @@ class ParserRISCV(BaseParser):
)
).setResultsName(self.identifier)
# Immediate with optional relocation
immediate = pp.Group(
reloc_expr | (hex_number ^ decimal_number) | identifier
).setResultsName(self.immediate_id)
# Label
self.label = pp.Group(
identifier.setResultsName("name") + pp.Literal(":") + pp.Optional(self.comment)
identifier.setResultsName("name")
+ pp.Literal(":")
+ pp.Optional(self.comment)
).setResultsName(self.label_id)
# Directive
@@ -100,16 +118,22 @@ class ParserRISCV(BaseParser):
+ pp.Optional(pp.Word(pp.printables + " ", excludeChars=","))
)
# For vector instructions
vector_parameter = pp.Word(pp.alphas)
directive_parameter = (
pp.quotedString | directive_option | identifier | hex_number | decimal_number
pp.quotedString
| directive_option
| identifier
| hex_number
| decimal_number
)
commaSeparatedList = pp.delimitedList(
pp.Optional(directive_parameter), delim=","
)
commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=",")
self.directive = pp.Group(
pp.Literal(".")
+ pp.Word(pp.alphanums + "_").setResultsName("name")
+ (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName("parameters")
+ (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName(
"parameters"
)
+ pp.Optional(self.comment)
).setResultsName(self.directive_id)
@@ -128,13 +152,6 @@ class ParserRISCV(BaseParser):
# Mnemonic
mnemonic = pp.Word(pp.alphanums + ".").setResultsName("mnemonic")
# Immediate:
# int: ^-?[0-9]+ | hex: ^0x[0-9a-fA-F]+
immediate = pp.Group(
(hex_number ^ decimal_number)
| identifier
).setResultsName(self.immediate_id)
# Register:
# RISC-V has two main types of registers:
# 1. Integer registers (x0-x31 or ABI names)
@@ -142,27 +159,27 @@ class ParserRISCV(BaseParser):
# Integer register ABI names
integer_reg_abi = (
pp.CaselessLiteral("zero") |
pp.CaselessLiteral("ra") |
pp.CaselessLiteral("sp") |
pp.CaselessLiteral("gp") |
pp.CaselessLiteral("tp") |
pp.Regex(r"[tas][0-9]+") # t0-t6, a0-a7, s0-s11
pp.CaselessLiteral("zero")
| pp.CaselessLiteral("ra")
| pp.CaselessLiteral("sp")
| pp.CaselessLiteral("gp")
| pp.CaselessLiteral("tp")
| pp.Regex(r"[tas][0-9]+") # t0-t6, a0-a7, s0-s11
).setResultsName("name")
# Integer registers x0-x31
integer_reg_x = (
pp.CaselessLiteral("x").setResultsName("prefix") +
pp.Word(pp.nums).setResultsName("name")
)
integer_reg_x = pp.CaselessLiteral("x").setResultsName("prefix") + pp.Word(
pp.nums
).setResultsName("name")
# Floating point registers
fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName("name") # ft0-ft11, fa0-fa7, fs0-fs11
fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName(
"name"
) # ft0-ft11, fa0-fa7, fs0-fs11
fp_reg_f = (
pp.CaselessLiteral("f").setResultsName("prefix") +
pp.Word(pp.nums).setResultsName("name")
)
fp_reg_f = pp.CaselessLiteral("f").setResultsName("prefix") + pp.Word(
pp.nums
).setResultsName("name")
# Control and status registers (CSRs)
csr_reg = pp.Combine(
@@ -170,14 +187,18 @@ class ParserRISCV(BaseParser):
).setResultsName("name")
# Vector registers (for the "V" extension)
vector_reg = (
pp.CaselessLiteral("v").setResultsName("prefix") +
pp.Word(pp.nums).setResultsName("name")
)
vector_reg = pp.CaselessLiteral("v").setResultsName("prefix") + pp.Word(
pp.nums
).setResultsName("name")
# Combined register definition
register = pp.Group(
integer_reg_x | integer_reg_abi | fp_reg_f | fp_reg_abi | vector_reg | csr_reg
integer_reg_x
| integer_reg_abi
| fp_reg_f
| fp_reg_abi
| vector_reg
| csr_reg
).setResultsName(self.register_id)
self.register = register
@@ -191,20 +212,15 @@ class ParserRISCV(BaseParser):
).setResultsName(self.memory_id)
# Combine to instruction form
operand_first = pp.Group(
register ^ immediate ^ memory ^ identifier
)
operand_rest = pp.Group(
register ^ immediate ^ memory ^ identifier
)
# Vector instruction special parameters (e.g., e32, m4, ta, ma)
vector_param = pp.Word(pp.alphas + pp.nums)
operand_first = pp.Group(register ^ immediate ^ memory ^ identifier)
operand_rest = pp.Group(register ^ immediate ^ memory ^ identifier)
# Handle additional vector parameters
additional_params = pp.ZeroOrMore(
pp.Suppress(pp.Literal(",")) +
vector_param.setResultsName("vector_param", listAllMatches=True)
pp.Suppress(pp.Literal(","))
+ pp.Word(pp.alphas + pp.nums).setResultsName(
"vector_param", listAllMatches=True
)
)
# Main instruction parser
@@ -217,7 +233,7 @@ class ParserRISCV(BaseParser):
+ pp.Optional(operand_rest.setResultsName("operand3"))
+ pp.Optional(pp.Suppress(pp.Literal(",")))
+ pp.Optional(operand_rest.setResultsName("operand4"))
+ pp.Optional(additional_params) # For vector instructions with more params
+ pp.Optional(additional_params)
+ pp.Optional(self.comment)
)
@@ -228,7 +244,8 @@ class ParserRISCV(BaseParser):
:param str line: line of assembly code
:param line_number: identifier of instruction form, defaults to None
:type line_number: int, optional
:return: `dict` -- parsed asm line (comment, label, directive or instruction form)
:return: `dict` -- parsed asm line (comment, label, directive or
instruction form)
"""
instruction_form = InstructionForm(
mnemonic=None,
@@ -243,7 +260,9 @@ class ParserRISCV(BaseParser):
# 1. Parse comment
try:
result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())
result = self.process_operand(
self.comment.parseString(line, parseAll=True).asDict()
)
instruction_form.comment = " ".join(result[self.comment_id])
except pp.ParseException:
pass
@@ -261,7 +280,9 @@ class ParserRISCV(BaseParser):
if result is None:
try:
# returns tuple with label operand and comment, if any
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
result = self.process_operand(
self.label.parseString(line, parseAll=True).asDict()
)
instruction_form.label = result[0].name
if result[1] is not None:
instruction_form.comment = " ".join(result[1])
@@ -304,9 +325,20 @@ class ParserRISCV(BaseParser):
:param str instruction: Assembly line string.
:returns: `dict` -- parsed instruction form
"""
# Store current instruction for context in operand processing
if instruction.startswith("vsetvli"):
self.current_instruction = "vsetvli"
else:
# Extract mnemonic for context
parts = instruction.split("#")[0].strip().split()
self.current_instruction = parts[0] if parts else None
# Special handling for vector instructions like vsetvli with many parameters
if instruction.startswith("vsetvli"):
parts = instruction.split("#")[0].strip().split()
# Split into mnemonic and operands part
parts = (
instruction.split("#")[0].strip().split(None, 1)
) # Split on first whitespace only
mnemonic = parts[0]
# Split operands by commas
@@ -317,9 +349,19 @@ class ParserRISCV(BaseParser):
# Process each operand
operands = []
for op in operands_list:
if op.startswith("x") or op in ["zero", "ra", "sp", "gp", "tp"] or re.match(r"[tas][0-9]+", op):
if (
op.startswith("x")
or op in ["zero", "ra", "sp", "gp", "tp"]
or re.match(r"[tas][0-9]+", op)
):
operands.append(RegisterOperand(name=op))
elif op in ["e8", "e16", "e32", "e64", "m1", "m2", "m4", "m8", "ta", "tu", "ma", "mu"]:
else:
# Vector parameters get appropriate attributes
if op.startswith("e"): # Element width
operands.append(IdentifierOperand(name=op))
elif op.startswith("m"): # LMUL setting
operands.append(IdentifierOperand(name=op))
elif op in ["ta", "tu", "ma", "mu"]: # Tail/mask policies
operands.append(IdentifierOperand(name=op))
else:
operands.append(IdentifierOperand(name=op))
@@ -330,37 +372,36 @@ class ParserRISCV(BaseParser):
comment = instruction.split("#", 1)[1].strip()
return InstructionForm(
mnemonic=mnemonic,
operands=operands,
comment_id=comment
mnemonic=mnemonic, operands=operands, comment_id=comment
)
# Regular instruction parsing
try:
result = self.instruction_parser.parseString(instruction, parseAll=True).asDict()
result = self.instruction_parser.parseString(
instruction, parseAll=True
).asDict()
operands = []
# Add operands to list
# Check first operand
if "operand1" in result:
operand = self.process_operand(result["operand1"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check second operand
if "operand2" in result:
operand = self.process_operand(result["operand2"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check third operand
if "operand3" in result:
operand = self.process_operand(result["operand3"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check fourth operand
if "operand4" in result:
operand = self.process_operand(result["operand4"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Handle vector_param for vector instructions
# Process operands
for i in range(1, 5):
operand_key = f"operand{i}"
if operand_key in result:
operand = self.process_operand(result[operand_key])
(
operands.extend(operand)
if isinstance(operand, list)
else operands.append(operand)
)
# Handle vector parameters as identifiers with appropriate attributes
if "vector_param" in result:
if isinstance(result["vector_param"], list):
for param in result["vector_param"]:
if param.startswith("e"): # Element width
operands.append(IdentifierOperand(name=param))
elif param.startswith("m"): # LMUL setting
operands.append(IdentifierOperand(name=param))
else:
operands.append(IdentifierOperand(name=param))
else:
operands.append(IdentifierOperand(name=result["vector_param"]))
@@ -368,11 +409,15 @@ class ParserRISCV(BaseParser):
return_dict = InstructionForm(
mnemonic=result["mnemonic"],
operands=operands,
comment_id=" ".join(result[self.comment_id]) if self.comment_id in result else None,
comment_id=(
" ".join(result[self.comment_id])
if self.comment_id in result
else None
),
)
return return_dict
except Exception as e:
except Exception:
# For special vector instructions or ones with % in them
if "%" in instruction or instruction.startswith("v"):
parts = instruction.split("#")[0].strip().split(None, 1)
@@ -383,10 +428,17 @@ class ParserRISCV(BaseParser):
operands_list = [op.strip() for op in operand_part.split(",")]
for op in operands_list:
# Process '%hi(data)' to 'data' for certain operands
if op.startswith("%") and '(' in op and ')' in op:
# Extract data from %hi(data) format
data = op[op.index('(')+1:op.index(')')]
operands.append(IdentifierOperand(name=data))
if op.startswith("%") and "(" in op and ")" in op:
reloc_type = op[: op.index("(")]
symbol = op[op.index("(") + 1 : op.index(")")]
operands.append(
ImmediateOperand(
imd_type="reloc",
value=None,
reloc_type=reloc_type,
symbol=symbol,
)
)
else:
operands.append(IdentifierOperand(name=op))
@@ -395,9 +447,7 @@ class ParserRISCV(BaseParser):
comment = instruction.split("#", 1)[1].strip()
return InstructionForm(
mnemonic=mnemonic,
operands=operands,
comment_id=comment
mnemonic=mnemonic, operands=operands, comment_id=comment
)
else:
raise
@@ -430,12 +480,42 @@ class ParserRISCV(BaseParser):
)
def process_register_operand(self, operand):
"""Process register operands, including ABI name to x-register mapping"""
# If already has prefix (x#, f#, v#), just return as is
"""Process register operands, including ABI name to x-register mapping
and vector attributes"""
# If already has prefix (x#, f#, v#), process with appropriate attributes
if "prefix" in operand:
prefix = operand["prefix"].lower()
# Special handling for vector registers
if prefix == "v":
return RegisterOperand(
prefix=operand["prefix"].lower(),
name=operand["name"]
prefix=prefix,
name=operand["name"],
regtype="vector",
# Vector registers can have different element widths (e8,e16,e32,e64)
width=operand.get("width", None),
# Number of elements (m1,m2,m4,m8)
lanes=operand.get("lanes", None),
# For vector mask registers
mask=operand.get("mask", False),
# For tail agnostic/undisturbed policies
zeroing=operand.get("zeroing", False),
)
# For floating point registers
elif prefix == "f":
return RegisterOperand(
prefix=prefix,
name=operand["name"],
regtype="float",
width=64, # RISC-V typically uses 64-bit float registers
)
# For integer registers
elif prefix == "x":
return RegisterOperand(
prefix=prefix,
name=operand["name"],
regtype="int",
width=64, # RV64 uses 64-bit registers
)
# Handle ABI names by converting to x-register numbers
@@ -443,49 +523,84 @@ class ParserRISCV(BaseParser):
# ABI name mapping for integer registers
abi_to_x = {
"zero": "0", "ra": "1", "sp": "2", "gp": "3", "tp": "4",
"t0": "5", "t1": "6", "t2": "7",
"s0": "8", "fp": "8", "s1": "9",
"a0": "10", "a1": "11", "a2": "12", "a3": "13",
"a4": "14", "a5": "15", "a6": "16", "a7": "17",
"s2": "18", "s3": "19", "s4": "20", "s5": "21",
"s6": "22", "s7": "23", "s8": "24", "s9": "25",
"s10": "26", "s11": "27",
"t3": "28", "t4": "29", "t5": "30", "t6": "31"
"zero": "x0",
"ra": "x1",
"sp": "x2",
"gp": "x3",
"tp": "x4",
"t0": "x5",
"t1": "x6",
"t2": "x7",
"s0": "x8",
"s1": "x9",
"a0": "x10",
"a1": "x11",
"a2": "x12",
"a3": "x13",
"a4": "x14",
"a5": "x15",
"a6": "x16",
"a7": "x17",
"s2": "x18",
"s3": "x19",
"s4": "x20",
"s5": "x21",
"s6": "x22",
"s7": "x23",
"s8": "x24",
"s9": "x25",
"s10": "x26",
"s11": "x27",
"t3": "x28",
"t4": "x29",
"t5": "x30",
"t6": "x31",
}
# Integer register ABI names
if name in abi_to_x:
return RegisterOperand(
prefix="x",
name=abi_to_x[name]
name=abi_to_x[name],
regtype="int",
width=64, # RV64 uses 64-bit registers
)
# Floating point register ABI names
elif name.startswith("f") and name[1] in ["t", "a", "s"]:
if name[1] == "a": # fa0-fa7
idx = int(name[2:])
return RegisterOperand(prefix="f", name=str(idx + 10))
return RegisterOperand(
prefix="f", name=str(idx + 10), regtype="float", width=64
)
elif name[1] == "s": # fs0-fs11
idx = int(name[2:])
if idx <= 1:
return RegisterOperand(prefix="f", name=str(idx + 8))
return RegisterOperand(
prefix="f", name=str(idx + 8), regtype="float", width=64
)
else:
return RegisterOperand(prefix="f", name=str(idx + 16))
return RegisterOperand(
prefix="f", name=str(idx + 16), regtype="float", width=64
)
elif name[1] == "t": # ft0-ft11
idx = int(name[2:])
if idx <= 7:
return RegisterOperand(prefix="f", name=str(idx))
return RegisterOperand(
prefix="f", name=str(idx), regtype="float", width=64
)
else:
return RegisterOperand(prefix="f", name=str(idx + 20))
return RegisterOperand(
prefix="f", name=str(idx + 20), regtype="float", width=64
)
# CSR registers
elif name.startswith("csr"):
return RegisterOperand(prefix="", name=name)
return RegisterOperand(prefix="", name=name, regtype="csr")
# If no mapping found, return as is
return RegisterOperand(prefix="", name=name)
def process_memory_address(self, memory_address):
"""Post-process memory address operand"""
"""Post-process memory address operand with RISC-V specific attributes"""
# Process offset
offset = memory_address.get("offset", None)
if isinstance(offset, list) and len(offset) == 1:
@@ -500,12 +615,32 @@ class ParserRISCV(BaseParser):
if base is not None:
base = self.process_register_operand(base)
# Create memory operand
# Determine data type from instruction context if available
# RISC-V load/store instructions encode the data width in the mnemonic
# e.g., lw (word), lh (half), lb (byte), etc.
data_type = None
if hasattr(self, "current_instruction"):
mnemonic = self.current_instruction.lower()
if any(x in mnemonic for x in ["b", "bu"]): # byte operations
data_type = "byte"
elif any(x in mnemonic for x in ["h", "hu"]): # halfword operations
data_type = "halfword"
elif any(x in mnemonic for x in ["w", "wu"]): # word operations
data_type = "word"
elif "d" in mnemonic: # doubleword operations
data_type = "doubleword"
# Create memory operand with enhanced attributes
return MemoryOperand(
offset=offset,
base=base,
index=None,
scale=1
index=None, # RISC-V doesn't use index registers
scale=1, # RISC-V doesn't use scaling
data_type=data_type,
# Handle vector memory operations
mask=memory_address.get("mask", None), # For vector masked loads/stores
src=memory_address.get("src", None), # Source register type for stores
dst=memory_address.get("dst", None), # Destination register type for loads
)
def process_label(self, label):
@@ -519,21 +654,102 @@ class ParserRISCV(BaseParser):
"""Post-process identifier operand"""
return IdentifierOperand(
name=identifier["name"] if "name" in identifier else None,
offset=identifier["offset"] if "offset" in identifier else None
offset=identifier["offset"] if "offset" in identifier else None,
)
def process_immediate(self, immediate):
"""Post-process immediate operand"""
"""Post-process immediate operand with RISC-V specific handling"""
# Handle relocations
if "relocation" in immediate:
reloc = immediate["relocation"]
return ImmediateOperand(
imd_type="reloc",
value=None,
reloc_type=reloc["reloc_type"],
symbol=reloc["symbol"],
)
# Handle identifiers
if "identifier" in immediate:
# actually an identifier, change declaration
return self.process_identifier(immediate["identifier"])
# Handle numeric values with validation
if "value" in immediate:
# normal integer value
immediate["type"] = "int"
# convert hex/bin immediates to dec
new_immediate = ImmediateOperand(imd_type=immediate["type"], value=immediate["value"])
new_immediate.value = self.normalize_imd(new_immediate)
return new_immediate
value = int(
immediate["value"], 0
) # Convert to integer, handling hex/decimal
# Determine immediate type and validate range based on instruction type
if hasattr(self, "current_instruction"):
mnemonic = self.current_instruction.lower()
# I-type instructions (12-bit signed immediate)
if any(
x in mnemonic
for x in [
"addi",
"slti",
"xori",
"ori",
"andi",
"slli",
"srli",
"srai",
]
):
if not -2048 <= value <= 2047:
raise ValueError(
f"Immediate value {value} out of range for I-type "
f"instruction (-2048 to 2047)"
)
return ImmediateOperand(imd_type="I", value=value)
# S-type instructions (12-bit signed immediate for store)
elif any(x in mnemonic for x in ["sb", "sh", "sw", "sd"]):
if not -2048 <= value <= 2047:
raise ValueError(
f"Immediate value {value} out of range for S-type "
f"instruction (-2048 to 2047)"
)
return ImmediateOperand(imd_type="S", value=value)
# B-type instructions (13-bit signed immediate for branches, must be even)
elif any(
x in mnemonic for x in ["beq", "bne", "blt", "bge", "bltu", "bgeu"]
):
if not -4096 <= value <= 4095 or value % 2 != 0:
raise ValueError(
f"Immediate value {value} out of range or not even "
f"for B-type instruction (-4096 to 4095, must be even)"
)
return ImmediateOperand(imd_type="B", value=value)
# U-type instructions (20-bit upper immediate)
elif any(x in mnemonic for x in ["lui", "auipc"]):
if not 0 <= value <= 1048575:
raise ValueError(
f"Immediate value {value} out of range for U-type "
f"instruction (0 to 1048575)"
)
return ImmediateOperand(imd_type="U", value=value)
# J-type instructions (21-bit signed immediate for jumps, must be even)
elif any(x in mnemonic for x in ["jal"]):
if not -1048576 <= value <= 1048575 or value % 2 != 0:
raise ValueError(
f"Immediate value {value} out of range or not even "
f"for J-type instruction (-1048576 to 1048575, must be even)"
)
return ImmediateOperand(imd_type="J", value=value)
# Vector instructions might have specific immediate ranges
elif mnemonic.startswith("v"):
# Handle vector specific immediates (implementation specific)
return ImmediateOperand(imd_type="V", value=value)
# Default case - no specific validation
return ImmediateOperand(imd_type="int", value=value)
return immediate
def get_full_reg_name(self, register):
@@ -566,35 +782,74 @@ class ParserRISCV(BaseParser):
register_string = register_string.strip()
# Check for integer registers (x0-x31)
x_match = re.match(r'^x([0-9]|[1-2][0-9]|3[0-1])$', register_string)
x_match = re.match(r"^x([0-9]|[1-2][0-9]|3[0-1])$", register_string)
if x_match:
reg_num = int(x_match.group(1))
return {"class": "register", "register": {"prefix": "x", "name": str(reg_num)}}
return {
"class": "register",
"register": {"prefix": "x", "name": str(reg_num)},
}
# Check for floating-point registers (f0-f31)
f_match = re.match(r'^f([0-9]|[1-2][0-9]|3[0-1])$', register_string)
f_match = re.match(r"^f([0-9]|[1-2][0-9]|3[0-1])$", register_string)
if f_match:
reg_num = int(f_match.group(1))
return {"class": "register", "register": {"prefix": "f", "name": str(reg_num)}}
return {
"class": "register",
"register": {"prefix": "f", "name": str(reg_num)},
}
# Check for vector registers (v0-v31)
v_match = re.match(r'^v([0-9]|[1-2][0-9]|3[0-1])$', register_string)
v_match = re.match(r"^v([0-9]|[1-2][0-9]|3[0-1])$", register_string)
if v_match:
reg_num = int(v_match.group(1))
return {"class": "register", "register": {"prefix": "v", "name": str(reg_num)}}
return {
"class": "register",
"register": {"prefix": "v", "name": str(reg_num)},
}
# Check for ABI names
abi_names = {
"zero": 0, "ra": 1, "sp": 2, "gp": 3, "tp": 4,
"t0": 5, "t1": 6, "t2": 7,
"s0": 8, "fp": 8, "s1": 9,
"a0": 10, "a1": 11, "a2": 12, "a3": 13, "a4": 14, "a5": 15, "a6": 16, "a7": 17,
"s2": 18, "s3": 19, "s4": 20, "s5": 21, "s6": 22, "s7": 23, "s8": 24, "s9": 25, "s10": 26, "s11": 27,
"t3": 28, "t4": 29, "t5": 30, "t6": 31
"zero": 0,
"ra": 1,
"sp": 2,
"gp": 3,
"tp": 4,
"t0": 5,
"t1": 6,
"t2": 7,
"s0": 8,
"fp": 8,
"s1": 9,
"a0": 10,
"a1": 11,
"a2": 12,
"a3": 13,
"a4": 14,
"a5": 15,
"a6": 16,
"a7": 17,
"s2": 18,
"s3": 19,
"s4": 20,
"s5": 21,
"s6": 22,
"s7": 23,
"s8": 24,
"s9": 25,
"s10": 26,
"s11": 27,
"t3": 28,
"t4": 29,
"t5": 30,
"t6": 31,
}
if register_string in abi_names:
return {"class": "register", "register": {"prefix": "", "name": register_string}}
return {
"class": "register",
"register": {"prefix": "", "name": register_string},
}
# If no match is found
return None
@@ -642,15 +897,38 @@ class ParserRISCV(BaseParser):
# ABI name mapping for integer registers
abi_to_x = {
"zero": "x0", "ra": "x1", "sp": "x2", "gp": "x3", "tp": "x4",
"t0": "x5", "t1": "x6", "t2": "x7",
"s0": "x8", "s1": "x9",
"a0": "x10", "a1": "x11", "a2": "x12", "a3": "x13",
"a4": "x14", "a5": "x15", "a6": "x16", "a7": "x17",
"s2": "x18", "s3": "x19", "s4": "x20", "s5": "x21",
"s6": "x22", "s7": "x23", "s8": "x24", "s9": "x25",
"s10": "x26", "s11": "x27",
"t3": "x28", "t4": "x29", "t5": "x30", "t6": "x31"
"zero": "x0",
"ra": "x1",
"sp": "x2",
"gp": "x3",
"tp": "x4",
"t0": "x5",
"t1": "x6",
"t2": "x7",
"s0": "x8",
"s1": "x9",
"a0": "x10",
"a1": "x11",
"a2": "x12",
"a3": "x13",
"a4": "x14",
"a5": "x15",
"a6": "x16",
"a7": "x17",
"s2": "x18",
"s3": "x19",
"s4": "x20",
"s5": "x21",
"s6": "x22",
"s7": "x23",
"s8": "x24",
"s9": "x25",
"s10": "x26",
"s11": "x27",
"t3": "x28",
"t4": "x29",
"t5": "x30",
"t6": "x31",
}
# For integer register ABI names
@@ -751,7 +1029,7 @@ class ParserRISCV(BaseParser):
# For raw integer values or string immediates
return ImmediateOperand(
imd_type="int",
value=str(operand) if isinstance(operand, int) else operand
value=str(operand) if isinstance(operand, int) else operand,
)
elif isinstance(operand, dict) and "imd" in operand:
# For immediate operands from instruction definitions
@@ -759,11 +1037,8 @@ class ParserRISCV(BaseParser):
imd_type=operand["imd"],
value=operand.get("value"),
identifier=operand.get("identifier"),
shift=operand.get("shift")
shift=operand.get("shift"),
)
else:
# For any other immediate format
return ImmediateOperand(
imd_type="int",
value=str(operand)
)
return ImmediateOperand(imd_type="int", value=str(operand))

View File

@@ -1025,6 +1025,7 @@ class MachineModel(object):
try:
# Need to check if they refer to the same register
from osaca.parser import ParserRISCV
parser = ParserRISCV()
reg_canonical = parser._get_canonical_reg_name(reg)
i_reg_canonical = parser._get_canonical_reg_name(i_reg)

View File

@@ -307,11 +307,13 @@ class TestCLI(unittest.TestCase):
@staticmethod
def _find_file(kernel, arch, comp):
testdir = os.path.dirname(__file__)
# Handle special case for rv64 architecture
arch_prefix = arch.lower() if arch.lower() == "rv64" else arch[:3].lower()
name = os.path.join(
testdir,
"../examples",
kernel,
kernel + ".s." + arch[:3].lower() + "." + comp.lower() + ".s",
kernel + ".s." + arch_prefix + "." + comp.lower() + ".s",
)
if kernel == "j2d" and arch.lower() == "csx":
name = name[:-1] + "AVX.s"

View File

@@ -8,9 +8,7 @@ import unittest
from pyparsing import ParseException
from osaca.parser import ParserRISCV, InstructionForm
from osaca.parser.directive import DirectiveOperand
from osaca.parser.memory import MemoryOperand
from osaca.parser import ParserRISCV
from osaca.parser.register import RegisterOperand
from osaca.parser.immediate import ImmediateOperand
from osaca.parser.identifier import IdentifierOperand
@@ -180,12 +178,10 @@ class TestParserRISCV(unittest.TestCase):
# Test floating-point registers
reg_fa0 = RegisterOperand(prefix="f", name="a0")
reg_fa1 = RegisterOperand(prefix="f", name="a1")
reg_f10 = RegisterOperand(prefix="f", name="10")
# Test vector registers
reg_v1 = RegisterOperand(prefix="v", name="1")
reg_v2 = RegisterOperand(prefix="v", name="2")
# Test register type detection
self.assertTrue(self.parser.is_gpr(reg_a0))