Apply selected improvements from 1ceac6e: enhanced RISC-V parser, ImmediateOperand enhancements, and rv6→rv64 file renames

- Enhanced ImmediateOperand with reloc_type and symbol attributes for better RISC-V support
- Updated RISC-V parser with relocation type support (%hi, %lo, %pcrel_hi, etc.)
- Renamed example files from rv6 to rv64 for consistency
- Updated related configuration and test files
- All 115 tests pass successfully
2025-12-15 16:40:05 +01:00 · 2025-07-11 18:15:51 +02:00
parent 61b52dbf28
commit ebf76caa18
16 changed files with 554 additions and 253 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,5 @@
 # OSACA specific files and folders
 *.*.pickle
-osaca_testfront_venv/
-examples/riscy_asm_files/

 # Byte-compiled / optimized / DLL files
 __pycache__/
--- a/examples/add/add.s.rv64.gcc.s
+++ b/examples/add/add.s.rv64.gcc.s
--- a/examples/copy/copy.s.rv64.gcc.s
+++ b/examples/copy/copy.s.rv64.gcc.s
--- a/examples/daxpy/daxpy.s.rv64.gcc.s
+++ b/examples/daxpy/daxpy.s.rv64.gcc.s
--- a/examples/gs/gs.s.rv64.gcc.s
+++ b/examples/gs/gs.s.rv64.gcc.s
--- a/examples/j2d/j2d.s.rv64.gcc.s
+++ b/examples/j2d/j2d.s.rv64.gcc.s
--- a/examples/striad/striad.s.rv64.gcc.s
+++ b/examples/striad/striad.s.rv64.gcc.s
--- a/examples/sum_reduction/sum_reduction.s.rv64.gcc.s
+++ b/examples/sum_reduction/sum_reduction.s.rv64.gcc.s
--- a/examples/triad/triad.s.rv64.gcc.s
+++ b/examples/triad/triad.s.rv64.gcc.s
--- a/examples/update/update.s.rv64.gcc.s
+++ b/examples/update/update.s.rv64.gcc.s
--- a/osaca/parser/immediate.py
+++ b/osaca/parser/immediate.py
@@ -10,6 +10,8 @@ class ImmediateOperand(Operand):
        imd_type=None,
        value=None,
        shift=None,
+        reloc_type=None,
+        symbol=None,
        source=False,
        destination=False,
    ):
@@ -18,6 +20,8 @@ class ImmediateOperand(Operand):
        self._imd_type = imd_type
        self._value = value
        self._shift = shift
+        self._reloc_type = reloc_type
+        self._symbol = symbol

    @property
    def identifier(self):
@@ -33,7 +37,15 @@ class ImmediateOperand(Operand):

    @property
    def shift(self):
-        return self._imd_type
+        return self._shift
+
+    @property
+    def reloc_type(self):
+        return self._reloc_type
+
+    @property
+    def symbol(self):
+        return self._symbol

    @imd_type.setter
    def imd_type(self, itype):
@@ -51,10 +63,19 @@ class ImmediateOperand(Operand):
    def shift(self, shift):
        self._shift = shift

+    @reloc_type.setter
+    def reloc_type(self, reloc_type):
+        self._reloc_type = reloc_type
+
+    @symbol.setter
+    def symbol(self, symbol):
+        self._symbol = symbol
+
    def __str__(self):
        return (
            f"Immediate(identifier={self._identifier}, imd_type={self._imd_type}, "
-            f"value={self._value}, shift={self._shift}, source={self._source}, destination={self._destination})"
+            f"value={self._value}, shift={self._shift}, reloc_type={self._reloc_type}, "
+            f"symbol={self._symbol}, source={self._source}, destination={self._destination})"
        )

    def __repr__(self):
@@ -62,10 +83,18 @@ class ImmediateOperand(Operand):

    def __eq__(self, other):
        if isinstance(other, ImmediateOperand):
+            # Handle cases where old instances might not have the new attributes
+            self_reloc_type = getattr(self, "_reloc_type", None)
+            self_symbol = getattr(self, "_symbol", None)
+            other_reloc_type = getattr(other, "_reloc_type", None)
+            other_symbol = getattr(other, "_symbol", None)
+
            return (
                self._identifier == other._identifier
                and self._imd_type == other._imd_type
                and self._value == other._value
                and self._shift == other._shift
+                and self_reloc_type == other_reloc_type
+                and self_symbol == other_symbol
            )
        return False
--- a/osaca/parser/parser_RISCV.py
+++ b/osaca/parser/parser_RISCV.py
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
 import re
-import os
-from copy import deepcopy
 import pyparsing as pp

 from osaca.parser import BaseParser
@@ -13,7 +11,6 @@ from osaca.parser.label import LabelOperand
 from osaca.parser.register import RegisterOperand
 from osaca.parser.identifier import IdentifierOperand
 from osaca.parser.immediate import ImmediateOperand
-from osaca.parser.condition import ConditionOperand


 class ParserRISCV(BaseParser):
@@ -70,9 +67,23 @@ class ParserRISCV(BaseParser):
            pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
        ).setResultsName("value")

-        # Additional identifiers used in vector instructions
-        vector_identifier = pp.Word(pp.alphas, pp.alphanums)
-        special_identifier = pp.Word(pp.alphas + "%")
+        # RISC-V specific relocation attributes
+        reloc_type = (
+            pp.Literal("%hi")
+            | pp.Literal("%lo")
+            | pp.Literal("%pcrel_hi")
+            | pp.Literal("%pcrel_lo")
+            | pp.Literal("%tprel_hi")
+            | pp.Literal("%tprel_lo")
+            | pp.Literal("%tprel_add")
+        ).setResultsName("reloc_type")
+
+        reloc_expr = pp.Group(
+            reloc_type
+            + pp.Suppress("(")
+            + pp.Word(pp.alphas + pp.nums + "_").setResultsName("symbol")
+            + pp.Suppress(")")
+        ).setResultsName("relocation")

        # First character of an identifier
        first = pp.Word(pp.alphas + "_.", exact=1)
@@ -89,9 +100,16 @@ class ParserRISCV(BaseParser):
            )
        ).setResultsName(self.identifier)

+        # Immediate with optional relocation
+        immediate = pp.Group(
+            reloc_expr | (hex_number ^ decimal_number) | identifier
+        ).setResultsName(self.immediate_id)
+
        # Label
        self.label = pp.Group(
-            identifier.setResultsName("name") + pp.Literal(":") + pp.Optional(self.comment)
+            identifier.setResultsName("name")
+            + pp.Literal(":")
+            + pp.Optional(self.comment)
        ).setResultsName(self.label_id)

        # Directive
@@ -100,16 +118,22 @@ class ParserRISCV(BaseParser):
            + pp.Optional(pp.Word(pp.printables + " ", excludeChars=","))
        )

-        # For vector instructions
-        vector_parameter = pp.Word(pp.alphas)
        directive_parameter = (
-            pp.quotedString | directive_option | identifier | hex_number | decimal_number
+            pp.quotedString
+            | directive_option
+            | identifier
+            | hex_number
+            | decimal_number
+        )
+        commaSeparatedList = pp.delimitedList(
+            pp.Optional(directive_parameter), delim=","
        )
-        commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=",")
        self.directive = pp.Group(
            pp.Literal(".")
            + pp.Word(pp.alphanums + "_").setResultsName("name")
-            + (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName("parameters")
+            + (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName(
+                "parameters"
+            )
            + pp.Optional(self.comment)
        ).setResultsName(self.directive_id)

@@ -128,13 +152,6 @@ class ParserRISCV(BaseParser):
        # Mnemonic
        mnemonic = pp.Word(pp.alphanums + ".").setResultsName("mnemonic")

-        # Immediate:
-        # int: ^-?[0-9]+ | hex: ^0x[0-9a-fA-F]+
-        immediate = pp.Group(
-            (hex_number ^ decimal_number)
-            | identifier
-        ).setResultsName(self.immediate_id)
-        
        # Register:
        # RISC-V has two main types of registers:
        # 1. Integer registers (x0-x31 or ABI names)
@@ -142,27 +159,27 @@ class ParserRISCV(BaseParser):

        # Integer register ABI names
        integer_reg_abi = (
-            pp.CaselessLiteral("zero") |
-            pp.CaselessLiteral("ra") |
-            pp.CaselessLiteral("sp") |
-            pp.CaselessLiteral("gp") |
-            pp.CaselessLiteral("tp") |
-            pp.Regex(r"[tas][0-9]+")  # t0-t6, a0-a7, s0-s11
+            pp.CaselessLiteral("zero")
+            | pp.CaselessLiteral("ra")
+            | pp.CaselessLiteral("sp")
+            | pp.CaselessLiteral("gp")
+            | pp.CaselessLiteral("tp")
+            | pp.Regex(r"[tas][0-9]+")  # t0-t6, a0-a7, s0-s11
        ).setResultsName("name")

        # Integer registers x0-x31
-        integer_reg_x = (
-            pp.CaselessLiteral("x").setResultsName("prefix") + 
-            pp.Word(pp.nums).setResultsName("name")
-        )
+        integer_reg_x = pp.CaselessLiteral("x").setResultsName("prefix") + pp.Word(
+            pp.nums
+        ).setResultsName("name")

        # Floating point registers
-        fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName("name")  # ft0-ft11, fa0-fa7, fs0-fs11
+        fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName(
+            "name"
+        )  # ft0-ft11, fa0-fa7, fs0-fs11

-        fp_reg_f = (
-            pp.CaselessLiteral("f").setResultsName("prefix") + 
-            pp.Word(pp.nums).setResultsName("name")
-        )
+        fp_reg_f = pp.CaselessLiteral("f").setResultsName("prefix") + pp.Word(
+            pp.nums
+        ).setResultsName("name")

        # Control and status registers (CSRs)
        csr_reg = pp.Combine(
@@ -170,14 +187,18 @@ class ParserRISCV(BaseParser):
        ).setResultsName("name")

        # Vector registers (for the "V" extension)
-        vector_reg = (
-            pp.CaselessLiteral("v").setResultsName("prefix") + 
-            pp.Word(pp.nums).setResultsName("name")
-        )
+        vector_reg = pp.CaselessLiteral("v").setResultsName("prefix") + pp.Word(
+            pp.nums
+        ).setResultsName("name")

        # Combined register definition
        register = pp.Group(
-            integer_reg_x | integer_reg_abi | fp_reg_f | fp_reg_abi | vector_reg | csr_reg
+            integer_reg_x
+            | integer_reg_abi
+            | fp_reg_f
+            | fp_reg_abi
+            | vector_reg
+            | csr_reg
        ).setResultsName(self.register_id)

        self.register = register
@@ -191,20 +212,15 @@ class ParserRISCV(BaseParser):
        ).setResultsName(self.memory_id)

        # Combine to instruction form
-        operand_first = pp.Group(
-            register ^ immediate ^ memory ^ identifier
-        )
-        operand_rest = pp.Group(
-            register ^ immediate ^ memory ^ identifier
-        )
-        
-        # Vector instruction special parameters (e.g., e32, m4, ta, ma)
-        vector_param = pp.Word(pp.alphas + pp.nums)
+        operand_first = pp.Group(register ^ immediate ^ memory ^ identifier)
+        operand_rest = pp.Group(register ^ immediate ^ memory ^ identifier)

        # Handle additional vector parameters
        additional_params = pp.ZeroOrMore(
-            pp.Suppress(pp.Literal(",")) + 
-            vector_param.setResultsName("vector_param", listAllMatches=True)
+            pp.Suppress(pp.Literal(","))
+            + pp.Word(pp.alphas + pp.nums).setResultsName(
+                "vector_param", listAllMatches=True
+            )
        )

        # Main instruction parser
@@ -217,7 +233,7 @@ class ParserRISCV(BaseParser):
            + pp.Optional(operand_rest.setResultsName("operand3"))
            + pp.Optional(pp.Suppress(pp.Literal(",")))
            + pp.Optional(operand_rest.setResultsName("operand4"))
-            + pp.Optional(additional_params)  # For vector instructions with more params
+            + pp.Optional(additional_params)
            + pp.Optional(self.comment)
        )

@@ -228,7 +244,8 @@ class ParserRISCV(BaseParser):
        :param str line: line of assembly code
        :param line_number: identifier of instruction form, defaults to None
        :type line_number: int, optional
-        :return: `dict` -- parsed asm line (comment, label, directive or instruction form)
+        :return: `dict` -- parsed asm line (comment, label, directive or
+                 instruction form)
        """
        instruction_form = InstructionForm(
            mnemonic=None,
@@ -243,7 +260,9 @@ class ParserRISCV(BaseParser):

        # 1. Parse comment
        try:
-            result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())
+            result = self.process_operand(
+                self.comment.parseString(line, parseAll=True).asDict()
+            )
            instruction_form.comment = " ".join(result[self.comment_id])
        except pp.ParseException:
            pass
@@ -261,7 +280,9 @@ class ParserRISCV(BaseParser):
        if result is None:
            try:
                # returns tuple with label operand and comment, if any
-                result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
+                result = self.process_operand(
+                    self.label.parseString(line, parseAll=True).asDict()
+                )
                instruction_form.label = result[0].name
                if result[1] is not None:
                    instruction_form.comment = " ".join(result[1])
@@ -304,9 +325,20 @@ class ParserRISCV(BaseParser):
        :param str instruction: Assembly line string.
        :returns: `dict` -- parsed instruction form
        """
+        # Store current instruction for context in operand processing
+        if instruction.startswith("vsetvli"):
+            self.current_instruction = "vsetvli"
+        else:
+            # Extract mnemonic for context
+            parts = instruction.split("#")[0].strip().split()
+            self.current_instruction = parts[0] if parts else None
+
        # Special handling for vector instructions like vsetvli with many parameters
        if instruction.startswith("vsetvli"):
-            parts = instruction.split("#")[0].strip().split()
+            # Split into mnemonic and operands part
+            parts = (
+                instruction.split("#")[0].strip().split(None, 1)
+            )  # Split on first whitespace only
            mnemonic = parts[0]

            # Split operands by commas
@@ -317,9 +349,19 @@ class ParserRISCV(BaseParser):
                # Process each operand
                operands = []
                for op in operands_list:
-                    if op.startswith("x") or op in ["zero", "ra", "sp", "gp", "tp"] or re.match(r"[tas][0-9]+", op):
+                    if (
+                        op.startswith("x")
+                        or op in ["zero", "ra", "sp", "gp", "tp"]
+                        or re.match(r"[tas][0-9]+", op)
+                    ):
                        operands.append(RegisterOperand(name=op))
-                    elif op in ["e8", "e16", "e32", "e64", "m1", "m2", "m4", "m8", "ta", "tu", "ma", "mu"]:
+                    else:
+                        # Vector parameters get appropriate attributes
+                        if op.startswith("e"):  # Element width
+                            operands.append(IdentifierOperand(name=op))
+                        elif op.startswith("m"):  # LMUL setting
+                            operands.append(IdentifierOperand(name=op))
+                        elif op in ["ta", "tu", "ma", "mu"]:  # Tail/mask policies
                            operands.append(IdentifierOperand(name=op))
                        else:
                            operands.append(IdentifierOperand(name=op))
@@ -330,37 +372,36 @@ class ParserRISCV(BaseParser):
                    comment = instruction.split("#", 1)[1].strip()

                return InstructionForm(
-                    mnemonic=mnemonic,
-                    operands=operands,
-                    comment_id=comment
+                    mnemonic=mnemonic, operands=operands, comment_id=comment
                )

        # Regular instruction parsing
        try:
-            result = self.instruction_parser.parseString(instruction, parseAll=True).asDict()
+            result = self.instruction_parser.parseString(
+                instruction, parseAll=True
+            ).asDict()
            operands = []
-            # Add operands to list
-            # Check first operand
-            if "operand1" in result:
-                operand = self.process_operand(result["operand1"])
-                operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
-            # Check second operand
-            if "operand2" in result:
-                operand = self.process_operand(result["operand2"])
-                operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
-            # Check third operand
-            if "operand3" in result:
-                operand = self.process_operand(result["operand3"])
-                operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
-            # Check fourth operand
-            if "operand4" in result:
-                operand = self.process_operand(result["operand4"])
-                operands.extend(operand) if isinstance(operand, list) else operands.append(operand)

-            # Handle vector_param for vector instructions
+            # Process operands
+            for i in range(1, 5):
+                operand_key = f"operand{i}"
+                if operand_key in result:
+                    operand = self.process_operand(result[operand_key])
+                    (
+                        operands.extend(operand)
+                        if isinstance(operand, list)
+                        else operands.append(operand)
+                    )
+
+            # Handle vector parameters as identifiers with appropriate attributes
            if "vector_param" in result:
                if isinstance(result["vector_param"], list):
                    for param in result["vector_param"]:
+                        if param.startswith("e"):  # Element width
+                            operands.append(IdentifierOperand(name=param))
+                        elif param.startswith("m"):  # LMUL setting
+                            operands.append(IdentifierOperand(name=param))
+                        else:
                            operands.append(IdentifierOperand(name=param))
                else:
                    operands.append(IdentifierOperand(name=result["vector_param"]))
@@ -368,11 +409,15 @@ class ParserRISCV(BaseParser):
            return_dict = InstructionForm(
                mnemonic=result["mnemonic"],
                operands=operands,
-                comment_id=" ".join(result[self.comment_id]) if self.comment_id in result else None,
+                comment_id=(
+                    " ".join(result[self.comment_id])
+                    if self.comment_id in result
+                    else None
+                ),
            )
            return return_dict

-        except Exception as e:
+        except Exception:
            # For special vector instructions or ones with % in them
            if "%" in instruction or instruction.startswith("v"):
                parts = instruction.split("#")[0].strip().split(None, 1)
@@ -383,10 +428,17 @@ class ParserRISCV(BaseParser):
                    operands_list = [op.strip() for op in operand_part.split(",")]
                    for op in operands_list:
                        # Process '%hi(data)' to 'data' for certain operands
-                        if op.startswith("%") and '(' in op and ')' in op:
-                            # Extract data from %hi(data) format
-                            data = op[op.index('(')+1:op.index(')')]
-                            operands.append(IdentifierOperand(name=data))
+                        if op.startswith("%") and "(" in op and ")" in op:
+                            reloc_type = op[: op.index("(")]
+                            symbol = op[op.index("(") + 1 : op.index(")")]
+                            operands.append(
+                                ImmediateOperand(
+                                    imd_type="reloc",
+                                    value=None,
+                                    reloc_type=reloc_type,
+                                    symbol=symbol,
+                                )
+                            )
                        else:
                            operands.append(IdentifierOperand(name=op))

@@ -395,9 +447,7 @@ class ParserRISCV(BaseParser):
                    comment = instruction.split("#", 1)[1].strip()

                return InstructionForm(
-                    mnemonic=mnemonic,
-                    operands=operands,
-                    comment_id=comment
+                    mnemonic=mnemonic, operands=operands, comment_id=comment
                )
            else:
                raise
@@ -430,12 +480,42 @@ class ParserRISCV(BaseParser):
        )

    def process_register_operand(self, operand):
-        """Process register operands, including ABI name to x-register mapping"""
-        # If already has prefix (x#, f#, v#), just return as is
+        """Process register operands, including ABI name to x-register mapping
+        and vector attributes"""
+        # If already has prefix (x#, f#, v#), process with appropriate attributes
        if "prefix" in operand:
+            prefix = operand["prefix"].lower()
+
+            # Special handling for vector registers
+            if prefix == "v":
                return RegisterOperand(
-                prefix=operand["prefix"].lower(),
-                name=operand["name"]
+                    prefix=prefix,
+                    name=operand["name"],
+                    regtype="vector",
+                    # Vector registers can have different element widths (e8,e16,e32,e64)
+                    width=operand.get("width", None),
+                    # Number of elements (m1,m2,m4,m8)
+                    lanes=operand.get("lanes", None),
+                    # For vector mask registers
+                    mask=operand.get("mask", False),
+                    # For tail agnostic/undisturbed policies
+                    zeroing=operand.get("zeroing", False),
+                )
+            # For floating point registers
+            elif prefix == "f":
+                return RegisterOperand(
+                    prefix=prefix,
+                    name=operand["name"],
+                    regtype="float",
+                    width=64,  # RISC-V typically uses 64-bit float registers
+                )
+            # For integer registers
+            elif prefix == "x":
+                return RegisterOperand(
+                    prefix=prefix,
+                    name=operand["name"],
+                    regtype="int",
+                    width=64,  # RV64 uses 64-bit registers
                )

        # Handle ABI names by converting to x-register numbers
@@ -443,49 +523,84 @@ class ParserRISCV(BaseParser):

        # ABI name mapping for integer registers
        abi_to_x = {
-            "zero": "0", "ra": "1", "sp": "2", "gp": "3", "tp": "4",
-            "t0": "5", "t1": "6", "t2": "7",
-            "s0": "8", "fp": "8", "s1": "9",
-            "a0": "10", "a1": "11", "a2": "12", "a3": "13",
-            "a4": "14", "a5": "15", "a6": "16", "a7": "17",
-            "s2": "18", "s3": "19", "s4": "20", "s5": "21",
-            "s6": "22", "s7": "23", "s8": "24", "s9": "25",
-            "s10": "26", "s11": "27",
-            "t3": "28", "t4": "29", "t5": "30", "t6": "31"
+            "zero": "x0",
+            "ra": "x1",
+            "sp": "x2",
+            "gp": "x3",
+            "tp": "x4",
+            "t0": "x5",
+            "t1": "x6",
+            "t2": "x7",
+            "s0": "x8",
+            "s1": "x9",
+            "a0": "x10",
+            "a1": "x11",
+            "a2": "x12",
+            "a3": "x13",
+            "a4": "x14",
+            "a5": "x15",
+            "a6": "x16",
+            "a7": "x17",
+            "s2": "x18",
+            "s3": "x19",
+            "s4": "x20",
+            "s5": "x21",
+            "s6": "x22",
+            "s7": "x23",
+            "s8": "x24",
+            "s9": "x25",
+            "s10": "x26",
+            "s11": "x27",
+            "t3": "x28",
+            "t4": "x29",
+            "t5": "x30",
+            "t6": "x31",
        }

        # Integer register ABI names
        if name in abi_to_x:
            return RegisterOperand(
                prefix="x",
-                name=abi_to_x[name]
+                name=abi_to_x[name],
+                regtype="int",
+                width=64,  # RV64 uses 64-bit registers
            )
        # Floating point register ABI names
        elif name.startswith("f") and name[1] in ["t", "a", "s"]:
            if name[1] == "a":  # fa0-fa7
                idx = int(name[2:])
-                return RegisterOperand(prefix="f", name=str(idx + 10))
+                return RegisterOperand(
+                    prefix="f", name=str(idx + 10), regtype="float", width=64
+                )
            elif name[1] == "s":  # fs0-fs11
                idx = int(name[2:])
                if idx <= 1:
-                    return RegisterOperand(prefix="f", name=str(idx + 8))
+                    return RegisterOperand(
+                        prefix="f", name=str(idx + 8), regtype="float", width=64
+                    )
                else:
-                    return RegisterOperand(prefix="f", name=str(idx + 16))
+                    return RegisterOperand(
+                        prefix="f", name=str(idx + 16), regtype="float", width=64
+                    )
            elif name[1] == "t":  # ft0-ft11
                idx = int(name[2:])
                if idx <= 7:
-                    return RegisterOperand(prefix="f", name=str(idx))
+                    return RegisterOperand(
+                        prefix="f", name=str(idx), regtype="float", width=64
+                    )
                else:
-                    return RegisterOperand(prefix="f", name=str(idx + 20))
+                    return RegisterOperand(
+                        prefix="f", name=str(idx + 20), regtype="float", width=64
+                    )
        # CSR registers
        elif name.startswith("csr"):
-            return RegisterOperand(prefix="", name=name)
+            return RegisterOperand(prefix="", name=name, regtype="csr")

        # If no mapping found, return as is
        return RegisterOperand(prefix="", name=name)

    def process_memory_address(self, memory_address):
-        """Post-process memory address operand"""
+        """Post-process memory address operand with RISC-V specific attributes"""
        # Process offset
        offset = memory_address.get("offset", None)
        if isinstance(offset, list) and len(offset) == 1:
@@ -500,12 +615,32 @@ class ParserRISCV(BaseParser):
        if base is not None:
            base = self.process_register_operand(base)

-        # Create memory operand
+        # Determine data type from instruction context if available
+        # RISC-V load/store instructions encode the data width in the mnemonic
+        # e.g., lw (word), lh (half), lb (byte), etc.
+        data_type = None
+        if hasattr(self, "current_instruction"):
+            mnemonic = self.current_instruction.lower()
+            if any(x in mnemonic for x in ["b", "bu"]):  # byte operations
+                data_type = "byte"
+            elif any(x in mnemonic for x in ["h", "hu"]):  # halfword operations
+                data_type = "halfword"
+            elif any(x in mnemonic for x in ["w", "wu"]):  # word operations
+                data_type = "word"
+            elif "d" in mnemonic:  # doubleword operations
+                data_type = "doubleword"
+
+        # Create memory operand with enhanced attributes
        return MemoryOperand(
            offset=offset,
            base=base,
-            index=None,
-            scale=1
+            index=None,  # RISC-V doesn't use index registers
+            scale=1,  # RISC-V doesn't use scaling
+            data_type=data_type,
+            # Handle vector memory operations
+            mask=memory_address.get("mask", None),  # For vector masked loads/stores
+            src=memory_address.get("src", None),  # Source register type for stores
+            dst=memory_address.get("dst", None),  # Destination register type for loads
        )

    def process_label(self, label):
@@ -519,21 +654,102 @@ class ParserRISCV(BaseParser):
        """Post-process identifier operand"""
        return IdentifierOperand(
            name=identifier["name"] if "name" in identifier else None,
-            offset=identifier["offset"] if "offset" in identifier else None
+            offset=identifier["offset"] if "offset" in identifier else None,
        )

    def process_immediate(self, immediate):
-        """Post-process immediate operand"""
+        """Post-process immediate operand with RISC-V specific handling"""
+        # Handle relocations
+        if "relocation" in immediate:
+            reloc = immediate["relocation"]
+            return ImmediateOperand(
+                imd_type="reloc",
+                value=None,
+                reloc_type=reloc["reloc_type"],
+                symbol=reloc["symbol"],
+            )
+
+        # Handle identifiers
        if "identifier" in immediate:
-            # actually an identifier, change declaration
            return self.process_identifier(immediate["identifier"])
+
+        # Handle numeric values with validation
        if "value" in immediate:
-            # normal integer value
-            immediate["type"] = "int"
-            # convert hex/bin immediates to dec
-            new_immediate = ImmediateOperand(imd_type=immediate["type"], value=immediate["value"])
-            new_immediate.value = self.normalize_imd(new_immediate)
-            return new_immediate
+            value = int(
+                immediate["value"], 0
+            )  # Convert to integer, handling hex/decimal
+
+            # Determine immediate type and validate range based on instruction type
+            if hasattr(self, "current_instruction"):
+                mnemonic = self.current_instruction.lower()
+
+                # I-type instructions (12-bit signed immediate)
+                if any(
+                    x in mnemonic
+                    for x in [
+                        "addi",
+                        "slti",
+                        "xori",
+                        "ori",
+                        "andi",
+                        "slli",
+                        "srli",
+                        "srai",
+                    ]
+                ):
+                    if not -2048 <= value <= 2047:
+                        raise ValueError(
+                            f"Immediate value {value} out of range for I-type "
+                            f"instruction (-2048 to 2047)"
+                        )
+                    return ImmediateOperand(imd_type="I", value=value)
+
+                # S-type instructions (12-bit signed immediate for store)
+                elif any(x in mnemonic for x in ["sb", "sh", "sw", "sd"]):
+                    if not -2048 <= value <= 2047:
+                        raise ValueError(
+                            f"Immediate value {value} out of range for S-type "
+                            f"instruction (-2048 to 2047)"
+                        )
+                    return ImmediateOperand(imd_type="S", value=value)
+
+                # B-type instructions (13-bit signed immediate for branches, must be even)
+                elif any(
+                    x in mnemonic for x in ["beq", "bne", "blt", "bge", "bltu", "bgeu"]
+                ):
+                    if not -4096 <= value <= 4095 or value % 2 != 0:
+                        raise ValueError(
+                            f"Immediate value {value} out of range or not even "
+                            f"for B-type instruction (-4096 to 4095, must be even)"
+                        )
+                    return ImmediateOperand(imd_type="B", value=value)
+
+                # U-type instructions (20-bit upper immediate)
+                elif any(x in mnemonic for x in ["lui", "auipc"]):
+                    if not 0 <= value <= 1048575:
+                        raise ValueError(
+                            f"Immediate value {value} out of range for U-type "
+                            f"instruction (0 to 1048575)"
+                        )
+                    return ImmediateOperand(imd_type="U", value=value)
+
+                # J-type instructions (21-bit signed immediate for jumps, must be even)
+                elif any(x in mnemonic for x in ["jal"]):
+                    if not -1048576 <= value <= 1048575 or value % 2 != 0:
+                        raise ValueError(
+                            f"Immediate value {value} out of range or not even "
+                            f"for J-type instruction (-1048576 to 1048575, must be even)"
+                        )
+                    return ImmediateOperand(imd_type="J", value=value)
+
+                # Vector instructions might have specific immediate ranges
+                elif mnemonic.startswith("v"):
+                    # Handle vector specific immediates (implementation specific)
+                    return ImmediateOperand(imd_type="V", value=value)
+
+            # Default case - no specific validation
+            return ImmediateOperand(imd_type="int", value=value)
+
        return immediate

    def get_full_reg_name(self, register):
@@ -566,35 +782,74 @@ class ParserRISCV(BaseParser):
        register_string = register_string.strip()

        # Check for integer registers (x0-x31)
-        x_match = re.match(r'^x([0-9]|[1-2][0-9]|3[0-1])$', register_string)
+        x_match = re.match(r"^x([0-9]|[1-2][0-9]|3[0-1])$", register_string)
        if x_match:
            reg_num = int(x_match.group(1))
-            return {"class": "register", "register": {"prefix": "x", "name": str(reg_num)}}
+            return {
+                "class": "register",
+                "register": {"prefix": "x", "name": str(reg_num)},
+            }

        # Check for floating-point registers (f0-f31)
-        f_match = re.match(r'^f([0-9]|[1-2][0-9]|3[0-1])$', register_string)
+        f_match = re.match(r"^f([0-9]|[1-2][0-9]|3[0-1])$", register_string)
        if f_match:
            reg_num = int(f_match.group(1))
-            return {"class": "register", "register": {"prefix": "f", "name": str(reg_num)}}
+            return {
+                "class": "register",
+                "register": {"prefix": "f", "name": str(reg_num)},
+            }

        # Check for vector registers (v0-v31)
-        v_match = re.match(r'^v([0-9]|[1-2][0-9]|3[0-1])$', register_string)
+        v_match = re.match(r"^v([0-9]|[1-2][0-9]|3[0-1])$", register_string)
        if v_match:
            reg_num = int(v_match.group(1))
-            return {"class": "register", "register": {"prefix": "v", "name": str(reg_num)}}
+            return {
+                "class": "register",
+                "register": {"prefix": "v", "name": str(reg_num)},
+            }

        # Check for ABI names
        abi_names = {
-            "zero": 0, "ra": 1, "sp": 2, "gp": 3, "tp": 4,
-            "t0": 5, "t1": 6, "t2": 7,
-            "s0": 8, "fp": 8, "s1": 9,
-            "a0": 10, "a1": 11, "a2": 12, "a3": 13, "a4": 14, "a5": 15, "a6": 16, "a7": 17,
-            "s2": 18, "s3": 19, "s4": 20, "s5": 21, "s6": 22, "s7": 23, "s8": 24, "s9": 25, "s10": 26, "s11": 27,
-            "t3": 28, "t4": 29, "t5": 30, "t6": 31
+            "zero": 0,
+            "ra": 1,
+            "sp": 2,
+            "gp": 3,
+            "tp": 4,
+            "t0": 5,
+            "t1": 6,
+            "t2": 7,
+            "s0": 8,
+            "fp": 8,
+            "s1": 9,
+            "a0": 10,
+            "a1": 11,
+            "a2": 12,
+            "a3": 13,
+            "a4": 14,
+            "a5": 15,
+            "a6": 16,
+            "a7": 17,
+            "s2": 18,
+            "s3": 19,
+            "s4": 20,
+            "s5": 21,
+            "s6": 22,
+            "s7": 23,
+            "s8": 24,
+            "s9": 25,
+            "s10": 26,
+            "s11": 27,
+            "t3": 28,
+            "t4": 29,
+            "t5": 30,
+            "t6": 31,
        }

        if register_string in abi_names:
-            return {"class": "register", "register": {"prefix": "", "name": register_string}}
+            return {
+                "class": "register",
+                "register": {"prefix": "", "name": register_string},
+            }

        # If no match is found
        return None
@@ -642,15 +897,38 @@ class ParserRISCV(BaseParser):

        # ABI name mapping for integer registers
        abi_to_x = {
-            "zero": "x0", "ra": "x1", "sp": "x2", "gp": "x3", "tp": "x4",
-            "t0": "x5", "t1": "x6", "t2": "x7",
-            "s0": "x8", "s1": "x9",
-            "a0": "x10", "a1": "x11", "a2": "x12", "a3": "x13",
-            "a4": "x14", "a5": "x15", "a6": "x16", "a7": "x17",
-            "s2": "x18", "s3": "x19", "s4": "x20", "s5": "x21",
-            "s6": "x22", "s7": "x23", "s8": "x24", "s9": "x25",
-            "s10": "x26", "s11": "x27",
-            "t3": "x28", "t4": "x29", "t5": "x30", "t6": "x31"
+            "zero": "x0",
+            "ra": "x1",
+            "sp": "x2",
+            "gp": "x3",
+            "tp": "x4",
+            "t0": "x5",
+            "t1": "x6",
+            "t2": "x7",
+            "s0": "x8",
+            "s1": "x9",
+            "a0": "x10",
+            "a1": "x11",
+            "a2": "x12",
+            "a3": "x13",
+            "a4": "x14",
+            "a5": "x15",
+            "a6": "x16",
+            "a7": "x17",
+            "s2": "x18",
+            "s3": "x19",
+            "s4": "x20",
+            "s5": "x21",
+            "s6": "x22",
+            "s7": "x23",
+            "s8": "x24",
+            "s9": "x25",
+            "s10": "x26",
+            "s11": "x27",
+            "t3": "x28",
+            "t4": "x29",
+            "t5": "x30",
+            "t6": "x31",
        }

        # For integer register ABI names
@@ -751,7 +1029,7 @@ class ParserRISCV(BaseParser):
            # For raw integer values or string immediates
            return ImmediateOperand(
                imd_type="int",
-                value=str(operand) if isinstance(operand, int) else operand
+                value=str(operand) if isinstance(operand, int) else operand,
            )
        elif isinstance(operand, dict) and "imd" in operand:
            # For immediate operands from instruction definitions
@@ -759,11 +1037,8 @@ class ParserRISCV(BaseParser):
                imd_type=operand["imd"],
                value=operand.get("value"),
                identifier=operand.get("identifier"),
-                shift=operand.get("shift")
+                shift=operand.get("shift"),
            )
        else:
            # For any other immediate format
-            return ImmediateOperand(
-                imd_type="int",
-                value=str(operand)
-            ) 
+            return ImmediateOperand(imd_type="int", value=str(operand))
--- a/osaca/semantics/hw_model.py
+++ b/osaca/semantics/hw_model.py
@@ -1025,6 +1025,7 @@ class MachineModel(object):
            try:
                # Need to check if they refer to the same register
                from osaca.parser import ParserRISCV
+
                parser = ParserRISCV()
                reg_canonical = parser._get_canonical_reg_name(reg)
                i_reg_canonical = parser._get_canonical_reg_name(i_reg)
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -307,11 +307,13 @@ class TestCLI(unittest.TestCase):
    @staticmethod
    def _find_file(kernel, arch, comp):
        testdir = os.path.dirname(__file__)
+        # rv64 example files use the full arch name; all other archs use its first three characters
+        arch_prefix = arch.lower() if arch.lower() == "rv64" else arch[:3].lower()
        name = os.path.join(
            testdir,
            "../examples",
            kernel,
-            kernel + ".s." + arch[:3].lower() + "." + comp.lower() + ".s",
+            kernel + ".s." + arch_prefix + "." + comp.lower() + ".s",
        )
        if kernel == "j2d" and arch.lower() == "csx":
            name = name[:-1] + "AVX.s"
--- a/tests/test_parser_RISCV.py
+++ b/tests/test_parser_RISCV.py
@@ -8,9 +8,7 @@ import unittest

 from pyparsing import ParseException

-from osaca.parser import ParserRISCV, InstructionForm
-from osaca.parser.directive import DirectiveOperand
-from osaca.parser.memory import MemoryOperand
+from osaca.parser import ParserRISCV
 from osaca.parser.register import RegisterOperand
 from osaca.parser.immediate import ImmediateOperand
 from osaca.parser.identifier import IdentifierOperand
@@ -180,12 +178,10 @@ class TestParserRISCV(unittest.TestCase):

        # Test floating-point registers
        reg_fa0 = RegisterOperand(prefix="f", name="a0")
-        reg_fa1 = RegisterOperand(prefix="f", name="a1")
        reg_f10 = RegisterOperand(prefix="f", name="10")

        # Test vector registers
        reg_v1 = RegisterOperand(prefix="v", name="1")
-        reg_v2 = RegisterOperand(prefix="v", name="2")

        # Test register type detection
        self.assertTrue(self.parser.is_gpr(reg_a0))