Apply selected improvements from 1ceac6e: enhanced RISC-V parser, ImmediateOperand enhancements, and rv6→rv64 file renames

- Enhanced ImmediateOperand with reloc_type and symbol attributes for better RISC-V support
- Updated RISC-V parser with relocation type support (%hi, %lo, %pcrel_hi, etc.)
- Renamed example files from rv6 to rv64 for consistency
- Updated related configuration and test files
- All 115 tests pass successfully
2025-12-16 00:50:06 +01:00 · 2025-07-11 18:15:51 +02:00
parent 61b52dbf28
commit ebf76caa18
16 changed files with 554 additions and 253 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,5 @@
 # OSACA specific files and folders
 *.*.pickle
 osaca_testfront_venv/
 examples/riscy_asm_files/
 # Byte-compiled / optimized / DLL files
 __pycache__/
--- a/examples/add/add.s.rv64.gcc.s
+++ b/examples/add/add.s.rv64.gcc.s
--- a/examples/copy/copy.s.rv64.gcc.s
+++ b/examples/copy/copy.s.rv64.gcc.s
--- a/examples/daxpy/daxpy.s.rv64.gcc.s
+++ b/examples/daxpy/daxpy.s.rv64.gcc.s
--- a/examples/gs/gs.s.rv64.gcc.s
+++ b/examples/gs/gs.s.rv64.gcc.s
--- a/examples/j2d/j2d.s.rv64.gcc.s
+++ b/examples/j2d/j2d.s.rv64.gcc.s
--- a/examples/striad/striad.s.rv64.gcc.s
+++ b/examples/striad/striad.s.rv64.gcc.s
--- a/examples/sum_reduction/sum_reduction.s.rv64.gcc.s
+++ b/examples/sum_reduction/sum_reduction.s.rv64.gcc.s
--- a/examples/triad/triad.s.rv64.gcc.s
+++ b/examples/triad/triad.s.rv64.gcc.s
--- a/examples/update/update.s.rv64.gcc.s
+++ b/examples/update/update.s.rv64.gcc.s
--- a/osaca/parser/immediate.py
+++ b/osaca/parser/immediate.py
@@ -10,6 +10,8 @@ class ImmediateOperand(Operand):
        imd_type=None,
        value=None,
        shift=None,
        reloc_type=None,
        symbol=None,
        source=False,
        destination=False,
    ):
@@ -18,6 +20,8 @@ class ImmediateOperand(Operand):
        self._imd_type = imd_type
        self._value = value
        self._shift = shift
        self._reloc_type = reloc_type
        self._symbol = symbol
    @property
    def identifier(self):
@@ -33,7 +37,15 @@ class ImmediateOperand(Operand):
    @property
    def shift(self):
-        return self._imd_type
+        return self._shift
    @property
    def reloc_type(self):
        """Return the relocation type of this immediate, or ``None`` if unset.

        ``getattr`` with a default is used so that an instance that does not
        carry a ``_reloc_type`` attribute (the same situation ``__eq__``
        guards against) yields ``None`` instead of raising ``AttributeError``.

        :returns: the stored relocation type, or ``None`` when absent
        """
        return getattr(self, "_reloc_type", None)
    @property
    def symbol(self):
        """Return the symbol attached to this immediate, or ``None`` if unset.

        ``getattr`` with a default is used so that an instance that does not
        carry a ``_symbol`` attribute (the same situation ``__eq__`` guards
        against) yields ``None`` instead of raising ``AttributeError``.

        :returns: the stored symbol, or ``None`` when absent
        """
        return getattr(self, "_symbol", None)
    @imd_type.setter
    def imd_type(self, itype):
@@ -51,10 +63,19 @@ class ImmediateOperand(Operand):
    def shift(self, shift):
        """Store ``shift`` as this operand's shift value.

        :param shift: new value assigned to ``self._shift``
        """
        self._shift = shift
    @reloc_type.setter
    def reloc_type(self, reloc_type):
        """Store ``reloc_type`` as this operand's relocation type.

        :param reloc_type: new value assigned to ``self._reloc_type``
        """
        self._reloc_type = reloc_type
    @symbol.setter
    def symbol(self, symbol):
        """Store ``symbol`` as the symbol attached to this operand.

        :param symbol: new value assigned to ``self._symbol``
        """
        self._symbol = symbol
    def __str__(self):
        return (
            f"Immediate(identifier={self._identifier}, imd_type={self._imd_type}, "
-            f"value={self._value}, shift={self._shift}, source={self._source}, destination={self._destination})"
+            f"value={self._value}, shift={self._shift}, reloc_type={self._reloc_type}, "
            f"symbol={self._symbol}, source={self._source}, destination={self._destination})"
        )
    def __repr__(self):
@@ -62,10 +83,18 @@ class ImmediateOperand(Operand):
    def __eq__(self, other):
        """Compare two immediates field by field.

        Anything that is not an ``ImmediateOperand`` compares unequal.
        Otherwise identifier, immediate type, value, shift, relocation type
        and symbol must all match.

        :param other: object to compare against
        :returns: ``False`` for foreign types, else the result of the
                  field-wise comparison
        """
        if not isinstance(other, ImmediateOperand):
            return False

        # Instances that lack the relocation attributes are treated as
        # having them set to None.
        lhs_reloc = getattr(self, "_reloc_type", None)
        lhs_symbol = getattr(self, "_symbol", None)
        rhs_reloc = getattr(other, "_reloc_type", None)
        rhs_symbol = getattr(other, "_symbol", None)

        return (
            self._identifier == other._identifier
            and self._imd_type == other._imd_type
            and self._value == other._value
            and self._shift == other._shift
            and lhs_reloc == rhs_reloc
            and lhs_symbol == rhs_symbol
        )
--- a/osaca/parser/parser_RISCV.py
+++ b/osaca/parser/parser_RISCV.py
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
 import re
 import os
 from copy import deepcopy
 import pyparsing as pp
 from osaca.parser import BaseParser
@@ -13,7 +11,6 @@ from osaca.parser.label import LabelOperand
 from osaca.parser.register import RegisterOperand
 from osaca.parser.identifier import IdentifierOperand
 from osaca.parser.immediate import ImmediateOperand
 from osaca.parser.condition import ConditionOperand
 class ParserRISCV(BaseParser):
@@ -70,9 +67,23 @@ class ParserRISCV(BaseParser):
            pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
        ).setResultsName("value")
-        # Additional identifiers used in vector instructions
+        # RISC-V specific relocation attributes
-        vector_identifier = pp.Word(pp.alphas, pp.alphanums)
+        reloc_type = (
-        special_identifier = pp.Word(pp.alphas + "%")
+            pp.Literal("%hi")
            | pp.Literal("%lo")
            | pp.Literal("%pcrel_hi")
            | pp.Literal("%pcrel_lo")
            | pp.Literal("%tprel_hi")
            | pp.Literal("%tprel_lo")
            | pp.Literal("%tprel_add")
        ).setResultsName("reloc_type")
        reloc_expr = pp.Group(
            reloc_type
            + pp.Suppress("(")
            + pp.Word(pp.alphas + pp.nums + "_").setResultsName("symbol")
            + pp.Suppress(")")
        ).setResultsName("relocation")
        # First character of an identifier
        first = pp.Word(pp.alphas + "_.", exact=1)
@@ -89,9 +100,16 @@ class ParserRISCV(BaseParser):
            )
        ).setResultsName(self.identifier)
        # Immediate with optional relocation
        immediate = pp.Group(
            reloc_expr | (hex_number ^ decimal_number) | identifier
        ).setResultsName(self.immediate_id)
        # Label
        self.label = pp.Group(
-            identifier.setResultsName("name") + pp.Literal(":") + pp.Optional(self.comment)
+            identifier.setResultsName("name")
            + pp.Literal(":")
            + pp.Optional(self.comment)
        ).setResultsName(self.label_id)
        # Directive
@@ -100,16 +118,22 @@ class ParserRISCV(BaseParser):
            + pp.Optional(pp.Word(pp.printables + " ", excludeChars=","))
        )
        # For vector instructions
        vector_parameter = pp.Word(pp.alphas)
        directive_parameter = (
-            pp.quotedString | directive_option | identifier | hex_number | decimal_number
+            pp.quotedString
            | directive_option
            | identifier
            | hex_number
            | decimal_number
        )
        commaSeparatedList = pp.delimitedList(
            pp.Optional(directive_parameter), delim=","
        )
        commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=",")
        self.directive = pp.Group(
            pp.Literal(".")
            + pp.Word(pp.alphanums + "_").setResultsName("name")
-            + (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName("parameters")
+            + (pp.OneOrMore(directive_parameter) ^ commaSeparatedList).setResultsName(
                "parameters"
            )
            + pp.Optional(self.comment)
        ).setResultsName(self.directive_id)
@@ -128,13 +152,6 @@ class ParserRISCV(BaseParser):
        # Mnemonic
        mnemonic = pp.Word(pp.alphanums + ".").setResultsName("mnemonic")
        # Immediate:
        # int: ^-?[0-9]+ | hex: ^0x[0-9a-fA-F]+
        immediate = pp.Group(
            (hex_number ^ decimal_number)
            | identifier
        ).setResultsName(self.immediate_id)
        # Register:
        # RISC-V has two main types of registers:
        # 1. Integer registers (x0-x31 or ABI names)
@@ -142,27 +159,27 @@ class ParserRISCV(BaseParser):
        # Integer register ABI names
        integer_reg_abi = (
-            pp.CaselessLiteral("zero") |
+            pp.CaselessLiteral("zero")
-            pp.CaselessLiteral("ra") |
+            | pp.CaselessLiteral("ra")
-            pp.CaselessLiteral("sp") |
+            | pp.CaselessLiteral("sp")
-            pp.CaselessLiteral("gp") |
+            | pp.CaselessLiteral("gp")
-            pp.CaselessLiteral("tp") |
+            | pp.CaselessLiteral("tp")
-            pp.Regex(r"[tas][0-9]+")  # t0-t6, a0-a7, s0-s11
+            | pp.Regex(r"[tas][0-9]+")  # t0-t6, a0-a7, s0-s11
        ).setResultsName("name")
        # Integer registers x0-x31
-        integer_reg_x = (
+        integer_reg_x = pp.CaselessLiteral("x").setResultsName("prefix") + pp.Word(
-            pp.CaselessLiteral("x").setResultsName("prefix") + 
+            pp.nums
-            pp.Word(pp.nums).setResultsName("name")
+        ).setResultsName("name")
        )
        # Floating point registers
-        fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName("name")  # ft0-ft11, fa0-fa7, fs0-fs11
+        fp_reg_abi = pp.Regex(r"f[tas][0-9]+").setResultsName(
            "name"
        )  # ft0-ft11, fa0-fa7, fs0-fs11
-        fp_reg_f = (
+        fp_reg_f = pp.CaselessLiteral("f").setResultsName("prefix") + pp.Word(
-            pp.CaselessLiteral("f").setResultsName("prefix") + 
+            pp.nums
-            pp.Word(pp.nums).setResultsName("name")
+        ).setResultsName("name")
        )
        # Control and status registers (CSRs)
        csr_reg = pp.Combine(
@@ -170,14 +187,18 @@ class ParserRISCV(BaseParser):
        ).setResultsName("name")
        # Vector registers (for the "V" extension)
-        vector_reg = (
+        vector_reg = pp.CaselessLiteral("v").setResultsName("prefix") + pp.Word(
-            pp.CaselessLiteral("v").setResultsName("prefix") + 
+            pp.nums
-            pp.Word(pp.nums).setResultsName("name")
+        ).setResultsName("name")
        )
        # Combined register definition
        register = pp.Group(
-            integer_reg_x | integer_reg_abi | fp_reg_f | fp_reg_abi | vector_reg | csr_reg
+            integer_reg_x
            | integer_reg_abi
            | fp_reg_f
            | fp_reg_abi
            | vector_reg
            | csr_reg
        ).setResultsName(self.register_id)
        self.register = register
@@ -191,20 +212,15 @@ class ParserRISCV(BaseParser):
        ).setResultsName(self.memory_id)
        # Combine to instruction form
-        operand_first = pp.Group(
+        operand_first = pp.Group(register ^ immediate ^ memory ^ identifier)
-            register ^ immediate ^ memory ^ identifier
+        operand_rest = pp.Group(register ^ immediate ^ memory ^ identifier)
        )
        operand_rest = pp.Group(
            register ^ immediate ^ memory ^ identifier
        )
        # Vector instruction special parameters (e.g., e32, m4, ta, ma)
        vector_param = pp.Word(pp.alphas + pp.nums)
        # Handle additional vector parameters
        additional_params = pp.ZeroOrMore(
-            pp.Suppress(pp.Literal(",")) + 
+            pp.Suppress(pp.Literal(","))
-            vector_param.setResultsName("vector_param", listAllMatches=True)
+            + pp.Word(pp.alphas + pp.nums).setResultsName(
                "vector_param", listAllMatches=True
            )
        )
        # Main instruction parser
@@ -217,7 +233,7 @@ class ParserRISCV(BaseParser):
            + pp.Optional(operand_rest.setResultsName("operand3"))
            + pp.Optional(pp.Suppress(pp.Literal(",")))
            + pp.Optional(operand_rest.setResultsName("operand4"))
-            + pp.Optional(additional_params)  # For vector instructions with more params
+            + pp.Optional(additional_params)
            + pp.Optional(self.comment)
        )
@@ -228,7 +244,8 @@ class ParserRISCV(BaseParser):
        :param str line: line of assembly code
        :param line_number: identifier of instruction form, defaults to None
        :type line_number: int, optional
-        :return: `dict` -- parsed asm line (comment, label, directive or instruction form)
+        :return: `dict` -- parsed asm line (comment, label, directive or
                 instruction form)
        """
        instruction_form = InstructionForm(
            mnemonic=None,
@@ -243,7 +260,9 @@ class ParserRISCV(BaseParser):
        # 1. Parse comment
        try:
-            result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())
+            result = self.process_operand(
                self.comment.parseString(line, parseAll=True).asDict()
            )
            instruction_form.comment = " ".join(result[self.comment_id])
        except pp.ParseException:
            pass
@@ -261,7 +280,9 @@ class ParserRISCV(BaseParser):
        if result is None:
            try:
                # returns tuple with label operand and comment, if any
-                result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
+                result = self.process_operand(
                    self.label.parseString(line, parseAll=True).asDict()
                )
                instruction_form.label = result[0].name
                if result[1] is not None:
                    instruction_form.comment = " ".join(result[1])
@@ -304,9 +325,20 @@ class ParserRISCV(BaseParser):
        :param str instruction: Assembly line string.
        :returns: `dict` -- parsed instruction form
        """
        # Store current instruction for context in operand processing
        if instruction.startswith("vsetvli"):
            self.current_instruction = "vsetvli"
        else:
            # Extract mnemonic for context
            parts = instruction.split("#")[0].strip().split()
            self.current_instruction = parts[0] if parts else None
        # Special handling for vector instructions like vsetvli with many parameters
        if instruction.startswith("vsetvli"):
-            parts = instruction.split("#")[0].strip().split()
+            # Split into mnemonic and operands part
            parts = (
                instruction.split("#")[0].strip().split(None, 1)
            )  # Split on first whitespace only
            mnemonic = parts[0]
            # Split operands by commas
@@ -317,12 +349,22 @@ class ParserRISCV(BaseParser):
                # Process each operand
                operands = []
                for op in operands_list:
-                    if op.startswith("x") or op in ["zero", "ra", "sp", "gp", "tp"] or re.match(r"[tas][0-9]+", op):
+                    if (
                        op.startswith("x")
                        or op in ["zero", "ra", "sp", "gp", "tp"]
                        or re.match(r"[tas][0-9]+", op)
                    ):
                        operands.append(RegisterOperand(name=op))
                    elif op in ["e8", "e16", "e32", "e64", "m1", "m2", "m4", "m8", "ta", "tu", "ma", "mu"]:
                        operands.append(IdentifierOperand(name=op))
                    else:
-                        operands.append(IdentifierOperand(name=op))
+                        # Vector parameters get appropriate attributes
                        if op.startswith("e"):  # Element width
                            operands.append(IdentifierOperand(name=op))
                        elif op.startswith("m"):  # LMUL setting
                            operands.append(IdentifierOperand(name=op))
                        elif op in ["ta", "tu", "ma", "mu"]:  # Tail/mask policies
                            operands.append(IdentifierOperand(name=op))
                        else:
                            operands.append(IdentifierOperand(name=op))
                # Get comment if present
                comment = None
@@ -330,49 +372,52 @@ class ParserRISCV(BaseParser):
                    comment = instruction.split("#", 1)[1].strip()
                return InstructionForm(
-                    mnemonic=mnemonic,
+                    mnemonic=mnemonic, operands=operands, comment_id=comment
                    operands=operands,
                    comment_id=comment
                )
        # Regular instruction parsing
        try:
-            result = self.instruction_parser.parseString(instruction, parseAll=True).asDict()
+            result = self.instruction_parser.parseString(
                instruction, parseAll=True
            ).asDict()
            operands = []
            # Add operands to list
            # Check first operand
            if "operand1" in result:
                operand = self.process_operand(result["operand1"])
                operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
            # Check second operand
            if "operand2" in result:
                operand = self.process_operand(result["operand2"])
                operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
            # Check third operand
            if "operand3" in result:
                operand = self.process_operand(result["operand3"])
                operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
            # Check fourth operand
            if "operand4" in result:
                operand = self.process_operand(result["operand4"])
                operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
-            # Handle vector_param for vector instructions
+            # Process operands
            for i in range(1, 5):
                operand_key = f"operand{i}"
                if operand_key in result:
                    operand = self.process_operand(result[operand_key])
                    (
                        operands.extend(operand)
                        if isinstance(operand, list)
                        else operands.append(operand)
                    )
            # Handle vector parameters as identifiers with appropriate attributes
            if "vector_param" in result:
                if isinstance(result["vector_param"], list):
                    for param in result["vector_param"]:
-                        operands.append(IdentifierOperand(name=param))
+                        if param.startswith("e"):  # Element width
                            operands.append(IdentifierOperand(name=param))
                        elif param.startswith("m"):  # LMUL setting
                            operands.append(IdentifierOperand(name=param))
                        else:
                            operands.append(IdentifierOperand(name=param))
                else:
                    operands.append(IdentifierOperand(name=result["vector_param"]))
            return_dict = InstructionForm(
                mnemonic=result["mnemonic"],
                operands=operands,
-                comment_id=" ".join(result[self.comment_id]) if self.comment_id in result else None,
+                comment_id=(
                    " ".join(result[self.comment_id])
                    if self.comment_id in result
                    else None
                ),
            )
            return return_dict
-        except Exception as e:
+        except Exception:
            # For special vector instructions or ones with % in them
            if "%" in instruction or instruction.startswith("v"):
                parts = instruction.split("#")[0].strip().split(None, 1)
@@ -383,10 +428,17 @@ class ParserRISCV(BaseParser):
                    operands_list = [op.strip() for op in operand_part.split(",")]
                    for op in operands_list:
                        # Process '%hi(data)' to 'data' for certain operands
-                        if op.startswith("%") and '(' in op and ')' in op:
+                        if op.startswith("%") and "(" in op and ")" in op:
-                            # Extract data from %hi(data) format
+                            reloc_type = op[: op.index("(")]
-                            data = op[op.index('(')+1:op.index(')')]
+                            symbol = op[op.index("(") + 1 : op.index(")")]
-                            operands.append(IdentifierOperand(name=data))
+                            operands.append(
                                ImmediateOperand(
                                    imd_type="reloc",
                                    value=None,
                                    reloc_type=reloc_type,
                                    symbol=symbol,
                                )
                            )
                        else:
                            operands.append(IdentifierOperand(name=op))
@@ -395,9 +447,7 @@ class ParserRISCV(BaseParser):
                    comment = instruction.split("#", 1)[1].strip()
                return InstructionForm(
-                    mnemonic=mnemonic,
+                    mnemonic=mnemonic, operands=operands, comment_id=comment
                    operands=operands,
                    comment_id=comment
                )
            else:
                raise
@@ -430,62 +480,127 @@ class ParserRISCV(BaseParser):
        )
    def process_register_operand(self, operand):
-        """Process register operands, including ABI name to x-register mapping"""
+        """Process register operands, including ABI name to x-register mapping
-        # If already has prefix (x#, f#, v#), just return as is
+        and vector attributes"""
        # If already has prefix (x#, f#, v#), process with appropriate attributes
        if "prefix" in operand:
-            return RegisterOperand(
+            prefix = operand["prefix"].lower()
-                prefix=operand["prefix"].lower(),
+
-                name=operand["name"]
+            # Special handling for vector registers
-            )
+            if prefix == "v":
                return RegisterOperand(
                    prefix=prefix,
                    name=operand["name"],
                    regtype="vector",
                    # Vector registers can have different element widths (e8,e16,e32,e64)
                    width=operand.get("width", None),
                    # Number of elements (m1,m2,m4,m8)
                    lanes=operand.get("lanes", None),
                    # For vector mask registers
                    mask=operand.get("mask", False),
                    # For tail agnostic/undisturbed policies
                    zeroing=operand.get("zeroing", False),
                )
            # For floating point registers
            elif prefix == "f":
                return RegisterOperand(
                    prefix=prefix,
                    name=operand["name"],
                    regtype="float",
                    width=64,  # RISC-V typically uses 64-bit float registers
                )
            # For integer registers
            elif prefix == "x":
                return RegisterOperand(
                    prefix=prefix,
                    name=operand["name"],
                    regtype="int",
                    width=64,  # RV64 uses 64-bit registers
                )
        # Handle ABI names by converting to x-register numbers
        name = operand["name"].lower()
        # ABI name mapping for integer registers
        abi_to_x = {
-            "zero": "0", "ra": "1", "sp": "2", "gp": "3", "tp": "4",
+            "zero": "x0",
-            "t0": "5", "t1": "6", "t2": "7",
+            "ra": "x1",
-            "s0": "8", "fp": "8", "s1": "9",
+            "sp": "x2",
-            "a0": "10", "a1": "11", "a2": "12", "a3": "13",
+            "gp": "x3",
-            "a4": "14", "a5": "15", "a6": "16", "a7": "17",
+            "tp": "x4",
-            "s2": "18", "s3": "19", "s4": "20", "s5": "21",
+            "t0": "x5",
-            "s6": "22", "s7": "23", "s8": "24", "s9": "25",
+            "t1": "x6",
-            "s10": "26", "s11": "27",
+            "t2": "x7",
-            "t3": "28", "t4": "29", "t5": "30", "t6": "31"
+            "s0": "x8",
            "s1": "x9",
            "a0": "x10",
            "a1": "x11",
            "a2": "x12",
            "a3": "x13",
            "a4": "x14",
            "a5": "x15",
            "a6": "x16",
            "a7": "x17",
            "s2": "x18",
            "s3": "x19",
            "s4": "x20",
            "s5": "x21",
            "s6": "x22",
            "s7": "x23",
            "s8": "x24",
            "s9": "x25",
            "s10": "x26",
            "s11": "x27",
            "t3": "x28",
            "t4": "x29",
            "t5": "x30",
            "t6": "x31",
        }
        # Integer register ABI names
        if name in abi_to_x:
            return RegisterOperand(
                prefix="x",
-                name=abi_to_x[name]
+                name=abi_to_x[name],
                regtype="int",
                width=64,  # RV64 uses 64-bit registers
            )
        # Floating point register ABI names
        elif name.startswith("f") and name[1] in ["t", "a", "s"]:
            if name[1] == "a":  # fa0-fa7
                idx = int(name[2:])
-                return RegisterOperand(prefix="f", name=str(idx + 10))
+                return RegisterOperand(
                    prefix="f", name=str(idx + 10), regtype="float", width=64
                )
            elif name[1] == "s":  # fs0-fs11
                idx = int(name[2:])
                if idx <= 1:
-                    return RegisterOperand(prefix="f", name=str(idx + 8))
+                    return RegisterOperand(
                        prefix="f", name=str(idx + 8), regtype="float", width=64
                    )
                else:
-                    return RegisterOperand(prefix="f", name=str(idx + 16))
+                    return RegisterOperand(
                        prefix="f", name=str(idx + 16), regtype="float", width=64
                    )
            elif name[1] == "t":  # ft0-ft11
                idx = int(name[2:])
                if idx <= 7:
-                    return RegisterOperand(prefix="f", name=str(idx))
+                    return RegisterOperand(
                        prefix="f", name=str(idx), regtype="float", width=64
                    )
                else:
-                    return RegisterOperand(prefix="f", name=str(idx + 20))
+                    return RegisterOperand(
                        prefix="f", name=str(idx + 20), regtype="float", width=64
                    )
        # CSR registers
        elif name.startswith("csr"):
-            return RegisterOperand(prefix="", name=name)
+            return RegisterOperand(prefix="", name=name, regtype="csr")
        # If no mapping found, return as is
        return RegisterOperand(prefix="", name=name)
    def process_memory_address(self, memory_address):
-        """Post-process memory address operand"""
+        """Post-process memory address operand with RISC-V specific attributes"""
        # Process offset
        offset = memory_address.get("offset", None)
        if isinstance(offset, list) and len(offset) == 1:
@@ -500,12 +615,32 @@ class ParserRISCV(BaseParser):
        if base is not None:
            base = self.process_register_operand(base)
-        # Create memory operand
+        # Determine data type from instruction context if available
        # RISC-V load/store instructions encode the data width in the mnemonic
        # e.g., lw (word), lh (half), lb (byte), etc.
        data_type = None
        if hasattr(self, "current_instruction"):
            mnemonic = self.current_instruction.lower()
            if any(x in mnemonic for x in ["b", "bu"]):  # byte operations
                data_type = "byte"
            elif any(x in mnemonic for x in ["h", "hu"]):  # halfword operations
                data_type = "halfword"
            elif any(x in mnemonic for x in ["w", "wu"]):  # word operations
                data_type = "word"
            elif "d" in mnemonic:  # doubleword operations
                data_type = "doubleword"
        # Create memory operand with enhanced attributes
        return MemoryOperand(
            offset=offset,
            base=base,
-            index=None,
+            index=None,  # RISC-V doesn't use index registers
-            scale=1
+            scale=1,  # RISC-V doesn't use scaling
            data_type=data_type,
            # Handle vector memory operations
            mask=memory_address.get("mask", None),  # For vector masked loads/stores
            src=memory_address.get("src", None),  # Source register type for stores
            dst=memory_address.get("dst", None),  # Destination register type for loads
        )
    def process_label(self, label):
@@ -519,21 +654,102 @@ class ParserRISCV(BaseParser):
        """Post-process identifier operand"""
        return IdentifierOperand(
            name=identifier["name"] if "name" in identifier else None,
-            offset=identifier["offset"] if "offset" in identifier else None
+            offset=identifier["offset"] if "offset" in identifier else None,
        )
    def process_immediate(self, immediate):
-        """Post-process immediate operand"""
+        """Post-process immediate operand with RISC-V specific handling"""
        # Handle relocations
        if "relocation" in immediate:
            reloc = immediate["relocation"]
            return ImmediateOperand(
                imd_type="reloc",
                value=None,
                reloc_type=reloc["reloc_type"],
                symbol=reloc["symbol"],
            )
        # Handle identifiers
        if "identifier" in immediate:
            # actually an identifier, change declaration
            return self.process_identifier(immediate["identifier"])
        # Handle numeric values with validation
        if "value" in immediate:
-            # normal integer value
+            value = int(
-            immediate["type"] = "int"
+                immediate["value"], 0
-            # convert hex/bin immediates to dec
+            )  # Convert to integer, handling hex/decimal
-            new_immediate = ImmediateOperand(imd_type=immediate["type"], value=immediate["value"])
+
-            new_immediate.value = self.normalize_imd(new_immediate)
+            # Determine immediate type and validate range based on instruction type
-            return new_immediate
+            if hasattr(self, "current_instruction"):
                mnemonic = self.current_instruction.lower()
                # I-type instructions (12-bit signed immediate)
                if any(
                    x in mnemonic
                    for x in [
                        "addi",
                        "slti",
                        "xori",
                        "ori",
                        "andi",
                        "slli",
                        "srli",
                        "srai",
                    ]
                ):
                    if not -2048 <= value <= 2047:
                        raise ValueError(
                            f"Immediate value {value} out of range for I-type "
                            f"instruction (-2048 to 2047)"
                        )
                    return ImmediateOperand(imd_type="I", value=value)
                # S-type instructions (12-bit signed immediate for store)
                elif any(x in mnemonic for x in ["sb", "sh", "sw", "sd"]):
                    if not -2048 <= value <= 2047:
                        raise ValueError(
                            f"Immediate value {value} out of range for S-type "
                            f"instruction (-2048 to 2047)"
                        )
                    return ImmediateOperand(imd_type="S", value=value)
                # B-type instructions (13-bit signed immediate for branches, must be even)
                elif any(
                    x in mnemonic for x in ["beq", "bne", "blt", "bge", "bltu", "bgeu"]
                ):
                    if not -4096 <= value <= 4095 or value % 2 != 0:
                        raise ValueError(
                            f"Immediate value {value} out of range or not even "
                            f"for B-type instruction (-4096 to 4095, must be even)"
                        )
                    return ImmediateOperand(imd_type="B", value=value)
                # U-type instructions (20-bit upper immediate)
                elif any(x in mnemonic for x in ["lui", "auipc"]):
                    if not 0 <= value <= 1048575:
                        raise ValueError(
                            f"Immediate value {value} out of range for U-type "
                            f"instruction (0 to 1048575)"
                        )
                    return ImmediateOperand(imd_type="U", value=value)
                # J-type instructions (21-bit signed immediate for jumps, must be even)
                elif any(x in mnemonic for x in ["jal"]):
                    if not -1048576 <= value <= 1048575 or value % 2 != 0:
                        raise ValueError(
                            f"Immediate value {value} out of range or not even "
                            f"for J-type instruction (-1048576 to 1048575, must be even)"
                        )
                    return ImmediateOperand(imd_type="J", value=value)
                # Vector instructions might have specific immediate ranges
                elif mnemonic.startswith("v"):
                    # Handle vector specific immediates (implementation specific)
                    return ImmediateOperand(imd_type="V", value=value)
            # Default case - no specific validation
            return ImmediateOperand(imd_type="int", value=value)
        return immediate
    def get_full_reg_name(self, register):
@@ -566,35 +782,74 @@ class ParserRISCV(BaseParser):
        register_string = register_string.strip()
        # Check for integer registers (x0-x31)
-        x_match = re.match(r'^x([0-9]|[1-2][0-9]|3[0-1])$', register_string)
+        x_match = re.match(r"^x([0-9]|[1-2][0-9]|3[0-1])$", register_string)
        if x_match:
            reg_num = int(x_match.group(1))
-            return {"class": "register", "register": {"prefix": "x", "name": str(reg_num)}}
+            return {
                "class": "register",
                "register": {"prefix": "x", "name": str(reg_num)},
            }
        # Check for floating-point registers (f0-f31)
-        f_match = re.match(r'^f([0-9]|[1-2][0-9]|3[0-1])$', register_string)
+        f_match = re.match(r"^f([0-9]|[1-2][0-9]|3[0-1])$", register_string)
        if f_match:
            reg_num = int(f_match.group(1))
-            return {"class": "register", "register": {"prefix": "f", "name": str(reg_num)}}
+            return {
                "class": "register",
                "register": {"prefix": "f", "name": str(reg_num)},
            }
        # Check for vector registers (v0-v31)
-        v_match = re.match(r'^v([0-9]|[1-2][0-9]|3[0-1])$', register_string)
+        v_match = re.match(r"^v([0-9]|[1-2][0-9]|3[0-1])$", register_string)
        if v_match:
            reg_num = int(v_match.group(1))
-            return {"class": "register", "register": {"prefix": "v", "name": str(reg_num)}}
+            return {
                "class": "register",
                "register": {"prefix": "v", "name": str(reg_num)},
            }
        # Check for ABI names
        abi_names = {
-            "zero": 0, "ra": 1, "sp": 2, "gp": 3, "tp": 4,
+            "zero": 0,
-            "t0": 5, "t1": 6, "t2": 7,
+            "ra": 1,
-            "s0": 8, "fp": 8, "s1": 9,
+            "sp": 2,
-            "a0": 10, "a1": 11, "a2": 12, "a3": 13, "a4": 14, "a5": 15, "a6": 16, "a7": 17,
+            "gp": 3,
-            "s2": 18, "s3": 19, "s4": 20, "s5": 21, "s6": 22, "s7": 23, "s8": 24, "s9": 25, "s10": 26, "s11": 27,
+            "tp": 4,
-            "t3": 28, "t4": 29, "t5": 30, "t6": 31
+            "t0": 5,
            "t1": 6,
            "t2": 7,
            "s0": 8,
            "fp": 8,
            "s1": 9,
            "a0": 10,
            "a1": 11,
            "a2": 12,
            "a3": 13,
            "a4": 14,
            "a5": 15,
            "a6": 16,
            "a7": 17,
            "s2": 18,
            "s3": 19,
            "s4": 20,
            "s5": 21,
            "s6": 22,
            "s7": 23,
            "s8": 24,
            "s9": 25,
            "s10": 26,
            "s11": 27,
            "t3": 28,
            "t4": 29,
            "t5": 30,
            "t6": 31,
        }
        if register_string in abi_names:
-            return {"class": "register", "register": {"prefix": "", "name": register_string}}
+            return {
                "class": "register",
                "register": {"prefix": "", "name": register_string},
            }
        # If no match is found
        return None
@@ -642,15 +897,38 @@ class ParserRISCV(BaseParser):
        # ABI name mapping for integer registers
        abi_to_x = {
-            "zero": "x0", "ra": "x1", "sp": "x2", "gp": "x3", "tp": "x4",
+            "zero": "x0",
-            "t0": "x5", "t1": "x6", "t2": "x7",
+            "ra": "x1",
-            "s0": "x8", "s1": "x9",
+            "sp": "x2",
-            "a0": "x10", "a1": "x11", "a2": "x12", "a3": "x13",
+            "gp": "x3",
-            "a4": "x14", "a5": "x15", "a6": "x16", "a7": "x17",
+            "tp": "x4",
-            "s2": "x18", "s3": "x19", "s4": "x20", "s5": "x21",
+            "t0": "x5",
-            "s6": "x22", "s7": "x23", "s8": "x24", "s9": "x25",
+            "t1": "x6",
-            "s10": "x26", "s11": "x27",
+            "t2": "x7",
-            "t3": "x28", "t4": "x29", "t5": "x30", "t6": "x31"
+            "s0": "x8",
            "s1": "x9",
            "a0": "x10",
            "a1": "x11",
            "a2": "x12",
            "a3": "x13",
            "a4": "x14",
            "a5": "x15",
            "a6": "x16",
            "a7": "x17",
            "s2": "x18",
            "s3": "x19",
            "s4": "x20",
            "s5": "x21",
            "s6": "x22",
            "s7": "x23",
            "s8": "x24",
            "s9": "x25",
            "s10": "x26",
            "s11": "x27",
            "t3": "x28",
            "t4": "x29",
            "t5": "x30",
            "t6": "x31",
        }
        # For integer register ABI names
@@ -751,7 +1029,7 @@ class ParserRISCV(BaseParser):
            # For raw integer values or string immediates
            return ImmediateOperand(
                imd_type="int",
-                value=str(operand) if isinstance(operand, int) else operand
+                value=str(operand) if isinstance(operand, int) else operand,
            )
        elif isinstance(operand, dict) and "imd" in operand:
            # For immediate operands from instruction definitions
@@ -759,11 +1037,8 @@ class ParserRISCV(BaseParser):
                imd_type=operand["imd"],
                value=operand.get("value"),
                identifier=operand.get("identifier"),
-                shift=operand.get("shift")
+                shift=operand.get("shift"),
            )
        else:
            # For any other immediate format
-            return ImmediateOperand(
+            return ImmediateOperand(imd_type="int", value=str(operand))
                imd_type="int",
                value=str(operand)
            ) 
--- a/osaca/semantics/hw_model.py
+++ b/osaca/semantics/hw_model.py
@@ -1025,6 +1025,7 @@ class MachineModel(object):
            try:
                # Need to check if they refer to the same register
                from osaca.parser import ParserRISCV
                parser = ParserRISCV()
                reg_canonical = parser._get_canonical_reg_name(reg)
                i_reg_canonical = parser._get_canonical_reg_name(i_reg)
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -307,11 +307,13 @@ class TestCLI(unittest.TestCase):
    @staticmethod
    def _find_file(kernel, arch, comp):
        testdir = os.path.dirname(__file__)
        # "rv64" is four characters long, so keep it whole instead of truncating to the usual three-character arch prefix
        arch_prefix = arch.lower() if arch.lower() == "rv64" else arch[:3].lower()
        name = os.path.join(
            testdir,
            "../examples",
            kernel,
-            kernel + ".s." + arch[:3].lower() + "." + comp.lower() + ".s",
+            kernel + ".s." + arch_prefix + "." + comp.lower() + ".s",
        )
        if kernel == "j2d" and arch.lower() == "csx":
            name = name[:-1] + "AVX.s"
--- a/tests/test_parser_RISCV.py
+++ b/tests/test_parser_RISCV.py
@@ -8,9 +8,7 @@ import unittest
 from pyparsing import ParseException
-from osaca.parser import ParserRISCV, InstructionForm
+from osaca.parser import ParserRISCV
 from osaca.parser.directive import DirectiveOperand
 from osaca.parser.memory import MemoryOperand
 from osaca.parser.register import RegisterOperand
 from osaca.parser.immediate import ImmediateOperand
 from osaca.parser.identifier import IdentifierOperand
@@ -180,12 +178,10 @@ class TestParserRISCV(unittest.TestCase):
        # Test floating-point registers
        reg_fa0 = RegisterOperand(prefix="f", name="a0")
        reg_fa1 = RegisterOperand(prefix="f", name="a1")
        reg_f10 = RegisterOperand(prefix="f", name="10")
        # Test vector registers
        reg_v1 = RegisterOperand(prefix="v", name="1")
        reg_v2 = RegisterOperand(prefix="v", name="2")
        # Test register type detection
        self.assertTrue(self.parser.is_gpr(reg_a0))