Add support for the Intel syntax supported by MSVC and ICC

2026-01-05 02:30:08 +01:00 · 2025-02-02 14:02:16 +01:00
parent 785a365c63
commit 1a7c1588f6
30 changed files with 2744 additions and 499 deletions
--- a/osaca/data/model_importer.py
+++ b/osaca/data/model_importer.py
@@ -111,7 +111,8 @@ def extract_model(tree, arch, skip_mem=True):
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
-    parser = get_parser(isa)
+    # The model uses the AT&T syntax.
+    parser = get_parser(isa, "ATT")

    for instruction_tag in tree.findall(".//instruction"):
        ignore = False
--- a/osaca/osaca.py
+++ b/osaca/osaca.py
@@ -11,7 +11,7 @@ from ruamel.yaml import YAML

 from osaca.db_interface import import_benchmark_output, sanity_check
 from osaca.frontend import Frontend
-from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
+from osaca.parser import BaseParser, ParserAArch64, ParserX86, ParserX86ATT, ParserX86Intel
 from osaca.semantics import (
    INSTR_FLAGS,
    ArchSemantics,
@@ -47,6 +47,10 @@ DEFAULT_ARCHS = {
    "aarch64": "V2",
    "x86": "SPR",
 }
+SUPPORTED_SYNTAXES = [
+    "ATT",
+    "INTEL",
+]


 # Stolen from pip
@@ -108,6 +112,12 @@ def create_parser(parser=None):
        "ZEN4, TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a "
        "default uarch for x86/AArch64.",
    )
+    parser.add_argument(
+        "--syntax",
+        type=str,
+        help="Define the assembly syntax (ATT, Intel) for x86. If no syntax is given, OSACA "
+        "tries to determine automatically the syntax to use.",
+    )
    parser.add_argument(
        "--fixed",
        action="store_true",
@@ -232,6 +242,14 @@ def check_arguments(args, parser):
        parser.error(
            "Microarchitecture not supported. Please see --help for all valid architecture codes."
        )
+    if args.syntax and args.arch and MachineModel.get_isa_for_arch(args.arch) != "x86":
+        parser.error(
+            "Syntax can only be explicitly specified for an x86 microarchitecture"
+        )
+    if args.syntax and args.syntax.upper() not in SUPPORTED_SYNTAXES:
+        parser.error(
+            "Assembly syntax not supported. Please see --help for all valid assembly syntaxes."
+        )
    if "import_data" in args and args.import_data not in supported_import_files:
        parser.error(
            "Microbenchmark not supported for data import. Please see --help for all valid "
@@ -310,30 +328,56 @@ def inspect(args, output_file=sys.stdout):
    code = args.file.read()

    # Detect ISA if necessary
-    arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
-    print_arch_warning = False if args.arch else True
-    isa = MachineModel.get_isa_for_arch(arch)
+    detected_isa, detected_syntax = BaseParser.detect_ISA(code)
+    detected_arch = DEFAULT_ARCHS[detected_isa]
+
+    print_arch_warning = not args.arch
    verbose = args.verbose
    ignore_unknown = args.ignore_unknown

-    # Parse file
-    parser = get_asm_parser(arch)
-    try:
-        parsed_code = parser.parse_file(code)
-    except Exception as e:
-        # probably the wrong parser based on heuristic
-        if args.arch is None:
-            # change ISA and try again
-            arch = (
-                DEFAULT_ARCHS["x86"]
-                if BaseParser.detect_ISA(code) == "aarch64"
-                else DEFAULT_ARCHS["aarch64"]
-            )
-            isa = MachineModel.get_isa_for_arch(arch)
-            parser = get_asm_parser(arch)
+    # If the arch/syntax is explicitly specified, that's the only thing we'll try.  Otherwise, we'll
+    # look at all the possible archs/syntaxes, but with our detected arch/syntax last in the list,
+    # thus tried first.
+    if args.arch:
+        archs_to_try = [args.arch]
+    else:
+        archs_to_try = list(DEFAULT_ARCHS.values())  # arch codes (values), not the ISA-name keys
+        archs_to_try.remove(detected_arch)
+        archs_to_try.append(detected_arch)
+    if args.syntax:
+        syntaxes_to_try = [args.syntax.upper()]  # validated upper-cased; parsers expect "ATT"/"INTEL"
+    else:
+        syntaxes_to_try = SUPPORTED_SYNTAXES + [None]
+        syntaxes_to_try.remove(detected_syntax)
+        syntaxes_to_try.append(detected_syntax)
+
+    # Filter the cross-product of archs and syntaxes to eliminate the combinations that don't make
+    # sense.
+    combinations_to_try = [
+        (arch, syntax)
+        for arch in archs_to_try
+        for syntax in syntaxes_to_try
+        if (syntax is not None) == (MachineModel.get_isa_for_arch(arch) == "x86")
+    ]
+
+    # Parse file.
+    message = ""
+    single_combination = len(combinations_to_try) == 1
+    while True:
+        arch, syntax = combinations_to_try.pop()
+        parser = get_asm_parser(arch, syntax)
+        try:
            parsed_code = parser.parse_file(code)
-        else:
-            raise e
+            break
+        except Exception as e:
+            message += f"\nWith arch {arch} and syntax {syntax} got error: {e}."
+            # Either the wrong parser based on heuristic, or a bona fide syntax error (or
+            # unsupported syntax).  For ease of debugging, we emit the entire exception trace if
+            # we tried a single arch/syntax combination.  If we tried multiple combinations, we
+            # don't emit the traceback as it would apply to the latest combination tried, which is
+            # probably the less interesting.
+            if not combinations_to_try:
+                raise SyntaxError(message) from e if single_combination else None

    # Reduce to marked kernel or chosen section and add semantics
    if args.lines:
@@ -341,13 +385,14 @@ def inspect(args, output_file=sys.stdout):
        kernel = [line for line in parsed_code if line.line_number in line_range]
        print_length_warning = False
    else:
-        kernel = reduce_to_section(parsed_code, isa)
+        kernel = reduce_to_section(parsed_code, parser)
        # Print warning if kernel has no markers and is larger than threshold (100)
        print_length_warning = (
            True if len(kernel) == len(parsed_code) and len(kernel) > 100 else False
        )
    machine_model = MachineModel(arch=arch)
-    semantics = ArchSemantics(machine_model)
+    semantics = ArchSemantics(parser, machine_model)
+    semantics.normalize_instruction_forms(kernel)
    semantics.add_semantics(kernel)
    # Do optimal schedule for kernel throughput if wished
    if not args.fixed:
@@ -417,7 +462,7 @@ def run(args, output_file=sys.stdout):


@lru_cache()
-def get_asm_parser(arch) -> BaseParser:
+def get_asm_parser(arch, syntax) -> BaseParser:
    """
    Helper function to create the right parser for a specific architecture.

@@ -427,7 +472,7 @@ def get_asm_parser(arch) -> BaseParser:
    """
    isa = MachineModel.get_isa_for_arch(arch)
    if isa == "x86":
-        return ParserX86ATT()
+        return ParserX86ATT() if syntax == "ATT" else ParserX86Intel()
    elif isa == "aarch64":
        return ParserAArch64()

--- a/osaca/parser/init.py
+++ b/osaca/parser/init.py
@@ -1,11 +1,13 @@
 """
 Collection of parsers supported by OSACA.

-Only the parser below will be exported, so please add new parsers to __all__.
+Only the parsers below will be exported, so please add new parsers to __all__.
 """

 from .base_parser import BaseParser
+from .parser_x86 import ParserX86
 from .parser_x86att import ParserX86ATT
+from .parser_x86intel import ParserX86Intel
 from .parser_AArch64 import ParserAArch64
 from .instruction_form import InstructionForm
 from .operand import Operand
@@ -14,15 +16,17 @@ __all__ = [
    "Operand",
    "InstructionForm",
    "BaseParser",
+    "ParserX86",
    "ParserX86ATT",
+    "ParserX86Intel",
    "ParserAArch64",
    "get_parser",
 ]


-def get_parser(isa):
+def get_parser(isa, syntax):
    if isa.lower() == "x86":
-        return ParserX86ATT()
+        return ParserX86ATT() if syntax == "ATT" else ParserX86Intel()
    elif isa.lower() == "aarch64":
        return ParserAArch64()
    else:
--- a/osaca/parser/base_parser.py
+++ b/osaca/parser/base_parser.py
@@ -3,6 +3,8 @@
 import operator
 import re

+from osaca.semantics.hw_model import MachineModel
+

 class BaseParser(object):
    # Identifiers for operand types
@@ -25,20 +27,62 @@ class BaseParser(object):
            self.construct_parser()
            self._parser_constructed = True

+    def isa(self):
+        # Done in derived classes
+        raise NotImplementedError
+
+    # The marker functions return lists of `InstructionForm` that are used to find the IACA markers
+    # in the parsed code.  In addition to just a list, the marker may have a structure like
+    # [I1, [I2, I3], I4, ...] where the nested list indicates that at least one of I2 and I3 must
+    # match the second instruction in the fragment of parsed code.
+    # If an instruction form is a `DirectiveOperand`, the match may happen over several directive
+    # operands in the parsed code, provided that the directives have the same name and the
+    # parameters are in sequence with respect to the pattern.  This provides an easy way to describe
+    # a sequence of bytes irrespective of the way it was grouped in the assembly source.
+    # Note that markers must be matched *before* normalization.
+    def start_marker(self):
+        # Done in derived classes
+        raise NotImplementedError
+
+    def end_marker(self):
+        # Done in derived classes
+        raise NotImplementedError
+
+    # Performs all the normalization needed to match the instruction to the ISA/arch model.  This
+    # method must set the `normalized` property of the instruction and must be idempotent.
+    def normalize_instruction_form(
+        self,
+        instruction_form,
+        isa_model: MachineModel,
+        arch_model: MachineModel
+    ):
+        raise NotImplementedError
+
    @staticmethod
    def detect_ISA(file_content):
-        """Detect the ISA of the assembly based on the used registers and return the ISA code."""
+        """
+        Detect the ISA of the assembly based on the used registers and return the ISA code.
+
+        :param str file_content: assembly code.
+        :return: a tuple isa, syntax describing the architecture and the assembly syntax,
+                 if appropriate.  If there is no notion of syntax, the second element is None.
+        """
        # Check for the amount of registers in the code to determine the ISA
        # 1) Check for xmm, ymm, zmm, rax, rbx, rcx, and rdx registers in x86
+        #    AT&T syntax.  There is a % before each register name.
        heuristics_x86ATT = [r"%[xyz]mm[0-9]", r"%[er][abcd]x[0-9]"]
-        # 2) check for v and z vector registers and x/w general-purpose registers
+        # 2) Same as above, but for the Intel syntax.  There is no % before the register names.
+        heuristics_x86Intel = [r"[^%][xyz]mm[0-9]", r"[^%][er][abcd]x[0-9]"]
+        # 3) check for v and z vector registers and x/w general-purpose registers
        heuristics_aarch64 = [r"[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]", r"[wx][0-9]"]
-        matches = {"x86": 0, "aarch64": 0}
+        matches = {("x86", "ATT"): 0, ("x86", "INTEL"): 0, ("aarch64", None): 0}

        for h in heuristics_x86ATT:
-            matches["x86"] += len(re.findall(h, file_content))
+            matches[("x86", "ATT")] += len(re.findall(h, file_content))
+        for h in heuristics_x86Intel:
+            matches[("x86", "INTEL")] += len(re.findall(h, file_content))
        for h in heuristics_aarch64:
-            matches["aarch64"] += len(re.findall(h, file_content))
+            matches[("aarch64", None)] += len(re.findall(h, file_content))

        return max(matches.items(), key=operator.itemgetter(1))[0]

@@ -94,6 +138,14 @@ class BaseParser(object):
    def get_full_reg_name(self, register):
        raise NotImplementedError

+    # Must be called on a *normalized* instruction.
+    def get_regular_source_operands(self, instruction_form):
+        raise NotImplementedError
+
+    # Must be called on a *normalized* instruction.
+    def get_regular_destination_operands(self, instruction_form):
+        raise NotImplementedError
+
    def normalize_imd(self, imd):
        raise NotImplementedError

--- a/osaca/parser/identifier.py
+++ b/osaca/parser/identifier.py
@@ -41,3 +41,12 @@ class IdentifierOperand(Operand):

    def __repr__(self):
        return self.__str__()
+
+    def __eq__(self, other):
+        if isinstance(other, IdentifierOperand):
+            return (
+                self._name == other._name
+                and self._offset == other._offset
+                and self._relocation == other._relocation
+            )
+        return False
--- a/osaca/parser/instruction_form.py
+++ b/osaca/parser/instruction_form.py
@@ -19,6 +19,7 @@ class InstructionForm:
        port_pressure=None,
        operation=None,
        breaks_dependency_on_equal_operands=False,
+        normalized=False,
    ):
        self._mnemonic = mnemonic
        self._operands = operands
@@ -33,6 +34,7 @@ class InstructionForm:
        self._operation = operation
        self._uops = uops
        self._breaks_dependency_on_equal_operands = breaks_dependency_on_equal_operands
+        self._normalized = normalized
        self._latency = latency
        self._throughput = throughput
        self._latency_cp = []
@@ -42,6 +44,10 @@ class InstructionForm:
        self._port_uops = []
        self._flags = []

+    def check_normalized(self):
+        if not self._normalized:
+            raise AssertionError("Unnormalized instruction")
+
    @property
    def semantic_operands(self):
        return self._semantic_operands
@@ -114,6 +120,10 @@ class InstructionForm:
    def breaks_dependency_on_equal_operands(self):
        return self._breaks_dependency_on_equal_operands

+    @property
+    def normalized(self):
+        return self._normalized
+
    @semantic_operands.setter
    def semantic_operands(self, semantic_operands):
        self._semantic_operands = semantic_operands
@@ -142,6 +152,10 @@ class InstructionForm:
    def breaks_dependency_on_equal_operands(self, boolean):
        self._breaks_dependency_on_equal_operands = boolean

+    @normalized.setter
+    def normalized(self, normalized):
+        self._normalized = normalized
+
    @mnemonic.setter
    def mnemonic(self, mnemonic):
        self._mnemonic = mnemonic
--- a/osaca/parser/label.py
+++ b/osaca/parser/label.py
@@ -20,3 +20,8 @@ class LabelOperand(Operand):

    def __repr__(self):
        return self.__str__()
+
+    def __eq__(self, other):
+        if isinstance(other, LabelOperand):
+            return self._name == other._name
+        return False
--- a/osaca/parser/memory.py
+++ b/osaca/parser/memory.py
@@ -15,6 +15,7 @@ class MemoryOperand(Operand):
        pre_indexed=False,
        post_indexed=False,
        indexed_val=None,
+        data_type=None,
        src=None,
        dst=None,
        source=False,
@@ -30,6 +31,7 @@ class MemoryOperand(Operand):
        self._pre_indexed = pre_indexed
        self._post_indexed = post_indexed
        self._indexed_val = indexed_val
+        self._data_type = data_type
        # type of register we store from (`src`) or load to (`dst`)
        self._src = src
        self._dst = dst
@@ -74,6 +76,14 @@ class MemoryOperand(Operand):
    def indexed_val(self):
        return self._indexed_val

+    @property
+    def data_type(self):
+        return self._data_type
+
+    @data_type.setter
+    def data_type(self, data_type):
+        self._data_type = data_type
+
    @property
    def src(self):
        return self._src
--- a/osaca/parser/parser_AArch64.py
+++ b/osaca/parser/parser_AArch64.py
@@ -13,6 +13,7 @@ from osaca.parser.identifier import IdentifierOperand
 from osaca.parser.immediate import ImmediateOperand
 from osaca.parser.condition import ConditionOperand
 from osaca.parser.prefetch import PrefetchOperand
+from osaca.semantics.hw_model import MachineModel


 class ParserAArch64(BaseParser):
@@ -26,7 +27,58 @@ class ParserAArch64(BaseParser):

    def __init__(self):
        super().__init__()
-        self.isa = "aarch64"
+
+    def isa(self):
+        return "aarch64"
+
+    def start_marker(self):
+        return [
+            InstructionForm(
+                mnemonic="mov",
+                operands=[RegisterOperand(name="1", prefix="x"), ImmediateOperand(value=111)]
+            ),
+            InstructionForm(
+                directive_id=DirectiveOperand(name="byte", parameters=["213", "3", "32", "31"])
+            )
+        ]
+
+    def end_marker(self):
+        return [
+            InstructionForm(
+                mnemonic="mov",
+                operands=[RegisterOperand(name="1", prefix="x"), ImmediateOperand(value=222)]
+            ),
+            InstructionForm(
+                directive_id=DirectiveOperand(name="byte", parameters=["213", "3", "32", "31"])
+            )
+        ]
+
+    def normalize_instruction_form(
+        self,
+        instruction_form,
+        isa_model: MachineModel,
+        arch_model: MachineModel
+    ):
+        """
+        If the instruction doesn't exist in the machine model, normalize it by dropping the shape
+        suffix.
+        """
+        if instruction_form.normalized:
+            return
+        instruction_form.normalized = True
+
+        mnemonic = instruction_form.mnemonic
+        if not mnemonic:
+            return
+        model = arch_model.get_instruction(mnemonic, instruction_form.operands)
+        if not model:
+            if "." in mnemonic:
+                # Check for instruction without shape/cc suffix.
+                suffix_start = mnemonic.index(".")
+                mnemonic = mnemonic[:suffix_start]
+                model = arch_model.get_instruction(mnemonic, instruction_form.operands)
+                if model:
+                    instruction_form.mnemonic = mnemonic

    def construct_parser(self):
        """Create parser for ARM AArch64 ISA."""
@@ -589,6 +641,21 @@ class ParserAArch64(BaseParser):
            name += "[" + str(register.index) + "]"
        return name

+    def get_regular_source_operands(self, instruction_form):
+        """Get source operand of given instruction form assuming regular src/dst behavior."""
+        # if there is only one operand, assume it is a source operand
+        if len(instruction_form.operands) == 1:
+            return [instruction_form.operands[0]]
+        return [op for op in instruction_form.operands[1:]]
+
+    def get_regular_destination_operands(self, instruction_form):
+        """Get destination operand of given instruction form assuming regular src/dst behavior."""
+        # if there is only one operand, assume no destination
+        if len(instruction_form.operands) == 1:
+            return []
+        # return first operand
+        return instruction_form.operands[:1]
+
    def normalize_imd(self, imd):
        """Normalize immediate to decimal based representation"""
        if isinstance(imd, IdentifierOperand):
--- a/osaca/parser/parser_x86.py
+++ b/osaca/parser/parser_x86.py
@@ -0,0 +1,123 @@
+import re
+import string
+
+from osaca.parser import BaseParser
+
+
+class ParserX86(BaseParser):
+    _instance = None
+
+    # Singleton pattern, as this is created very many times.
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super(ParserX86, cls).__new__(cls)
+        return cls._instance
+
+    def __init__(self):
+        super().__init__()
+
+    def isa(self):
+        return "x86"
+
+    def is_reg_dependend_of(self, reg_a, reg_b):
+        """Check if ``reg_a`` is dependent on ``reg_b``"""
+        reg_a_name = reg_a.name.upper()
+        reg_b_name = reg_b.name.upper()
+
+        # Check if they are the same registers
+        if reg_a_name == reg_b_name:
+            return True
+        # Check vector registers first
+        if self.is_vector_register(reg_a):
+            if self.is_vector_register(reg_b):
+                if reg_a_name[1:] == reg_b_name[1:]:
+                    # Registers in the same vector space
+                    return True
+            return False
+        # Check basic GPRs
+        gpr_groups = {
+            "A": ["RAX", "EAX", "AX", "AH", "AL"],
+            "B": ["RBX", "EBX", "BX", "BH", "BL"],
+            "C": ["RCX", "ECX", "CX", "CH", "CL"],
+            "D": ["RDX", "EDX", "DX", "DH", "DL"],
+            "SP": ["RSP", "ESP", "SP", "SPL"],
+            "SRC": ["RSI", "ESI", "SI", "SIL"],
+            "DST": ["RDI", "EDI", "DI", "DIL"],
+        }
+        if self.is_basic_gpr(reg_a):
+            if self.is_basic_gpr(reg_b):
+                for dep_group in gpr_groups.values():
+                    if reg_a_name in dep_group:
+                        if reg_b_name in dep_group:
+                            return True
+            return False
+
+        # Check other GPRs
+        ma = re.match(r"R([0-9]+)[DWB]?", reg_a_name)
+        mb = re.match(r"R([0-9]+)[DWB]?", reg_b_name)
+        if ma and mb and ma.group(1) == mb.group(1):
+            return True
+
+        # No dependencies
+        return False
+
+    def is_basic_gpr(self, register):
+        """Check if register is a basic general purpose register (ebi, rax, ...)"""
+        if any(char.isdigit() for char in register.name) or any(
+            register.name.lower().startswith(x) for x in ["mm", "xmm", "ymm", "zmm"]
+        ):
+            return False
+        return True
+
+    def is_gpr(self, register):
+        """Check if register is a general purpose register"""
+        if register is None:
+            return False
+        if self.is_basic_gpr(register):
+            return True
+        return re.match(r"R([0-9]+)[DWB]?", register.name, re.IGNORECASE)
+
+    def is_vector_register(self, register):
+        """Check if register is a vector register"""
+        if register is None or register.name is None:
+            return False
+        if register.name.rstrip(string.digits).lower() in [
+            "mm",
+            "xmm",
+            "ymm",
+            "zmm",
+        ]:
+            return True
+        return False
+
+    def get_reg_type(self, register):
+        """Get register type"""
+        if register is None:
+            return False
+        if self.is_gpr(register):
+            return "gpr"
+        elif self.is_vector_register(register):
+            return register.name.rstrip(string.digits).lower()
+        raise ValueError
+
+    def is_flag_dependend_of(self, flag_a, flag_b):
+        """Check if ``flag_a`` is dependent on ``flag_b``"""
+        # we assume flags are independent of each other, e.g., CF can be read while ZF gets written
+        # TODO validate this assumption
+        return flag_a.name == flag_b.name
+
+    def get_regular_source_operands(self, instruction_form):
+        """Get source operand of given instruction form assuming regular src/dst behavior."""
+        # if there is only one operand, assume it is a source operand
+        if len(instruction_form.operands) == 1:
+            return [instruction_form.operands[0]]
+        # return all but last operand
+        return [op for op in instruction_form.operands[0:-1]]
+
+    def get_regular_destination_operands(self, instruction_form):
+        """Get destination operand of given instruction form assuming regular src/dst behavior."""
+        # if there is only one operand, assume no destination
+        if len(instruction_form.operands) == 1:
+            return []
+        # return last operand
+        return instruction_form.operands[-1:]
--- a/osaca/parser/parser_x86att.py
+++ b/osaca/parser/parser_x86att.py
@@ -5,7 +5,7 @@ import re

 import pyparsing as pp

-from osaca.parser import BaseParser
+from osaca.parser import ParserX86
 from osaca.parser.instruction_form import InstructionForm
 from osaca.parser.directive import DirectiveOperand
 from osaca.parser.memory import MemoryOperand
@@ -13,10 +13,12 @@ from osaca.parser.label import LabelOperand
 from osaca.parser.register import RegisterOperand
 from osaca.parser.identifier import IdentifierOperand
 from osaca.parser.immediate import ImmediateOperand
+from osaca.semantics.hw_model import MachineModel


-class ParserX86ATT(BaseParser):
+class ParserX86ATT(ParserX86):
    _instance = None
+    GAS_SUFFIXES = "bswlqt"

    # Singelton pattern, as this is created very many times
    def __new__(cls):
@@ -26,7 +28,66 @@ class ParserX86ATT(BaseParser):

    def __init__(self):
        super().__init__()
-        self.isa = "x86"
+
+    def start_marker(self):
+        return [
+            [
+                InstructionForm(
+                    mnemonic="mov",
+                    operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")]
+                 ),
+                 InstructionForm(
+                     mnemonic="movl",
+                     operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")]
+                 )
+            ],
+            InstructionForm(
+                directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"])
+            )
+        ]
+
+    def end_marker(self):
+        return [
+            [
+                InstructionForm(
+                    mnemonic="mov",
+                    operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")]
+                 ),
+                 InstructionForm(
+                     mnemonic="movl",
+                     operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")]
+                 )
+            ],
+            InstructionForm(
+                directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"])
+            )
+        ]
+
+    def normalize_instruction_form(
+        self,
+        instruction_form,
+        isa_model: MachineModel,
+        arch_model: MachineModel
+    ):
+        """
+        If the instruction doesn't exist in the machine model, normalize it by dropping the GAS
+        suffix.
+        """
+        if instruction_form.normalized:
+            return
+        instruction_form.normalized = True
+
+        mnemonic = instruction_form.mnemonic
+        if not mnemonic:
+            return
+        model = arch_model.get_instruction(mnemonic, instruction_form.operands)
+        if not model:
+            # Check for instruction without GAS suffix.
+            if mnemonic[-1] in self.GAS_SUFFIXES:
+                mnemonic = mnemonic[:-1]
+                model = arch_model.get_instruction(mnemonic, instruction_form.operands)
+                if model:
+                    instruction_form.mnemonic = mnemonic

    def construct_parser(self):
        """Create parser for x86 AT&T ISA."""
@@ -253,10 +314,10 @@ class ParserX86ATT(BaseParser):
        if result is None:
            try:
                result = self.parse_instruction(line)
-            except pp.ParseException:
+            except pp.ParseException as e:
                raise ValueError(
                    "Could not parse instruction on line {}: {!r}".format(line_number, line)
-                )
+                ) from e
            instruction_form.mnemonic = result.mnemonic
            instruction_form.operands = result.operands
            instruction_form.comment = result.comment
@@ -393,90 +454,3 @@ class ParserX86ATT(BaseParser):
                return imd.value
        # identifier
        return imd
-
-    def is_flag_dependend_of(self, flag_a, flag_b):
-        """Check if ``flag_a`` is dependent on ``flag_b``"""
-        # we assume flags are independent of each other, e.g., CF can be read while ZF gets written
-        # TODO validate this assumption
-        return flag_a.name == flag_b.name
-
-    def is_reg_dependend_of(self, reg_a, reg_b):
-        """Check if ``reg_a`` is dependent on ``reg_b``"""
-        reg_a_name = reg_a.name.upper()
-        reg_b_name = reg_b.name.upper()
-
-        # Check if they are the same registers
-        if reg_a_name == reg_b_name:
-            return True
-        # Check vector registers first
-        if self.is_vector_register(reg_a):
-            if self.is_vector_register(reg_b):
-                if reg_a_name[1:] == reg_b_name[1:]:
-                    # Registers in the same vector space
-                    return True
-            return False
-        # Check basic GPRs
-        gpr_groups = {
-            "A": ["RAX", "EAX", "AX", "AH", "AL"],
-            "B": ["RBX", "EBX", "BX", "BH", "BL"],
-            "C": ["RCX", "ECX", "CX", "CH", "CL"],
-            "D": ["RDX", "EDX", "DX", "DH", "DL"],
-            "SP": ["RSP", "ESP", "SP", "SPL"],
-            "SRC": ["RSI", "ESI", "SI", "SIL"],
-            "DST": ["RDI", "EDI", "DI", "DIL"],
-        }
-        if self.is_basic_gpr(reg_a):
-            if self.is_basic_gpr(reg_b):
-                for dep_group in gpr_groups.values():
-                    if reg_a_name in dep_group:
-                        if reg_b_name in dep_group:
-                            return True
-            return False
-
-        # Check other GPRs
-        ma = re.match(r"R([0-9]+)[DWB]?", reg_a_name)
-        mb = re.match(r"R([0-9]+)[DWB]?", reg_b_name)
-        if ma and mb and ma.group(1) == mb.group(1):
-            return True
-
-        # No dependencies
-        return False
-
-    def is_basic_gpr(self, register):
-        """Check if register is a basic general purpose register (ebi, rax, ...)"""
-        if any(char.isdigit() for char in register.name) or any(
-            register.name.lower().startswith(x) for x in ["mm", "xmm", "ymm", "zmm"]
-        ):
-            return False
-        return True
-
-    def is_gpr(self, register):
-        """Check if register is a general purpose register"""
-        if register is None:
-            return False
-        if self.is_basic_gpr(register):
-            return True
-        return re.match(r"R([0-9]+)[DWB]?", register.name, re.IGNORECASE)
-
-    def is_vector_register(self, register):
-        """Check if register is a vector register"""
-        if register is None or register.name is None:
-            return False
-        if register.name.rstrip(string.digits).lower() in [
-            "mm",
-            "xmm",
-            "ymm",
-            "zmm",
-        ]:
-            return True
-        return False
-
-    def get_reg_type(self, register):
-        """Get register type"""
-        if register is None:
-            return False
-        if self.is_gpr(register):
-            return "gpr"
-        elif self.is_vector_register(register):
-            return register.name.rstrip(string.digits).lower()
-        raise ValueError
--- a/osaca/parser/parser_x86intel.py
+++ b/osaca/parser/parser_x86intel.py
@@ -0,0 +1,830 @@
+#!/usr/bin/env python3
+
+import pyparsing as pp
+import re
+import string
+import unicodedata
+
+from osaca.parser import ParserX86
+from osaca.parser.directive import DirectiveOperand
+from osaca.parser.identifier import IdentifierOperand
+from osaca.parser.immediate import ImmediateOperand
+from osaca.parser.instruction_form import InstructionForm
+from osaca.parser.label import LabelOperand
+from osaca.parser.memory import MemoryOperand
+from osaca.parser.register import RegisterOperand
+from osaca.semantics.hw_model import MachineModel
+
+# We assume any non-ASCII characters except control characters and line terminators can be part of
+# identifiers; this is based on the assumption that no assembler uses non-ASCII white space and
+# syntax characters.
+# This approach is described at the end of https://www.unicode.org/reports/tr55/#Whitespace-Syntax.
+# It is appropriate for tools, such as this one, which process source code but do not fully validate
+# it (in this case, that’s the job of the assembler).
+NON_ASCII_PRINTABLE_CHARACTERS = "".join(
+    ch for ch in map(chr, range(0x80, 0x110000))
+    if unicodedata.category(ch) not in ("Cc", "Zl", "Zp", "Cs", "Cn")
+)
+
+# References:
+#   ASM386 Assembly Language Reference, document number 469165-003, https://mirror.math.princeton.edu/pub/oldlinux/Linux.old/Ref-docs/asm-ref.pdf.
+#   Microsoft Macro Assembler BNF Grammar, https://learn.microsoft.com/en-us/cpp/assembler/masm/masm-bnf-grammar?view=msvc-170.
+#   Intel Architecture Code Analyzer User's Guide, https://www.intel.com/content/dam/develop/external/us/en/documents/intel-architecture-code-analyzer-3-0-users-guide-157552.pdf.
+class ParserX86Intel(ParserX86):
+    _instance = None
+
+    # Singleton pattern, as this is created very many times: all calls share one instance.
+    def __new__(cls):
+        if cls._instance is None:  # first call: allocate the shared instance
+            cls._instance = super(ParserX86Intel, cls).__new__(cls)
+        return cls._instance
+
+    def __init__(self):  # NOTE: Python re-runs this on every ParserX86Intel() call
+        super().__init__()
+        self._equ = {}  # symbols defined by "=" directives (name -> raw value text)
+
+    # The IACA manual says: "For Microsoft* Visual C++ compiler, 64-bit version, use
+    # IACA_VC64_START and IACA_VC64_END, instead" (of IACA_START and IACA_END).
+    # TODO: Inconveniently, the code generated with optimization disabled (/Od) has two
+    # instructions.  We should support both patterns, but then who runs OSACA with /Od?
+    def start_marker(self):
+        """Return the instruction forms that mark the start of the kernel to analyze."""
+        marker = 111
+        gs_slot = MemoryOperand(
+            base=RegisterOperand(name="GS"),
+            offset=ImmediateOperand(value=marker),
+        )
+        return [
+            InstructionForm(
+                mnemonic="mov",
+                operands=[gs_slot, ImmediateOperand(value=marker)],
+            ),
+        ]
+
+    def end_marker(self):
+        """Return the instruction forms that mark the end of the kernel to analyze."""
+        marker = 222
+        gs_slot = MemoryOperand(
+            base=RegisterOperand(name="GS"),
+            offset=ImmediateOperand(value=marker),
+        )
+        return [
+            InstructionForm(
+                mnemonic="mov",
+                operands=[gs_slot, ImmediateOperand(value=marker)],
+            ),
+        ]
+
+    def normalize_instruction_form(
+        self,
+        instruction_form,
+        isa_model: MachineModel,
+        arch_model: MachineModel
+    ):
+        """
+        Convert an Intel-syntax instruction form, in place, to the conventions of the models:
+        use the mnemonic spelling they know (VEX or not, "d"/"q" suffix) and reverse the operands
+        when the ISA model has a single destination, as its last operand.  Runs once per form.
+        """
+        if instruction_form.normalized:
+            return
+        instruction_form.normalized = True
+
+        mnemonic = instruction_form.mnemonic
+        if not mnemonic:  # nothing to normalize, e.g. a label, directive or comment line
+            return
+
+        # The model may only contain the VEX-encoded instruction and we may have the non-VEX-encoded
+        # one, or vice-versa.  Note that this doesn't work when the arguments differ between VEX-
+        # encoded and non-VEX-encoded, e.g., for psubq.
+        if not arch_model.get_instruction(
+            mnemonic,
+            len(instruction_form.operands)
+        ):
+            if mnemonic[0] == 'v':
+                unvexed_mnemonic = mnemonic[1:]
+                if arch_model.get_instruction(
+                    unvexed_mnemonic,
+                    len(instruction_form.operands)
+                ):
+                    mnemonic = unvexed_mnemonic
+            else:
+                vexed_mnemonic = 'v' + mnemonic
+                if arch_model.get_instruction(
+                    vexed_mnemonic,
+                    len(instruction_form.operands)
+                ):
+                    mnemonic = vexed_mnemonic
+            instruction_form.mnemonic = mnemonic  # unchanged if neither spelling is in the model
+
+        # We cannot pass the operands because they may not match before the reordering.  We just
+        # pass the arity instead.  Also, this must use the ISA model, because that's where the
+        # source/destination information is found.
+        model = isa_model.get_instruction(mnemonic, len(instruction_form.operands))
+        has_single_destination_at_end = False
+        has_destination = False
+        if model:
+            for o in model.operands:
+                if o.source:
+                    if has_destination:  # a source follows the destination: it is not last
+                        has_single_destination_at_end = False
+                if o.destination:
+                    if has_destination:  # a second destination: it is not single
+                        has_single_destination_at_end = False
+                    else:
+                        has_destination = True
+                        has_single_destination_at_end = True
+        else:
+            # if there is only one operand, assume it is a source operand
+            has_single_destination_at_end = len(instruction_form.operands) > 1
+
+        if has_single_destination_at_end:
+            # It is important to reverse the operands, we cannot just move the first one last.  This
+            # makes a difference for instructions with 3 operands or more, such as roundsd: the
+            # model files expect the rounding mode (an immediate) first but the Intel syntax has it
+            # last.
+            instruction_form.operands.reverse()
+
+        # A hack to help with comparison instructions: if the model lists the instruction without a
+        # destination and neither model matches its two operands in this order, swap them.
+        if (model and
+            not has_destination and
+            len(instruction_form.operands) == 2
+            and not isa_model.get_instruction(
+                mnemonic,
+                instruction_form.operands
+            ) and not arch_model.get_instruction(
+                mnemonic,
+                instruction_form.operands
+            )):
+            instruction_form.operands.reverse()
+
+        # If a memory operand has a DWORD or QWORD data type, try the "d"/"q"-suffixed mnemonic.
+        data_type_to_suffix = {"DWORD": "d", "QWORD": "q"}
+        for o in instruction_form.operands:
+            if isinstance(o, MemoryOperand) and o.data_type:
+                suffix = data_type_to_suffix.get(o.data_type, None)
+                if suffix:
+                    suffixed_mnemonic = mnemonic + suffix
+                    if isa_model.get_instruction(
+                        suffixed_mnemonic,
+                        len(instruction_form.operands)
+                    ) or arch_model.get_instruction(
+                        suffixed_mnemonic,
+                        len(instruction_form.operands)
+                    ):
+                        instruction_form.mnemonic = suffixed_mnemonic
+                        break
+
+
+    def construct_parser(self):
+        """Create parser for x86 Intel ISA."""
+        # Numeric literal.
+        binary_number = pp.Combine(
+            pp.Word("01") + pp.CaselessLiteral("B")
+        )
+        octal_number = pp.Combine(
+            pp.Word("01234567") + pp.CaselessLiteral("O")
+        )
+        decimal_number = pp.Combine(
+            pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
+        )
+        hex_number = pp.Combine(
+            pp.Word(pp.hexnums) + pp.CaselessLiteral("H")
+        )
+        float_number = pp.Combine(
+            pp.Optional(pp.Literal("-")) + pp.Word(pp.nums) + pp.Word(".", pp.nums)
+        ).setResultsName("value")
+        integer_number = (
+            binary_number ^ octal_number ^ decimal_number ^ hex_number
+        ).setResultsName("value")
+
+        # Comment.
+        self.comment = pp.Word(";#", exact=1) + pp.Group(
+            pp.ZeroOrMore(pp.Word(pp.printables + NON_ASCII_PRINTABLE_CHARACTERS))
+        ).setResultsName(self.comment_id)
+
+        # Types.
+        data_type = (
+            pp.CaselessKeyword("BYTE")
+            | pp.CaselessKeyword("DWORD")
+            | pp.CaselessKeyword("FWORD")
+            | pp.CaselessKeyword("MMWORD")
+            | pp.CaselessKeyword("OWORD")
+            | pp.CaselessKeyword("QWORD")
+            | pp.CaselessKeyword("REAL10")
+            | pp.CaselessKeyword("REAL4")
+            | pp.CaselessKeyword("REAL8")
+            | pp.CaselessKeyword("SBYTE")
+            | pp.CaselessKeyword("SDWORD")
+            | pp.CaselessKeyword("SQWORD")
+            | pp.CaselessKeyword("SWORD")
+            | pp.CaselessKeyword("TBYTE")
+            | pp.CaselessKeyword("WORD")
+            | pp.CaselessKeyword("XMMWORD")
+            | pp.CaselessKeyword("YMMWORD")
+        ).setResultsName("data_type")
+
+        # Identifier.  Note that $ is not mentioned in the ASM386 Assembly Language Reference,
+        # but it is mentioned in the MASM syntax.  < and > apparently show up in C++ mangled names.
+        # ICC allows ".", at least in labels.
+        first = pp.Word(pp.alphas + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>", exact=1)
+        rest = pp.Word(pp.alphanums + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>")
+        identifier = pp.Group(
+            pp.Combine(first + pp.Optional(rest)).setResultsName("name")
+        ).setResultsName("identifier")
+
+        # Register.
+        # This follows the MASM grammar.
+        special_register = (
+            pp.CaselessKeyword("CR0")
+            | pp.CaselessKeyword("CR2")
+            | pp.CaselessKeyword("CR3")
+            | pp.CaselessKeyword("DR0")
+            | pp.CaselessKeyword("DR1")
+            | pp.CaselessKeyword("DR2")
+            | pp.CaselessKeyword("DR3")
+            | pp.CaselessKeyword("DR6")
+            | pp.CaselessKeyword("DR7")
+            | pp.CaselessKeyword("TR3")
+            | pp.CaselessKeyword("TR4")
+            | pp.CaselessKeyword("TR5")
+            | pp.CaselessKeyword("TR6")
+            | pp.CaselessKeyword("TR7")
+        ).setResultsName("name")
+        gp_register = (
+            pp.CaselessKeyword("AX")
+            | pp.CaselessKeyword("EAX")
+            | pp.CaselessKeyword("CX")
+            | pp.CaselessKeyword("ECX")
+            | pp.CaselessKeyword("DX")
+            | pp.CaselessKeyword("EDX")
+            | pp.CaselessKeyword("BX")
+            | pp.CaselessKeyword("EBX")
+            | pp.CaselessKeyword("DI")
+            | pp.CaselessKeyword("EDI")
+            | pp.CaselessKeyword("SI")
+            | pp.CaselessKeyword("ESI")
+            | pp.CaselessKeyword("BP")
+            | pp.CaselessKeyword("EBP")
+            | pp.CaselessKeyword("SP")
+            | pp.CaselessKeyword("ESP")
+            | pp.CaselessKeyword("R8W")
+            | pp.CaselessKeyword("R8D")
+            | pp.CaselessKeyword("R9W")
+            | pp.CaselessKeyword("R9D")
+            | pp.CaselessKeyword("R12D")
+            | pp.CaselessKeyword("R13W")
+            | pp.CaselessKeyword("R13D")
+            | pp.CaselessKeyword("R14W")
+            | pp.CaselessKeyword("R14D")
+        ).setResultsName("name")
+        byte_register = (
+            pp.CaselessKeyword("AL")
+            | pp.CaselessKeyword("AH")
+            | pp.CaselessKeyword("CL")
+            | pp.CaselessKeyword("CH")
+            | pp.CaselessKeyword("DL")
+            | pp.CaselessKeyword("DH")
+            | pp.CaselessKeyword("BL")
+            | pp.CaselessKeyword("BH")
+            | pp.CaselessKeyword("R8B")
+            | pp.CaselessKeyword("R9B")
+            | pp.CaselessKeyword("R10B")
+            | pp.CaselessKeyword("R11B")
+            | pp.CaselessKeyword("R12B")
+            | pp.CaselessKeyword("R13B")
+        ).setResultsName("name")
+        qword_register = (
+            pp.CaselessKeyword("RAX")
+            | pp.CaselessKeyword("RCX")
+            | pp.CaselessKeyword("RDX")
+            | pp.CaselessKeyword("RBX")
+            | pp.CaselessKeyword("RSP")
+            | pp.CaselessKeyword("RBP")
+            | pp.CaselessKeyword("RSI")
+            | pp.CaselessKeyword("RDI")
+            | pp.CaselessKeyword("R8")
+            | pp.CaselessKeyword("R9")
+            | pp.CaselessKeyword("R10")
+            | pp.CaselessKeyword("R11")
+            | pp.CaselessKeyword("R12")
+            | pp.CaselessKeyword("R13")
+            | pp.CaselessKeyword("R14")
+            | pp.CaselessKeyword("R15")
+        ).setResultsName("name")
+        fpu_register = pp.Combine(
+            pp.CaselessKeyword("ST")
+            + pp.Optional(pp.Literal("(") + pp.Word("01234567") + pp.Literal(")"))
+        ).setResultsName("name")
+        xmm_register = (
+            pp.Combine(pp.CaselessLiteral("XMM") + pp.Word(pp.nums))
+            | pp.Combine(pp.CaselessLiteral("XMM1") + pp.Word("012345"))
+        )
+        simd_register = (
+            pp.Combine(pp.CaselessLiteral("MM") + pp.Word("01234567"))
+            | xmm_register
+            | pp.Combine(pp.CaselessLiteral("YMM") + pp.Word(pp.nums))
+            | pp.Combine(pp.CaselessLiteral("YMM1") + pp.Word("012345"))
+        ).setResultsName("name")
+        segment_register = (
+            pp.CaselessKeyword("CS")
+            | pp.CaselessKeyword("DS")
+            | pp.CaselessKeyword("ES")
+            | pp.CaselessKeyword("FS")
+            | pp.CaselessKeyword("GS")
+            | pp.CaselessKeyword("SS")
+        ).setResultsName("name")
+        self.register = pp.Group(
+            special_register
+            | gp_register
+            | byte_register
+            | qword_register
+            | fpu_register
+            | simd_register
+            | segment_register
+            | pp.CaselessKeyword("RIP")
+        ).setResultsName(self.register_id)
+
+        # Register expressions.
+        base_register = self.register
+        index_register = self.register
+        scale = pp.Word("1248", exact=1)
+        post_displacement = pp.Group(
+            (pp.Literal("+") ^ pp.Literal("-")).setResultsName("sign")
+            + integer_number | identifier
+        ).setResultsName(self.immediate_id)
+        pre_displacement = pp.Group(integer_number + pp.Literal("+")
+        ).setResultsName(self.immediate_id)
+        indexed = pp.Group(
+            index_register.setResultsName("index")
+            + pp.Optional(pp.Literal("*")
+            + scale.setResultsName("scale"))
+        ).setResultsName("indexed")
+        register_expression = pp.Group(
+            pp.Literal("[")
+            + pp.Optional(pp.Group(pre_displacement).setResultsName("pre_displacement"))
+            + pp.Group(
+                base_register.setResultsName("base")
+                ^ pp.Group(
+                    base_register.setResultsName("base")
+                    + pp.Literal("+")
+                    + indexed).setResultsName("base_and_indexed")
+                ^ indexed
+               ).setResultsName("non_displacement")
+            + pp.Optional(pp.Group(post_displacement).setResultsName("post_displacement"))
+            + pp.Literal("]")
+        ).setResultsName("register_expression")
+
+        # Immediate.
+        immediate = pp.Group(
+            integer_number | float_number | identifier
+        ).setResultsName(self.immediate_id)
+
+        # Expressions.
+        # The ASM86 manual has weird expressions on page 130 (displacement outside of the register
+        # expression, multiple register expressions).  Let's ignore those for now, but see
+        # https://stackoverflow.com/questions/71540754/why-sometimes-use-offset-flatlabel-and-sometimes-not.
+        address_expression = pp.Group(
+            self.register.setResultsName("segment") + pp.Literal(":") + immediate
+            ^ immediate + register_expression
+            ^ register_expression
+            ^ identifier + pp.Optional(pp.Literal("+") + immediate)
+        ).setResultsName("address_expression")
+
+        offset_expression = pp.Group(
+            pp.CaselessKeyword("OFFSET")
+            + pp.Group(
+                pp.CaselessKeyword("GROUP")
+                | pp.CaselessKeyword("SEGMENT")
+                | pp.CaselessKeyword("FLAT")
+            )
+            # The MASM grammar has the ":" immediately after "OFFSET", but that's not what MSVC
+            # outputs.
+            + pp.Literal(":")
+            + identifier.setResultsName("identifier")
+            + pp.Optional(pp.Literal("+") + immediate.setResultsName("displacement"))
+        ).setResultsName("offset_expression")
+        ptr_expression = pp.Group(
+            data_type + pp.CaselessKeyword("PTR") + address_expression
+        ).setResultsName("ptr_expression")
+        short_expression = pp.Group(
+            pp.CaselessKeyword("SHORT") + identifier
+        ).setResultsName("short_expression")
+
+        # Instructions.
+        mnemonic = pp.Word(
+            pp.alphas, pp.alphanums
+        ).setResultsName("mnemonic")
+        operand = pp.Group(
+            self.register
+            | pp.Group(
+                offset_expression
+                | ptr_expression
+                | short_expression
+                | address_expression
+            ).setResultsName(self.memory_id)
+            | immediate
+        )
+        self.instruction_parser = (
+            mnemonic
+            + pp.Optional(operand.setResultsName("operand1"))
+            + pp.Optional(pp.Suppress(pp.Literal(",")))
+            + pp.Optional(operand.setResultsName("operand2"))
+            + pp.Optional(pp.Suppress(pp.Literal(",")))
+            + pp.Optional(operand.setResultsName("operand3"))
+            + pp.Optional(pp.Suppress(pp.Literal(",")))
+            + pp.Optional(operand.setResultsName("operand4"))
+            + pp.Optional(self.comment)
+        )
+
+        # Label.
+        self.label = pp.Group(
+            identifier.setResultsName("name")
+            + pp.Literal(":")
+            + pp.Optional(self.instruction_parser)
+            + pp.Optional(self.comment)
+        ).setResultsName(self.label_id)
+
+        # Directives.
+        # The identifiers at the beginning of a directive cannot start with a "." otherwise we end
+        # up with ambiguities.
+        directive_first = pp.Word(pp.alphas + NON_ASCII_PRINTABLE_CHARACTERS + "$?@_<>", exact=1)
+        directive_rest = pp.Word(pp.alphanums + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>")
+        directive_identifier = pp.Group(
+            pp.Combine(directive_first + pp.Optional(directive_rest)).setResultsName("name")
+        ).setResultsName("identifier")
+
+        # Parameter can be any quoted string or sequence of characters besides ';' (for comments)
+        # or ',' (parameter delimiter).  See ASM386 p. 38.
+        directive_parameter = (
+            pp.quotedString
+            ^ (
+                pp.Word(pp.printables + NON_ASCII_PRINTABLE_CHARACTERS, excludeChars=",;")
+                + pp.Optional(pp.Suppress(pp.Literal(",")))
+            )
+            ^ pp.Suppress(pp.Literal(","))
+        )
+        # The directives that don't start with a "." are ambiguous with instructions, so we list
+        # them explicitly.
+        # TODO: The directives that are types introduce a nasty ambiguity with instructions.  Skip
+        # them for now, apparently the MSVC output uses the short D? directives.
+        directive_keywords = (
+            pp.CaselessKeyword("ALIAS")
+            | pp.CaselessKeyword("ALIGN")
+            | pp.CaselessKeyword("ASSUME")
+            #| pp.CaselessKeyword("BYTE")
+            | pp.CaselessKeyword("CATSTR")
+            | pp.CaselessKeyword("COMM")
+            | pp.CaselessKeyword("COMMENT")
+            | pp.CaselessKeyword("DB")
+            | pp.CaselessKeyword("DD")
+            | pp.CaselessKeyword("DF")
+            | pp.CaselessKeyword("DQ")
+            | pp.CaselessKeyword("DT")
+            | pp.CaselessKeyword("DW")
+            #| pp.CaselessKeyword("DWORD")
+            | pp.CaselessKeyword("ECHO")
+            | pp.CaselessKeyword("END")
+            | pp.CaselessKeyword("ENDP")
+            | pp.CaselessKeyword("ENDS")
+            | pp.CaselessKeyword("EQU")
+            | pp.CaselessKeyword("EVEN")
+            | pp.CaselessKeyword("EXTRN")
+            | pp.CaselessKeyword("EXTERNDEF")
+            #| pp.CaselessKeyword("FWORD")
+            | pp.CaselessKeyword("GROUP")
+            | pp.CaselessKeyword("INCLUDE")
+            | pp.CaselessKeyword("INCLUDELIB")
+            | pp.CaselessKeyword("INSTR")
+            | pp.CaselessKeyword("INVOKE")
+            | pp.CaselessKeyword("LABEL")
+            #| pp.CaselessKeyword("MMWORD")
+            | pp.CaselessKeyword("OPTION")
+            | pp.CaselessKeyword("ORG")
+            | pp.CaselessKeyword("PAGE")
+            | pp.CaselessKeyword("POPCONTEXT")
+            | pp.CaselessKeyword("PROC")
+            | pp.CaselessKeyword("PROTO")
+            | pp.CaselessKeyword("PUBLIC")
+            | pp.CaselessKeyword("PUSHCONTEXT")
+            #| pp.CaselessKeyword("QWORD")
+            #| pp.CaselessKeyword("REAL10")
+            #| pp.CaselessKeyword("REAL4")
+            #| pp.CaselessKeyword("REAL8")
+            | pp.CaselessKeyword("RECORD")
+            #| pp.CaselessKeyword("SBYTE")
+            #| pp.CaselessKeyword("SDWORD")
+            | pp.CaselessKeyword("SEGMENT")
+            | pp.CaselessKeyword("SIZESTR")
+            | pp.CaselessKeyword("STRUCT")
+            | pp.CaselessKeyword("SUBSTR")
+            | pp.CaselessKeyword("SUBTITLE")
+            #| pp.CaselessKeyword("SWORD")
+            #| pp.CaselessKeyword("TBYTE")
+            | pp.CaselessKeyword("TEXTEQU")
+            | pp.CaselessKeyword("TITLE")
+            | pp.CaselessKeyword("TYPEDEF")
+            | pp.CaselessKeyword("UNION")
+            #| pp.CaselessKeyword("WORD")
+            #| pp.CaselessKeyword("XMMWORD")
+            #| pp.CaselessKeyword("YMMWORD")
+        )
+        self.directive = pp.Group(
+            pp.Optional(~directive_keywords + directive_identifier)
+            + (
+                pp.Combine(pp.Literal(".") + pp.Word(pp.alphanums + "_"))
+                | pp.Literal("=")
+                | directive_keywords
+            ).setResultsName("name")
+            + pp.ZeroOrMore(directive_parameter).setResultsName("parameters")
+            + pp.Optional(self.comment)
+        ).setResultsName(self.directive_id)
+
+    def parse_line(self, line, line_number=None):
+        """
+        Parse one line of assembly and return the corresponding instruction form.
+
+        :param str line: line of assembly code
+        :param int line_number: optional identifier of the instruction form (default None)
+        :return: `InstructionForm` -- parsed asm line (comment, label, directive or instruction)
+        :raises ValueError: if the line is none of the above
+        """
+        instruction_form = InstructionForm(line=line, line_number=line_number)
+        result = None
+
+        # 1. Parse comment.
+        try:
+            result = self.process_operand(self.comment.parseString(line, parseAll=True))
+            instruction_form.comment = " ".join(result[self.comment_id])
+        except pp.ParseException:
+            pass
+
+        # 2. Parse label.  The second item is the instruction sharing the line, if any.
+        if not result:
+            try:
+                result = self.process_operand(self.label.parseString(line, parseAll=True))
+                instruction_form.label = result[0].name
+                if result[1]:
+                    instruction_form.mnemonic = result[1].mnemonic
+                    instruction_form.operands = result[1].operands
+                    instruction_form.comment = result[1].comment
+            except pp.ParseException:
+                pass
+
+        # 3. Parse directive.  The second item is the group of comment tokens, if any.
+        if not result:
+            try:
+                result = self.process_operand(self.directive.parseString(line, parseAll=True))
+                instruction_form.directive = result[0]
+                if result[1]:
+                    instruction_form.comment = " ".join(result[1])
+            except pp.ParseException:
+                pass
+
+        # 4. Parse instruction.
+        if not result:
+            try:
+                result = self.parse_instruction(line)
+            except pp.ParseException as e:
+                raise ValueError(
+                    "Could not parse instruction on line {}: {!r}".format(line_number, line)
+                ) from e
+            instruction_form.mnemonic = result.mnemonic
+            instruction_form.operands = result.operands
+            instruction_form.comment = result.comment
+        return instruction_form
+
+    def make_instruction(self, parse_result):
+        """
+        Build an instruction form from parsed tokens.
+
+        :param parse_result: tuple resulting from calling `parseString` on the
+            `instruction_parser`, or a label group that embeds the same tokens.
+        :returns: `InstructionForm` -- mnemonic, post-processed operands and comment, if any
+        """
+        # The grammar names at most four operands; keep the ones present, in source order.
+        operand_keys = ("operand1", "operand2", "operand3", "operand4")
+        operands = [
+            self.process_operand(parse_result[key])
+            for key in operand_keys
+            if key in parse_result
+        ]
+
+        # The comment is matched as a group of words; join them back into one string.
+        if self.comment_id in parse_result:
+            comment = " ".join(parse_result[self.comment_id])
+        else:
+            comment = None
+
+        # `label_id` and `comment_id` are the keyword names that `InstructionForm` is called
+        # with here; an instruction built this way never carries a label of its own.
+        return InstructionForm(
+            mnemonic=parse_result.mnemonic,
+            operands=operands,
+            label_id=None,
+            comment_id=comment,
+        )
+
+    def parse_instruction(self, instruction):
+        """
+        Parse a line that holds an instruction.
+
+        :param str instruction: Assembly line string.
+        :returns: `InstructionForm` -- parsed instruction form
+        :raises pyparsing.ParseException: if the whole line is not a valid instruction
+        """
+        tokens = self.instruction_parser.parseString(instruction, parseAll=True)
+        return self.make_instruction(tokens)
+
+    def parse_register(self, register_string):
+        """Parse a register name; return its `RegisterOperand`, or None if it is not one."""
+        try:
+            tokens = self.register.parseString(register_string, parseAll=True)
+        except pp.ParseException:
+            return None
+        # The register grammar names its result, so this dispatches to `process_register`.
+        return self.process_operand(tokens)
+
+    def process_operand(self, operand):
+        """Post-process a parse result according to the first known result name it contains."""
+        handlers = (
+            (self.directive_id, self.process_directive),
+            (self.identifier, self.process_identifier),
+            (self.immediate_id, self.process_immediate),
+            (self.label_id, self.process_label),
+            (self.memory_id, self.process_memory_address),
+            (self.register_id, self.process_register),
+        )
+        for key, handler in handlers:
+            if key in operand:
+                return handler(operand[key])
+        # None of the names is present (e.g. a bare comment): hand the tokens back unchanged.
+        return operand
+
+    def process_directive(self, directive):
+        """Post-process a directive; return the `DirectiveOperand` and its comment, if any."""
+        # TODO: This is putting the identifier in the parameters.  No idea if it's right.
+        parameters = [directive.identifier.name] if "identifier" in directive else []
+        parameters.extend(directive.parameters)
+        directive_new = DirectiveOperand(
+            name=directive.name, parameters=parameters or None
+        )
+        # Interpret the "=" directives because the generated assembly is full of symbols that are
+        # defined there.  A malformed one (no name or no value) is skipped rather than crashing.
+        if directive.name == "=" and len(parameters) >= 2:
+            self._equ[parameters[0]] = parameters[1]
+        return directive_new, directive.get("comment")
+
+    def process_register(self, operand):
+        return RegisterOperand(name=operand.name)  # wrap the parsed register name
+
+    def process_register_expression(self, register_expression):
+        """Convert a bracketed register expression into a `MemoryOperand`."""
+        base = None
+        indexed = None
+        non_displacement = register_expression.get("non_displacement")
+        if non_displacement:
+            # The group holds "base + index", a lone base, or a lone (optionally scaled) index.
+            base_and_indexed = non_displacement.get("base_and_indexed")
+            if base_and_indexed:
+                base = base_and_indexed.get("base")
+                indexed = base_and_indexed.get("indexed")
+            else:
+                base = non_displacement.get("base")
+                indexed = None if base else non_displacement.get("indexed")
+
+        index = indexed.get("index") if indexed else None
+        scale = int(indexed.get("scale", "1"), 0) if indexed else 1
+
+        # A displacement after the registers overrides one placed before them.
+        displacement = None
+        for key in ("post_displacement", "pre_displacement"):
+            group = register_expression.get(key)
+            if group:
+                displacement = self.process_immediate(group.immediate)
+                break
+
+        return MemoryOperand(
+            offset=displacement,
+            base=RegisterOperand(name=base.name) if base else None,
+            index=RegisterOperand(name=index.name) if index else None,
+            scale=scale,
+        )
+
+    def process_address_expression(self, address_expression, data_type=None):
+        """
+        Convert an address expression into an operand.
+
+        :param address_expression: parse result named "address_expression"
+        :param data_type: data type from an enclosing `PTR` expression, if any
+        :returns: a `MemoryOperand`, or the bare identifier when there is nothing else
+        """
+        # TODO: It seems that we could have a prefix immediate operand, a displacement in the
+        # brackets, and an offset.  How all of this works together is somewhat mysterious.
+        def convert(key, handler):
+            # Post-process the named sub-expression, if the grammar matched one.
+            return handler(address_expression[key]) if key in address_expression else None
+
+        immediate_operand = convert("immediate", self.process_immediate)
+        register_expression = convert("register_expression", self.process_register_expression)
+        segment = convert("segment", self.process_register)
+        identifier = convert("identifier", self.process_identifier)
+
+        if register_expression:
+            # An immediate in front of the brackets replaces the offset found inside them.
+            if immediate_operand:
+                register_expression.offset = immediate_operand
+            if data_type:
+                register_expression.data_type = data_type
+            return register_expression
+        if segment:
+            return MemoryOperand(base=segment, offset=immediate_operand, data_type=data_type)
+        if not identifier:
+            return MemoryOperand(base=immediate_operand, data_type=data_type)
+        if immediate_operand:
+            identifier.offset = immediate_operand
+        elif not data_type:
+            # An address expression without a data type or an offset is just an identifier.
+            # This matters for jumps.
+            return identifier
+        return MemoryOperand(offset=identifier, data_type=data_type)
+
+    def process_offset_expression(self, offset_expression):
+        """Convert an `OFFSET` expression into a `MemoryOperand` whose offset is the identifier."""
+        # TODO: Record that this is an offset expression.
+        target = self.process_identifier(offset_expression.identifier)
+        if "displacement" in offset_expression:
+            target.offset = self.process_immediate(offset_expression.displacement)
+        else:
+            target.offset = None
+        return MemoryOperand(offset=target)
+
+    def process_ptr_expression(self, ptr_expression):
+        """Process a `PTR` expression, forwarding its data type to the address operand."""
+        # TODO: Do something with the data_type.
+        address = ptr_expression.address_expression
+        data_type = ptr_expression.data_type
+        return self.process_address_expression(address, data_type)
+
+    def process_short_expression(self, short_expression):
+        # TODO: Do something with the fact that it is short.
+        return LabelOperand(name=short_expression.identifier.name)  # keep only the identifier
+
+    def process_memory_address(self, memory_address):
+        """Post-process a memory operand by dispatching on the kind of expression it holds."""
+        for key, handler in (
+            ("address_expression", self.process_address_expression),
+            ("offset_expression", self.process_offset_expression),
+            ("ptr_expression", self.process_ptr_expression),
+            ("short_expression", self.process_short_expression),
+        ):
+            if key in memory_address:
+                return handler(memory_address[key])
+        return memory_address
+
+    def process_label(self, label):
+        """Post-process a label line: return its `LabelOperand` and the instruction after it."""
+        # Remove duplicated 'name' level due to identifier.  Note that there is no place to put the
+        # comment, if any.
+        label["name"] = label["name"]["name"]
+        instruction = self.make_instruction(label) if "mnemonic" in label else None
+        return LabelOperand(name=label.name), instruction
+
+    def process_immediate(self, immediate):
+        """Post-process an immediate: a signed number, or a name left to `process_identifier`."""
+        if "identifier" in immediate:
+            return self.process_identifier(immediate.identifier)
+        literal = immediate.get("sign", "") + immediate.value
+        operand = ImmediateOperand(value=literal)
+        operand.value = self.normalize_imd(operand)
+        return operand
+
+    def process_identifier(self, identifier):
+        """Resolve a name: symbols defined by "=" become immediates, others stay identifiers."""
+        name = identifier.name
+        if name not in self._equ:
+            return IdentifierOperand(name=name)
+        # Actually an immediate, change declaration.  The recorded value is still text, so it
+        # goes through the same normalization as a literal.
+        resolved = ImmediateOperand(identifier=name, value=self._equ[name])
+        resolved.value = self.normalize_imd(resolved)
+        return resolved
+
+    def normalize_imd(self, imd):
+        """Return the numeric value of `imd`; strings may end with a radix suffix (B, O, H)."""
+        if not isinstance(imd.value, str):
+            return imd.value
+        if '.' in imd.value:
+            return float(imd.value)
+        # The suffix selects the base.  It is compared case-insensitively because values taken
+        # from "=" directives are raw text and may be written as, e.g., "0ffh".
+        base = {'B': 2, 'O': 8, 'H': 16}.get(imd.value[-1].upper(), 10)
+        negative = imd.value[0] == '-'
+        # Skip a leading sign, and the suffix unless the number is decimal.
+        start = 1 if negative or imd.value[0] == '+' else 0
+        stop = len(imd.value) if base == 10 else -1
+        value = 0
+        for c in imd.value[start:stop]:
+            value = value * base + int(c, base)
+        return -value if negative else value
--- a/osaca/semantics/arch_semantics.py
+++ b/osaca/semantics/arch_semantics.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 """Semantics opbject responsible for architecture specific semantic operations"""

+from dis import Instruction
 import sys
 import warnings
 from itertools import chain
@@ -14,12 +15,24 @@ from osaca.parser.register import RegisterOperand


 class ArchSemantics(ISASemantics):
-    GAS_SUFFIXES = "bswlqt"
-
-    def __init__(self, machine_model: MachineModel, path_to_yaml=None):
-        super().__init__(machine_model.get_ISA().lower(), path_to_yaml=path_to_yaml)
+    def __init__(self, parser, machine_model: MachineModel, path_to_yaml=None):
+        super().__init__(parser, path_to_yaml=path_to_yaml)
        self._machine_model = machine_model
-        self._isa = machine_model.get_ISA().lower()
+
+    def normalize_instruction_form(self, instruction_form):
+        """Have the parser normalize `instruction_form` using the ISA and machine models."""
+        models = (self.isa_model, self._machine_model)
+        self.parser.normalize_instruction_form(instruction_form, *models)
+
+    def normalize_instruction_forms(self, instruction_forms):
+        """Normalize every element of `instruction_forms`, one at a time."""
+        for form in instruction_forms:
+            self.normalize_instruction_form(form)
+
+    def _check_normalized(self, instruction_forms):
+        """Call `check_normalized()` on every element of `instruction_forms`."""
+        for form in instruction_forms:
+            form.check_normalized()

    # SUMMARY FUNCTION
    def add_semantics(self, kernel):
@@ -29,6 +42,7 @@ class ArchSemantics(ISASemantics):

        :param list kernel: kernel to apply semantics
        """
+        self._check_normalized(kernel)
        for instruction_form in kernel:
            self.assign_src_dst(instruction_form)
            self.assign_tp_lt(instruction_form)
@@ -41,6 +55,7 @@ class ArchSemantics(ISASemantics):

        :param list kernel: kernel to apply optimal port utilization
        """
+        self._check_normalized(kernel)
        INC = 0.01
        kernel.reverse()
        port_list = self._machine_model.get_ports()
@@ -137,6 +152,7 @@ class ArchSemantics(ISASemantics):

    def set_hidden_loads(self, kernel):
        """Hide loads behind stores if architecture supports hidden loads (depricated)"""
+        self._check_normalized(kernel)
        loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr.flags]
        stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr.flags]
        # Filter instructions including load and store
@@ -176,6 +192,7 @@ class ArchSemantics(ISASemantics):
    # mark instruction form with semantic flags
    def assign_tp_lt(self, instruction_form):
        """Assign throughput and latency to an instruction form."""
+        instruction_form.check_normalized()
        flags = []
        port_number = len(self._machine_model["ports"])
        if instruction_form.mnemonic is None:
@@ -189,25 +206,6 @@ class ArchSemantics(ISASemantics):
            instruction_data = self._machine_model.get_instruction(
                instruction_form.mnemonic, instruction_form.operands
            )
-            if (
-                not instruction_data
-                and self._isa == "x86"
-                and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
-            ):
-                # check for instruction without GAS suffix
-                instruction_data = self._machine_model.get_instruction(
-                    instruction_form.mnemonic[:-1], instruction_form.operands
-                )
-            if (
-                instruction_data is None
-                and self._isa == "aarch64"
-                and "." in instruction_form.mnemonic
-            ):
-                # Check for instruction without shape/cc suffix
-                suffix_start = instruction_form.mnemonic.index(".")
-                instruction_data = self._machine_model.get_instruction(
-                    instruction_form.mnemonic[:suffix_start], instruction_form.operands
-                )
            if instruction_data:
                # instruction form in DB
                (
@@ -232,25 +230,6 @@ class ArchSemantics(ISASemantics):
                    instruction_data_reg = self._machine_model.get_instruction(
                        instruction_form.mnemonic, operands
                    )
-                    if (
-                        not instruction_data_reg
-                        and self._isa == "x86"
-                        and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
-                    ):
-                        # check for instruction without GAS suffix
-                        instruction_data_reg = self._machine_model.get_instruction(
-                            instruction_form.mnemonic[:-1], operands
-                        )
-                    if (
-                        instruction_data_reg is None
-                        and self._isa == "aarch64"
-                        and "." in instruction_form.mnemonic
-                    ):
-                        # Check for instruction without shape/cc suffix
-                        suffix_start = instruction_form.mnemonic.index(".")
-                        instruction_data_reg = self._machine_model.get_instruction(
-                            instruction_form.mnemonic[:suffix_start], operands
-                        )
                    if instruction_data_reg:
                        assign_unknown = False
                        reg_type = self._parser.get_reg_type(
@@ -310,7 +289,7 @@ class ArchSemantics(ISASemantics):
                            #   - all mem operands in src_dst are pre-/post_indexed
                            # since it is no mem store
                            if (
-                                self._isa == "aarch64"
+                                self._parser.isa() == "aarch64"
                                and not isinstance(
                                    instruction_form.semantic_operands["destination"],
                                    MemoryOperand,
@@ -406,6 +385,7 @@ class ArchSemantics(ISASemantics):

    def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
        """Apply performance data to instruction if it was found in the archDB"""
+        instruction_form.check_normalized()
        throughput = instruction_data.throughput
        port_pressure = self._machine_model.average_port_pressure(instruction_data.port_pressure)
        instruction_form.port_uops = instruction_data.port_pressure
@@ -441,12 +421,12 @@ class ArchSemantics(ISASemantics):

    def convert_op_to_reg(self, reg_type, regtype="0"):
        """Create register operand for a memory addressing operand"""
-        if self._isa == "x86":
+        if self._parser.isa() == "x86":
            if reg_type == "gpr":
                register = RegisterOperand(name="r" + str(int(regtype) + 9))
            else:
                register = RegisterOperand(name=reg_type + regtype)
-        elif self._isa == "aarch64":
+        elif self._parser.isa() == "aarch64":
            register = RegisterOperand(name=regtype, prefix=reg_type)
        return register

--- a/osaca/semantics/hw_model.py
+++ b/osaca/semantics/hw_model.py
@@ -11,7 +11,6 @@ from pathlib import Path

 import ruamel.yaml
 from osaca import __version__, utils
-from osaca.parser import ParserX86ATT
 from osaca.parser.instruction_form import InstructionForm
 from osaca.parser.operand import Operand
 from osaca.parser.memory import MemoryOperand
@@ -79,7 +78,7 @@ class MachineModel(object):
            else:
                yaml = self._create_yaml_object()
                # otherwise load
-                with open(self._path, "r") as f:
+                with open(self._path, "r", encoding="utf8") as f:
                    if not lazy:
                        self._data = yaml.load(f)
                    else:
@@ -286,23 +285,38 @@ class MachineModel(object):
    ######################################################

    def get_instruction(self, name, operands):
-        """Find and return instruction data from name and operands."""
+        """Find and return instruction data from name and operands/arity."""
        # For use with dict instead of list as DB
        if name is None:
            return None
        name_matched_iforms = self._data["instruction_forms_dict"].get(name.upper(), [])

        try:
-            return next(
-                instruction_form
-                for instruction_form in name_matched_iforms
-                if self._match_operands(
-                    instruction_form.operands,
-                    operands,
+            # If `operands` is an integer, it represents the arity of the instruction.  This is
+            # useful to reorder the operands in the Intel syntax because in their original order
+            # they may not match the model.
+            if isinstance(operands, int):
+                arity = operands
+                return next(
+                    (
+                        instruction_form
+                        for instruction_form in name_matched_iforms
+                        if len(instruction_form.operands) == arity
+                    ),
+                    None
+                )
+            else:
+                return next(
+                    (
+                        instruction_form
+                        for instruction_form in name_matched_iforms
+                        if self._match_operands(
+                            instruction_form.operands,
+                            operands
+                        )
+                    ),
+                    None
                )
-            )
-        except StopIteration:
-            return None
        except TypeError as e:
            print("\nname: {}\noperands: {}".format(name, operands))
            raise TypeError from e
@@ -878,6 +892,7 @@ class MachineModel(object):
        return True

    def _is_x86_reg_type(self, i_reg, reg, consider_masking=False):
+        from osaca.parser import ParserX86
        """Check if register type match."""
        if reg is None:
            if i_reg is None:
@@ -895,7 +910,7 @@ class MachineModel(object):
        if i_reg_name == self.WILDCARD or reg.name == self.WILDCARD:
            return True
        # differentiate between vector registers (mm, xmm, ymm, zmm) and others (gpr)
-        parser_x86 = ParserX86ATT()
+        parser_x86 = ParserX86()
        if parser_x86.is_vector_register(reg):
            if reg.name.rstrip(string.digits).lower() == i_reg_name:
                # Consider masking and zeroing for AVX512
--- a/osaca/semantics/isa_semantics.py
+++ b/osaca/semantics/isa_semantics.py
@@ -2,7 +2,6 @@
 from itertools import chain

 from osaca import utils
-from osaca.parser import ParserAArch64, ParserX86ATT
 from osaca.parser.memory import MemoryOperand
 from osaca.parser.operand import Operand
 from osaca.parser.register import RegisterOperand
@@ -26,20 +25,23 @@ class INSTR_FLAGS:


 class ISASemantics(object):
-    GAS_SUFFIXES = "bswlqt"
-
-    def __init__(self, isa, path_to_yaml=None):
-        self._isa = isa.lower()
-        path = path_to_yaml or utils.find_datafile("isa/" + self._isa + ".yml")
+    def __init__(self, parser, path_to_yaml=None):
+        path = path_to_yaml or utils.find_datafile("isa/" + parser.isa() + ".yml")
        self._isa_model = MachineModel(path_to_yaml=path)
-        if self._isa == "x86":
-            self._parser = ParserX86ATT()
-        elif self._isa == "aarch64":
-            self._parser = ParserAArch64()
+        self._parser = parser
+
+    @property
+    def parser(self):
+        return self._parser
+
+    @property
+    def isa_model(self):
+        return self._isa_model

    def process(self, instruction_forms):
        """Process a list of instruction forms."""
        for i in instruction_forms:
+            i.check_normalized()
            self.assign_src_dst(i)

    # get ;parser result and assign operands to
@@ -48,6 +50,7 @@ class ISASemantics(object):
    # - source/destination
    def assign_src_dst(self, instruction_form):
        """Update instruction form dictionary with source, destination and flag information."""
+        instruction_form.check_normalized()
        # if the instruction form doesn't have operands or is None, there's nothing to do
        if instruction_form.operands is None or instruction_form.mnemonic is None:
            instruction_form.semantic_operands = {"source": [], "destination": [], "src_dst": []}
@@ -57,21 +60,6 @@ class ISASemantics(object):
        isa_data = self._isa_model.get_instruction(
            instruction_form.mnemonic, instruction_form.operands
        )
-        if (
-            isa_data is None
-            and self._isa == "x86"
-            and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
-        ):
-            # Check for instruction without GAS suffix
-            isa_data = self._isa_model.get_instruction(
-                instruction_form.mnemonic[:-1], instruction_form.operands
-            )
-        if isa_data is None and self._isa == "aarch64" and "." in instruction_form.mnemonic:
-            # Check for instruction without shape/cc suffix
-            suffix_start = instruction_form.mnemonic.index(".")
-            isa_data = self._isa_model.get_instruction(
-                instruction_form.mnemonic[:suffix_start], instruction_form.operands
-            )
        operands = instruction_form.operands
        op_dict = {}

@@ -88,36 +76,17 @@ class ISASemantics(object):
                isa_data_reg = self._isa_model.get_instruction(
                    instruction_form.mnemonic, operands_reg
                )
-                if (
-                    isa_data_reg is None
-                    and self._isa == "x86"
-                    and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
-                ):
-                    # Check for instruction without GAS suffix
-                    isa_data_reg = self._isa_model.get_instruction(
-                        instruction_form.mnemonic[:-1], operands_reg
-                    )
-                if (
-                    isa_data_reg is None
-                    and self._isa == "aarch64"
-                    and "." in instruction_form.mnemonic
-                ):
-                    # Check for instruction without shape/cc suffix
-                    suffix_start = instruction_form.mnemonic.index(".")
-                    isa_data_reg = self._isa_model.get_instruction(
-                        instruction_form.mnemonic[:suffix_start], operands_reg
-                    )
                if isa_data_reg:
                    assign_default = False
                    op_dict = self._apply_found_ISA_data(isa_data_reg, operands)

        if assign_default:
            # no irregular operand structure, apply default
-            op_dict["source"] = self._get_regular_source_operands(instruction_form)
-            op_dict["destination"] = self._get_regular_destination_operands(instruction_form)
+            op_dict["source"] = self._parser.get_regular_source_operands(instruction_form)
+            op_dict["destination"] = self._parser.get_regular_destination_operands(instruction_form)
            op_dict["src_dst"] = []
        # post-process pre- and post-indexing for aarch64 memory operands
-        if self._isa == "aarch64":
+        if self._parser.isa() == "aarch64":
            for operand in [op for op in op_dict["source"] if isinstance(op, MemoryOperand)]:
                post_indexed = operand.post_indexed
                pre_indexed = operand.pre_indexed
@@ -161,6 +130,7 @@ class ISASemantics(object):
        Empty dict if no changes of registers occured. None for registers with unknown changes.
        If only_postindexed is True, only considers changes due to post_indexed memory references.
        """
+        instruction_form.check_normalized()
        if instruction_form.mnemonic is None:
            return {}
        dest_reg_names = [
@@ -174,21 +144,6 @@ class ISASemantics(object):
        isa_data = self._isa_model.get_instruction(
            instruction_form.mnemonic, instruction_form.operands
        )
-        if (
-            isa_data is None
-            and self._isa == "x86"
-            and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
-        ):
-            # Check for instruction without GAS suffix
-            isa_data = self._isa_model.get_instruction(
-                instruction_form.mnemonic[:-1], instruction_form.operands
-            )
-        if isa_data is None and self._isa == "aarch64" and "." in instruction_form.mnemonic:
-            # Check for instruction without shape/cc suffix
-            suffix_start = instruction_form.mnemonic.index(".")
-            isa_data = self._isa_model.get_instruction(
-                instruction_form.mnemonic[:suffix_start], instruction_form.operands
-            )

        if only_postindexed:
            for o in instruction_form.operands:
@@ -301,6 +256,7 @@ class ISASemantics(object):

    def _has_load(self, instruction_form):
        """Check if instruction form performs a LOAD"""
+        instruction_form.check_normalized()
        for operand in chain(
            instruction_form.semantic_operands["source"],
            instruction_form.semantic_operands["src_dst"],
@@ -311,6 +267,7 @@ class ISASemantics(object):

    def _has_store(self, instruction_form):
        """Check if instruction form perfroms a STORE"""
+        instruction_form.check_normalized()
        for operand in chain(
            instruction_form.semantic_operands["destination"],
            instruction_form.semantic_operands["src_dst"],
@@ -319,33 +276,6 @@ class ISASemantics(object):
                return True
        return False

-    def _get_regular_source_operands(self, instruction_form):
-        """Get source operand of given instruction form assuming regular src/dst behavior."""
-        # if there is only one operand, assume it is a source operand
-        if len(instruction_form.operands) == 1:
-            return [instruction_form.operands[0]]
-        if self._isa == "x86":
-            # return all but last operand
-            return [op for op in instruction_form.operands[0:-1]]
-        elif self._isa == "aarch64":
-            return [op for op in instruction_form.operands[1:]]
-        else:
-            raise ValueError("Unsupported ISA {}.".format(self._isa))
-
-    def _get_regular_destination_operands(self, instruction_form):
-        """Get destination operand of given instruction form assuming regular src/dst behavior."""
-        # if there is only one operand, assume no destination
-        if len(instruction_form.operands) == 1:
-            return []
-        if self._isa == "x86":
-            # return last operand
-            return instruction_form.operands[-1:]
-        if self._isa == "aarch64":
-            # return first operand
-            return instruction_form.operands[:1]
-        else:
-            raise ValueError("Unsupported ISA {}.".format(self._isa))
-
    def substitute_mem_address(self, operands):
        """Create memory wildcard for all memory operands"""
        return [
--- a/osaca/semantics/kernel_dg.py
+++ b/osaca/semantics/kernel_dg.py
@@ -38,7 +38,8 @@ class KernelDG(nx.DiGraph):
            self.kernel, timeout, flag_dependencies
        )

-    def _extend_path(self, dst_list, kernel, dg, offset):
+    @classmethod
+    def _extend_path(cls, dst_list, kernel, dg, offset):
        for instr in kernel:
            generator_path = nx.algorithms.simple_paths.all_simple_paths(
                dg, instr.line_number, instr.line_number + offset
@@ -138,7 +139,7 @@ class KernelDG(nx.DiGraph):
                all_paths = manager.list()
                processes = [
                    Process(
-                        target=self._extend_path,
+                        target=KernelDG._extend_path,
                        args=(all_paths, instr_section, dg, offset),
                    )
                    for instr_section in instrs
@@ -164,9 +165,7 @@ class KernelDG(nx.DiGraph):
                        # terminate running processes
                        for p in processes:
                            if p.is_alive():
-                                # Python 3.6 does not support Process.kill().
-                                # Can be changed to `p.kill()` after EoL (01/22) of Py3.6
-                                os.kill(p.pid, signal.SIGKILL)
+                                p.kill()
                            p.join()
                all_paths = list(all_paths)
        else:
@@ -186,11 +185,11 @@ class KernelDG(nx.DiGraph):
            for s, d in nx.utils.pairwise(path):
                edge_lat = dg.edges[s, d]["latency"]
                # map source node back to original line numbers
-                if s >= offset:
+                if s > offset:
                    s -= offset
                lat_path.append((s, edge_lat))
                lat_sum += edge_lat
-            if d >= offset:
+            if d > offset:
                d -= offset
            lat_path.sort()

@@ -413,7 +412,7 @@ class KernelDG(nx.DiGraph):
            addr_change = 0
            if isinstance(src.offset, ImmediateOperand) and src.offset.value is not None:
                addr_change += src.offset.value
-            if mem.offset:
+            if isinstance(mem.offset, ImmediateOperand) and mem.offset.value is not None:
                addr_change -= mem.offset.value
            if mem.base and src.base:
                base_change = register_changes.get(
--- a/osaca/semantics/marker_utils.py
+++ b/osaca/semantics/marker_utils.py
@@ -1,29 +1,36 @@
 #!/usr/bin/env python3
 from collections import OrderedDict
+from enum import Enum
+from functools import partial

-from osaca.parser import ParserAArch64, ParserX86ATT, get_parser
-from osaca.parser.register import RegisterOperand
+from osaca.parser.instruction_form import InstructionForm
+from osaca.parser.directive import DirectiveOperand
 from osaca.parser.identifier import IdentifierOperand
 from osaca.parser.immediate import ImmediateOperand
+from osaca.parser.memory import MemoryOperand
+from osaca.parser.register import RegisterOperand

 COMMENT_MARKER = {"start": "OSACA-BEGIN", "end": "OSACA-END"}

+class Matching(Enum):
+    """State of marker matching."""
+    # No: we have determined that the code doesn't match the marker.
+    # Partial: so far the code matches the marker, but the end of the marker is not reached yet.
+    # Full: the code matches all instructions in the marker.
+    No = 0
+    Partial = 1
+    Full = 2

-def reduce_to_section(kernel, isa):
+
+def reduce_to_section(kernel, parser):
    """
    Finds OSACA markers in given kernel and returns marked section

    :param list kernel: kernel to check
-    :param str isa: ISA of given kernel
+    :param BaseParser parser: parser used to produce the kernel
    :returns: `list` -- marked section of kernel as list of instruction forms
    """
-    isa = isa.lower()
-    if isa == "x86":
-        start, end = find_marked_kernel_x86ATT(kernel)
-    elif isa == "aarch64":
-        start, end = find_marked_kernel_AArch64(kernel)
-    else:
-        raise ValueError("ISA not supported.")
+    start, end = find_marked_section(kernel, parser, COMMENT_MARKER)
    if start == -1:
        start = 0
    if end == -1:
@@ -31,109 +38,21 @@ def reduce_to_section(kernel, isa):
    return kernel[start:end]


-def find_marked_kernel_AArch64(lines):
-    """
-    Find marked section for AArch64
-
-    :param list lines: kernel
-    :returns: `tuple of int` -- start and end line of marked section
-    """
-    nop_bytes = [213, 3, 32, 31]
-    return find_marked_section(
-        lines,
-        ParserAArch64(),
-        ["mov"],
-        "x1",
-        [111, 222],
-        nop_bytes,
-        reverse=True,
-        comments=COMMENT_MARKER,
-    )
-
-
-def find_marked_kernel_x86ATT(lines):
-    """
-    Find marked section for x86
-
-    :param list lines: kernel
-    :returns: `tuple of int` -- start and end line of marked section
-    """
-    nop_bytes = [100, 103, 144]
-    return find_marked_section(
-        lines,
-        ParserX86ATT(),
-        ["mov", "movl"],
-        "ebx",
-        [111, 222],
-        nop_bytes,
-        comments=COMMENT_MARKER,
-    )
-
-
-def get_marker(isa, comment=""):
-    """Return tuple of start and end marker lines."""
-    isa = isa.lower()
-    if isa == "x86":
-        start_marker_raw = (
-            "movl      $111, %ebx # OSACA START MARKER\n"
-            ".byte     100        # OSACA START MARKER\n"
-            ".byte     103        # OSACA START MARKER\n"
-            ".byte     144        # OSACA START MARKER\n"
-        )
-        if comment:
-            start_marker_raw += "# {}\n".format(comment)
-        end_marker_raw = (
-            "movl      $222, %ebx # OSACA END MARKER\n"
-            ".byte     100        # OSACA END MARKER\n"
-            ".byte     103        # OSACA END MARKER\n"
-            ".byte     144        # OSACA END MARKER\n"
-        )
-    elif isa == "aarch64":
-        start_marker_raw = (
-            "mov       x1, #111    // OSACA START MARKER\n"
-            ".byte     213,3,32,31 // OSACA START MARKER\n"
-        )
-        if comment:
-            start_marker_raw += "// {}\n".format(comment)
-        # After loop
-        end_marker_raw = (
-            "mov       x1, #222    // OSACA END MARKER\n"
-            ".byte     213,3,32,31 // OSACA END MARKER\n"
-        )
-
-    parser = get_parser(isa)
-    start_marker = parser.parse_file(start_marker_raw)
-    end_marker = parser.parse_file(end_marker_raw)
-
-    return start_marker, end_marker
-
-
-def find_marked_section(
-    lines, parser, mov_instr, mov_reg, mov_vals, nop_bytes, reverse=False, comments=None
-):
+def find_marked_section(lines, parser, comments=None):
    """
    Return indexes of marked section

    :param list lines: kernel
    :param parser: parser to use for checking
    :type parser: :class:`~parser.BaseParser`
-    :param mov_instr: all MOV instruction possible for the marker
-    :type mov_instr: `list of str`
-    :param mov_reg: register used for the marker
-    :type mov_reg: `str`
-    :param mov_vals: values needed to be moved to ``mov_reg`` for valid marker
-    :type mov_vals: `list of int`
-    :param nop_bytes: bytes representing opcode of NOP
-    :type nop_bytes: `list of int`
-    :param reverse: indicating if ISA syntax requires reverse operand order, defaults to `False`
-    :type reverse: boolean, optional
    :param comments: dictionary with start and end markers in comment format, defaults to None
    :type comments: dict, optional
    :returns: `tuple of int` -- start and end line of marked section
    """
-    # TODO match to instructions returned by get_marker
    index_start = -1
    index_end = -1
+    start_marker = parser.start_marker()
+    end_marker = parser.end_marker()
    for i, line in enumerate(lines):
        try:
            if line.mnemonic is None and comments is not None and line.comment is not None:
@@ -141,59 +60,151 @@ def find_marked_section(
                    index_start = i + 1
                elif comments["end"] == line.comment:
                    index_end = i
-            elif (
-                line.mnemonic in mov_instr
-                and len(lines) > i + 1
-                and lines[i + 1].directive is not None
-            ):
-                source = line.operands[0 if not reverse else 1]
-                destination = line.operands[1 if not reverse else 0]
-                # instruction pair matches, check for operands
-                if (
-                    isinstance(source, ImmediateOperand)
-                    and parser.normalize_imd(source) == mov_vals[0]
-                    and isinstance(destination, RegisterOperand)
-                    and parser.get_full_reg_name(destination) == mov_reg
-                ):
-                    # operands of first instruction match start, check for second one
-                    match, line_count = match_bytes(lines, i + 1, nop_bytes)
-                    if match:
-                        # return first line after the marker
-                        index_start = i + 1 + line_count
-                elif (
-                    isinstance(source, ImmediateOperand)
-                    and parser.normalize_imd(source) == mov_vals[1]
-                    and isinstance(destination, RegisterOperand)
-                    and parser.get_full_reg_name(destination) == mov_reg
-                ):
-                    # operand of first instruction match end, check for second one
-                    match, line_count = match_bytes(lines, i + 1, nop_bytes)
-                    if match:
-                        # return line of the marker
-                        index_end = i
-        except TypeError:
-            print(i, line)
+            if index_start == -1:
+                matching_lines = match_lines(parser, lines[i:], start_marker)
+                if matching_lines > 0:
+                    # Return the first line after the marker.
+                    index_start = i + matching_lines
+            if index_end == -1:
+                if match_lines(parser, lines[i:], end_marker):
+                    index_end = i
+        except TypeError as e:
+            print(i, e, line)
        if index_start != -1 and index_end != -1:
            break
    return index_start, index_end


-def match_bytes(lines, index, byte_list):
-    """Match bytes directives of markers"""
-    # either all bytes are in one line or in separate ones
-    extracted_bytes = []
-    line_count = 0
-    while (
-        index < len(lines)
-        and lines[index].directive is not None
-        and lines[index].directive.name == "byte"
+# This function and the following ones traverse the syntactic tree produced by the parser and try to
+# match it to the marker.  This is necessary because the IACA markers are significantly different on
+# MSVC x86 than on other ISAs/compilers.  Therefore, simple string matching is not sufficient.  Also,
+# the syntax of numeric literals depends on the parser and should not be known to this module.
+# The matching only checks for a limited number of properties (and the marker doesn't specify the
+# rest).
+def match_lines(parser, lines, marker):
+    """
+    Returns the number of leading `lines` that match the `marker`.
+
+    Each element of `marker` is either a single pattern or a list of alternative patterns, one of
+    which must fully match the corresponding line.  A directive pattern may be matched by several
+    consecutive lines, each of them consuming some of its parameters.  The `marker` passed by the
+    caller is never modified.
+
+    :param parser: parser forwarded to `match_line` for comparing directive parameters.
+    :param list of `InstructionForm` lines: parsed assembly code.
+    :param list of `InstructionForm` marker: pattern to match against the `lines`.
+    :return int: the length of the match in the parsed code, 0 if there is no match.
+    """
+    marker_iter = iter(marker)
+    marker_line = _private_marker_line(next(marker_iter, None))
+    if not marker_line:
+        # An empty marker cannot match anything.
+        return 0
+    for matched_lines, line in enumerate(lines):
+        if isinstance(marker_line, list):
+            # No support for partial matching in lists.
+            for marker_alternative in marker_line:
+                matching = match_line(parser, line, marker_alternative)
+                if matching == Matching.Full:
+                    break
+            else:
+                return 0
+            marker_line = _private_marker_line(next(marker_iter, None))
+        else:
+            matching = match_line(parser, line, marker_line)
+            if matching == Matching.No:
+                return 0
+            if matching == Matching.Full:
+                # Move to the next marker line, the current one has been fully matched.
+                marker_line = _private_marker_line(next(marker_iter, None))
+            # On a partial match the same marker line is tried again: the call to `match_line`
+            # consumed some of its directive parameters.
+        # If we have reached the last marker line, the parsed code matches the marker.
+        if not marker_line:
+            return matched_lines + 1
+    # The parsed code ended before the marker was fully matched.
+    return 0
+
+
+def _private_marker_line(marker_line):
+    """
+    Returns a version of `marker_line` that can be consumed without altering the caller's marker.
+
+    Only patterns carrying a directive are copied, because matching consumes nothing but directive
+    parameters; other patterns are returned as they are.
+    """
+    import copy
+
+    if isinstance(marker_line, list):
+        return [_private_marker_line(alternative) for alternative in marker_line]
+    if marker_line is not None and marker_line.directive:
+        return copy.deepcopy(marker_line)
+    return marker_line
+
+def match_line(parser, line, marker_line):
+    """
+    Returns whether `line` matches `marker_line`.
+
+    Instructions are compared on their mnemonic and operands and either match fully or not at all.
+    Directives are compared on their name and then on their parameters.
+
+    :param parser: parser forwarded to `match_parameters` for comparing directive parameters.
+    :param `InstructionForm` line: parsed assembly code.
+    :param `InstructionForm` marker_line: pattern to match against `line`.
+    :return: Matching. In case of partial match, `marker_line` is modified and should be reused for
+                       matching the next line in the parsed assembly code.
+    """
+    if (
+        line.mnemonic
+        and marker_line.mnemonic
+        and line.mnemonic == marker_line.mnemonic
+        and match_operands(line.operands, marker_line.operands)
    ):
-        line_count += 1
-        extracted_bytes += [int(x, 0) for x in lines[index].directive.parameters]
-        index += 1
-    if extracted_bytes[0 : len(byte_list)] == byte_list:
-        return True, line_count
-    return False, -1
+        return Matching.Full
+    if (
+        line.directive
+        and marker_line.directive
+        and line.directive.name == marker_line.directive.name
+    ):
+        return match_parameters(parser, line.directive.parameters, marker_line.directive.parameters)
+    else:
+        return Matching.No
+
+def match_operands(line_operands, marker_line_operands):
+    """
+    Returns whether the operands of a parsed instruction match those of a marker instruction.
+
+    :param list line_operands: operands of an instruction in the parsed assembly code.
+    :param list marker_line_operands: operands of the corresponding instruction in the marker.
+    :return bool: True iff both lists have the same length and match element by element.
+    """
+    if len(line_operands) != len(marker_line_operands):
+        # A different number of operands can never match.
+        return False
+    for actual, expected in zip(line_operands, marker_line_operands):
+        if not match_operand(actual, expected):
+            return False
+    return True
+
+def match_operand(line_operand, marker_line_operand):
+    """
+    Returns whether `line_operand` matches `marker_line_operand`.
+
+    Immediates match on their value, registers on their case-insensitive name, and memory operands
+    on their base and offset.  Operands of any other kind, including a missing (`None`) base or
+    offset, do not match.
+
+    :param line_operand: operand of an instruction in the parsed assembly code.
+    :param marker_line_operand: operand of the corresponding instruction in the marker.
+    :return bool: True iff the operands match.
+    """
+    if (
+        isinstance(line_operand, ImmediateOperand)
+        and isinstance(marker_line_operand, ImmediateOperand)
+        and line_operand.value == marker_line_operand.value
+    ):
+        return True
+    if (
+        isinstance(line_operand, RegisterOperand)
+        and isinstance(marker_line_operand, RegisterOperand)
+        and line_operand.name.lower() == marker_line_operand.name.lower()
+    ):
+        return True
+    if (
+        isinstance(line_operand, MemoryOperand)
+        and isinstance(marker_line_operand, MemoryOperand)
+        and match_operand(line_operand.base, marker_line_operand.base)
+        # The offset of the parsed operand must be compared to the one of the marker, not to itself.
+        and match_operand(line_operand.offset, marker_line_operand.offset)
+    ):
+        return True
+    return False
+
+def match_parameters(parser, line_parameters, marker_line_parameters):
+    """
+    Returns whether `line_parameters` matches `marker_line_parameters`.
+
+    The parameters of the line are compared in order with the leading parameters of the marker.
+    Parameters of the line that remain once the marker is exhausted are ignored.
+
+    :param parser: parser forwarded to `match_parameter` for comparing immediate values.
+    :param list of strings line_parameters: parameters of a directive in the parsed assembly code.
+    :param list of strings marker_line_parameters: parameters of a directive in the marker.
+    :return: Matching. In case of partial match, `marker_line_parameters` is modified and should be
+                       reused for matching the next line in the parsed assembly code.  The
+                       parameters matched before a mismatch stay consumed even when `Matching.No`
+                       is returned.
+    """
+    # The elements of `marker_line_parameters` are consumed as they are matched.
+    for line_parameter in line_parameters:
+        if not marker_line_parameters:
+            break
+        if not match_parameter(parser, line_parameter, marker_line_parameters[0]):
+            return Matching.No
+        marker_line_parameters.pop(0)
+    # Leftover marker parameters must be matched by the following line(s) of the parsed code.
+    return Matching.Partial if marker_line_parameters else Matching.Full
+
+def match_parameter(parser, line_parameter, marker_line_parameter):
+    """
+    Returns whether `line_parameter` and `marker_line_parameter` denote the same parameter.
+
+    :param parser: parser whose `normalize_imd` is used to compare the parameters as immediates.
+    :param str line_parameter: parameter of a directive in the parsed assembly code.
+    :param str marker_line_parameter: parameter of a directive in the marker.
+    :return bool: True iff the parameters are equal ignoring case or normalize to the same value.
+    """
+    if line_parameter.lower() == marker_line_parameter.lower():
+        return True
+    # The spellings differ, so compare the immediate values that the parameters represent.
+    immediates = [
+        ImmediateOperand(value=text) for text in (line_parameter, marker_line_parameter)
+    ]
+    line_value, marker_value = map(parser.normalize_imd, immediates)
+    return line_value == marker_value


 def find_jump_labels(lines):