Add support for the Intel syntax supported by MSVC and ICC

This commit is contained in:
pleroy
2025-02-02 14:02:16 +01:00
parent dffea6d066
commit b4d342266d
30 changed files with 2744 additions and 499 deletions

View File

@@ -3,6 +3,8 @@
import operator
import re
from osaca.semantics.hw_model import MachineModel
class BaseParser(object):
# Identifiers for operand types
@@ -25,20 +27,62 @@ class BaseParser(object):
self.construct_parser()
self._parser_constructed = True
def isa(self):
    """
    Abstract hook that every derived parser has to override.

    The base class provides no implementation and always raises.

    :raises NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()
# The marker functions return lists of `InstructionForm` that are used to find the IACA markers
# in the parsed code. In addition to just a list, the marker may have a structure like
# [I1, [I2, I3], I4, ...] where the nested list indicates that at least one of I2 and I3 must
# match the second instruction in the fragment of parsed code.
# If an instruction form is a `DirectiveOperand`, the match may happen over several directive
# operands in the parsed code, provided that the directives have the same name and the
# parameters are in sequence with respect to the pattern. This provides an easy way to describe
# a sequence of bytes irrespective of the way it was grouped in the assembly source.
# Note that markers must be matched *before* normalization.
def start_marker(self):
    """
    Abstract hook for the pattern of the start marker; derived parsers must override it.

    Per the marker conventions used in this class, an override returns a (possibly nested)
    list of instruction forms that is matched against the parsed code before normalization.

    :raises NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()
def end_marker(self):
    """
    Abstract hook for the pattern of the end marker; derived parsers must override it.

    Per the marker conventions used in this class, an override returns a (possibly nested)
    list of instruction forms that is matched against the parsed code before normalization.

    :raises NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()
# Performs all the normalization needed to match the instruction to the ISA/arch model. This
# method must set the `normalized` property of the instruction and must be idempotent.
def normalize_instruction_form(
    self,
    instruction_form,
    isa_model: MachineModel,
    arch_model: MachineModel,
):
    """
    Abstract hook that normalizes an instruction form; derived parsers must override it.

    An override is expected to perform every normalization required to match the instruction
    against the ISA and architecture models, to set the `normalized` property of the
    instruction, and to be idempotent.

    :param instruction_form: the instruction form to normalize.
    :param isa_model: machine model describing the ISA.
    :param arch_model: machine model describing the architecture.
    :raises NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()
@staticmethod
def detect_ISA(file_content):
    """
    Detect the ISA of the assembly based on the used registers and return the ISA code.

    The detection counts, for each candidate, how many times its register-name heuristics
    match in the code and returns the candidate with the highest count. When several
    candidates have the same count (e.g. for empty input), the first one in the order
    x86/AT&T, x86/Intel, AArch64 wins.

    :param str file_content: assembly code.
    :return: a tuple isa, syntax describing the architecture and the assembly syntax,
             if appropriate.  If there is no notion of syntax, the second element is None.
    """
    # The insertion order of this dict is significant: it is the tie-breaking order.
    heuristics = {
        # x86, AT&T syntax: xmm/ymm/zmm vector registers and the rax/eax, rbx/ebx, rcx/ecx,
        # rdx/edx general-purpose registers.  There is a % before each register name.  The
        # general-purpose names must end at a word boundary: they are never followed by a
        # digit, so requiring one would make the heuristic dead.
        ("x86", "ATT"): [r"%[xyz]mm[0-9]", r"%[er][abcd]x\b"],
        # x86, Intel syntax: same registers, but there is no % before the register names.
        # The lookbehind rejects AT&T-style names without consuming a character.
        ("x86", "INTEL"): [r"[^%][xyz]mm[0-9]", r"(?<!%)\b[er][abcd]x\b"],
        # AArch64: v and z vector registers and x/w general-purpose registers.
        ("aarch64", None): [r"[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]", r"[wx][0-9]"],
    }
    counts = {
        candidate: sum(len(re.findall(pattern, file_content)) for pattern in patterns)
        for candidate, patterns in heuristics.items()
    }
    # `max` returns the first maximal key, which preserves the tie-breaking order above.
    return max(counts, key=counts.get)
@@ -94,6 +138,14 @@ class BaseParser(object):
def get_full_reg_name(self, register):
    """
    Abstract hook that every derived parser has to override.

    :param register: the register whose full name is requested.
    :raises NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()
# Must be called on a *normalized* instruction.
def get_regular_source_operands(self, instruction_form):
    """
    Abstract hook that every derived parser has to override.

    :param instruction_form: the instruction form to inspect; it must already be normalized.
    :raises NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()
# Must be called on a *normalized* instruction.
def get_regular_destination_operands(self, instruction_form):
    """
    Abstract hook that every derived parser has to override.

    :param instruction_form: the instruction form to inspect; it must already be normalized.
    :raises NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()
def normalize_imd(self, imd):
    """
    Abstract hook that every derived parser has to override.

    :param imd: the immediate operand to normalize.
    :raises NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()