From 1a7c1588f615286a9a2369e08df221513c63101d Mon Sep 17 00:00:00 2001 From: pleroy Date: Sun, 2 Feb 2025 14:02:16 +0100 Subject: [PATCH] Add support for the Intel syntax supported by MSVC and ICC --- .gitignore | 4 + osaca/data/model_importer.py | 3 +- osaca/osaca.py | 95 ++- osaca/parser/__init__.py | 10 +- osaca/parser/base_parser.py | 62 +- osaca/parser/identifier.py | 9 + osaca/parser/instruction_form.py | 14 + osaca/parser/label.py | 5 + osaca/parser/memory.py | 10 + osaca/parser/parser_AArch64.py | 69 +- osaca/parser/parser_x86.py | 123 +++ osaca/parser/parser_x86att.py | 158 ++-- osaca/parser/parser_x86intel.py | 830 +++++++++++++++++++ osaca/semantics/arch_semantics.py | 72 +- osaca/semantics/hw_model.py | 41 +- osaca/semantics/isa_semantics.py | 108 +-- osaca/semantics/kernel_dg.py | 15 +- osaca/semantics/marker_utils.py | 309 +++---- tests/test_base_parser.py | 11 +- tests/test_files/gs_x86_icc.asm | 227 +++++ tests/test_files/kernel_x86_intel.asm | 9 + tests/test_files/kernel_x86_intel_memdep.asm | 19 + tests/test_files/triad_x86_intel.asm | 124 +++ tests/test_files/triad_x86_intel_iaca.asm | 139 ++++ tests/test_frontend.py | 2 +- tests/test_marker_utils.py | 54 +- tests/test_parser_x86intel.py | 365 ++++++++ tests/test_semantics.py | 332 +++++++- validation/kernels/striad.c | 12 + validation/kernels/triad.c | 12 + 30 files changed, 2744 insertions(+), 499 deletions(-) create mode 100644 osaca/parser/parser_x86.py create mode 100644 osaca/parser/parser_x86intel.py create mode 100644 tests/test_files/gs_x86_icc.asm create mode 100644 tests/test_files/kernel_x86_intel.asm create mode 100644 tests/test_files/kernel_x86_intel_memdep.asm create mode 100644 tests/test_files/triad_x86_intel.asm create mode 100644 tests/test_files/triad_x86_intel_iaca.asm create mode 100644 tests/test_parser_x86intel.py diff --git a/.gitignore b/.gitignore index 6ef37fa..426ace6 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,7 @@ venv.bak/ # mypy .mypy_cache/ + +# 
Visual Studio +.vs +x64/ diff --git a/osaca/data/model_importer.py b/osaca/data/model_importer.py index d10555e..f1ab348 100644 --- a/osaca/data/model_importer.py +++ b/osaca/data/model_importer.py @@ -111,7 +111,8 @@ def extract_model(tree, arch, skip_mem=True): print("Skipping...", file=sys.stderr) return None mm = MachineModel(isa=isa) - parser = get_parser(isa) + # The model uses the AT&T syntax. + parser = get_parser(isa, "ATT") for instruction_tag in tree.findall(".//instruction"): ignore = False diff --git a/osaca/osaca.py b/osaca/osaca.py index d0fb49d..f865e9c 100644 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -11,7 +11,7 @@ from ruamel.yaml import YAML from osaca.db_interface import import_benchmark_output, sanity_check from osaca.frontend import Frontend -from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT +from osaca.parser import BaseParser, ParserAArch64, ParserX86, ParserX86ATT, ParserX86Intel from osaca.semantics import ( INSTR_FLAGS, ArchSemantics, @@ -47,6 +47,10 @@ DEFAULT_ARCHS = { "aarch64": "V2", "x86": "SPR", } +SUPPORTED_SYNTAXES = [ + "ATT", + "INTEL", +] # Stolen from pip @@ -108,6 +112,12 @@ def create_parser(parser=None): "ZEN4, TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a " "default uarch for x86/AArch64.", ) + parser.add_argument( + "--syntax", + type=str, + help="Define the assembly syntax (ATT, Intel) for x86. If no syntax is given, OSACA " + "tries to determine automatically the syntax to use.", + ) parser.add_argument( "--fixed", action="store_true", @@ -232,6 +242,14 @@ def check_arguments(args, parser): parser.error( "Microarchitecture not supported. Please see --help for all valid architecture codes." 
) + if args.syntax and args.arch and MachineModel.get_isa_for_arch(args.arch) != "x86": + parser.error( + "Syntax can only be explicitly specified for an x86 microarchitecture" + ) + if args.syntax and args.syntax.upper() not in SUPPORTED_SYNTAXES: + parser.error( + "Assembly syntax not supported. Please see --help for all valid assembly syntaxes." + ) if "import_data" in args and args.import_data not in supported_import_files: parser.error( "Microbenchmark not supported for data import. Please see --help for all valid " @@ -310,30 +328,56 @@ def inspect(args, output_file=sys.stdout): code = args.file.read() # Detect ISA if necessary - arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)] - print_arch_warning = False if args.arch else True - isa = MachineModel.get_isa_for_arch(arch) + detected_isa, detected_syntax = BaseParser.detect_ISA(code) + detected_arch = DEFAULT_ARCHS[detected_isa] + + print_arch_warning = not args.arch verbose = args.verbose ignore_unknown = args.ignore_unknown - # Parse file - parser = get_asm_parser(arch) - try: - parsed_code = parser.parse_file(code) - except Exception as e: - # probably the wrong parser based on heuristic - if args.arch is None: - # change ISA and try again - arch = ( - DEFAULT_ARCHS["x86"] - if BaseParser.detect_ISA(code) == "aarch64" - else DEFAULT_ARCHS["aarch64"] - ) - isa = MachineModel.get_isa_for_arch(arch) - parser = get_asm_parser(arch) + # If the arch/syntax is explicitly specified, that's the only thing we'll try. Otherwise, we'll + # look at all the possible archs/syntaxes, but with our detected arch/syntax last in the list, + # thus tried first. 
+ if args.arch: + archs_to_try = [args.arch] + else: + archs_to_try = list(DEFAULT_ARCHS.values()) + archs_to_try.remove(detected_arch) + archs_to_try.append(detected_arch) + if args.syntax: + syntaxes_to_try = [args.syntax.upper()] + else: + syntaxes_to_try = SUPPORTED_SYNTAXES + [None] + syntaxes_to_try.remove(detected_syntax) + syntaxes_to_try.append(detected_syntax) + + # Filter the cross-product of archs and syntaxes to eliminate the combinations that don't make + # sense. + combinations_to_try = [ + (arch, syntax) + for arch in archs_to_try + for syntax in syntaxes_to_try + if (syntax is not None) == (MachineModel.get_isa_for_arch(arch) == "x86") + ] + + # Parse file. + message = "" + single_combination = len(combinations_to_try) == 1 + while True: + arch, syntax = combinations_to_try.pop() + parser = get_asm_parser(arch, syntax) + try: parsed_code = parser.parse_file(code) - else: - raise e + break + except Exception as e: + message += f"\nWith arch {arch} and syntax {syntax} got error: {e}." + # Either the wrong parser based on heuristic, or a bona fide syntax error (or + # unsupported syntax). For ease of debugging, we emit the entire exception trace if + # we tried a single arch/syntax combination. If we tried multiple combinations, we + # don't emit the traceback as it would apply to the latest combination tried, which is + # probably the less interesting. 
+ if not combinations_to_try: + raise SyntaxError(message) from e if single_combination else None # Reduce to marked kernel or chosen section and add semantics if args.lines: @@ -341,13 +385,14 @@ def inspect(args, output_file=sys.stdout): kernel = [line for line in parsed_code if line.line_number in line_range] print_length_warning = False else: - kernel = reduce_to_section(parsed_code, isa) + kernel = reduce_to_section(parsed_code, parser) # Print warning if kernel has no markers and is larger than threshold (100) print_length_warning = ( True if len(kernel) == len(parsed_code) and len(kernel) > 100 else False ) machine_model = MachineModel(arch=arch) - semantics = ArchSemantics(machine_model) + semantics = ArchSemantics(parser, machine_model) + semantics.normalize_instruction_forms(kernel) semantics.add_semantics(kernel) # Do optimal schedule for kernel throughput if wished if not args.fixed: @@ -417,7 +462,7 @@ def run(args, output_file=sys.stdout): @lru_cache() -def get_asm_parser(arch) -> BaseParser: +def get_asm_parser(arch, syntax) -> BaseParser: """ Helper function to create the right parser for a specific architecture. @@ -427,7 +472,7 @@ def get_asm_parser(arch) -> BaseParser: """ isa = MachineModel.get_isa_for_arch(arch) if isa == "x86": - return ParserX86ATT() + return ParserX86ATT() if syntax == "ATT" else ParserX86Intel() elif isa == "aarch64": return ParserAArch64() diff --git a/osaca/parser/__init__.py b/osaca/parser/__init__.py index 3b5e8ba..4f225cf 100644 --- a/osaca/parser/__init__.py +++ b/osaca/parser/__init__.py @@ -1,11 +1,13 @@ """ Collection of parsers supported by OSACA. -Only the parser below will be exported, so please add new parsers to __all__. +Only the parsers below will be exported, so please add new parsers to __all__. 
""" from .base_parser import BaseParser +from .parser_x86 import ParserX86 from .parser_x86att import ParserX86ATT +from .parser_x86intel import ParserX86Intel from .parser_AArch64 import ParserAArch64 from .instruction_form import InstructionForm from .operand import Operand @@ -14,15 +16,17 @@ __all__ = [ "Operand", "InstructionForm", "BaseParser", + "ParserX86", "ParserX86ATT", + "ParserX86Intel", "ParserAArch64", "get_parser", ] -def get_parser(isa): +def get_parser(isa, syntax): if isa.lower() == "x86": - return ParserX86ATT() + return ParserX86ATT() if syntax == "ATT" else ParserX86Intel() elif isa.lower() == "aarch64": return ParserAArch64() else: diff --git a/osaca/parser/base_parser.py b/osaca/parser/base_parser.py index 3ac2124..0a5d620 100644 --- a/osaca/parser/base_parser.py +++ b/osaca/parser/base_parser.py @@ -3,6 +3,8 @@ import operator import re +from osaca.semantics.hw_model import MachineModel + class BaseParser(object): # Identifiers for operand types @@ -25,20 +27,62 @@ class BaseParser(object): self.construct_parser() self._parser_constructed = True + def isa(self): + # Done in derived classes + raise NotImplementedError + + # The marker functions return lists of `InstructionForm` that are used to find the IACA markers + # in the parsed code. In addition to just a list, the marker may have a structure like + # [I1, [I2, I3], I4, ...] where the nested list indicates that at least one of I2 and I3 must + # match the second instruction in the fragment of parsed code. + # If an instruction form is a `DirectiveOperand`, the match may happen over several directive + # operands in the parsed code, provided that the directives have the same name and the + # parameters are in sequence with respect to the pattern. This provides an easy way to describe + # a sequence of bytes irrespective of the way it was grouped in the assembly source. + # Note that markers must be matched *before* normalization. 
+ def start_marker(self): + # Done in derived classes + raise NotImplementedError + + def end_marker(self): + # Done in derived classes + raise NotImplementedError + + # Performs all the normalization needed to match the instruction to the ISA/arch model. This + # method must set the `normalized` property of the instruction and must be idempotent. + def normalize_instruction_form( + self, + instruction_form, + isa_model: MachineModel, + arch_model: MachineModel + ): + raise NotImplementedError + @staticmethod def detect_ISA(file_content): - """Detect the ISA of the assembly based on the used registers and return the ISA code.""" + """ + Detect the ISA of the assembly based on the used registers and return the ISA code. + + :param str file_content: assembly code. + :return: a tuple isa, syntax describing the architecture and the assembly syntax, + if appropriate. If there is no notion of syntax, the second element is None. + """ # Check for the amount of registers in the code to determine the ISA # 1) Check for xmm, ymm, zmm, rax, rbx, rcx, and rdx registers in x86 + # AT&T syntax. There is a % before each register name. heuristics_x86ATT = [r"%[xyz]mm[0-9]", r"%[er][abcd]x[0-9]"] - # 2) check for v and z vector registers and x/w general-purpose registers + # 2) Same as above, but for the Intel syntax. There is no % before the register names. 
+ heuristics_x86Intel = [r"[^%][xyz]mm[0-9]", r"[^%][er][abcd]x[0-9]"] + # 3) check for v and z vector registers and x/w general-purpose registers heuristics_aarch64 = [r"[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]", r"[wx][0-9]"] - matches = {"x86": 0, "aarch64": 0} + matches = {("x86", "ATT"): 0, ("x86", "INTEL"): 0, ("aarch64", None): 0} for h in heuristics_x86ATT: - matches["x86"] += len(re.findall(h, file_content)) + matches[("x86", "ATT")] += len(re.findall(h, file_content)) + for h in heuristics_x86Intel: + matches[("x86", "INTEL")] += len(re.findall(h, file_content)) for h in heuristics_aarch64: - matches["aarch64"] += len(re.findall(h, file_content)) + matches[("aarch64", None)] += len(re.findall(h, file_content)) return max(matches.items(), key=operator.itemgetter(1))[0] @@ -94,6 +138,14 @@ class BaseParser(object): def get_full_reg_name(self, register): raise NotImplementedError + # Must be called on a *normalized* instruction. + def get_regular_source_operands(self, instruction_form): + raise NotImplementedError + + # Must be called on a *normalized* instruction. 
+ def get_regular_destination_operands(self, instruction_form): + raise NotImplementedError + def normalize_imd(self, imd): raise NotImplementedError diff --git a/osaca/parser/identifier.py b/osaca/parser/identifier.py index e5c0209..87c3d76 100644 --- a/osaca/parser/identifier.py +++ b/osaca/parser/identifier.py @@ -41,3 +41,12 @@ class IdentifierOperand(Operand): def __repr__(self): return self.__str__() + + def __eq__(self, other): + if isinstance(other, IdentifierOperand): + return ( + self._name == other._name + and self._offset == other._offset + and self._relocation == other._relocation + ) + return False diff --git a/osaca/parser/instruction_form.py b/osaca/parser/instruction_form.py index d32bc34..5a04c7a 100644 --- a/osaca/parser/instruction_form.py +++ b/osaca/parser/instruction_form.py @@ -19,6 +19,7 @@ class InstructionForm: port_pressure=None, operation=None, breaks_dependency_on_equal_operands=False, + normalized=False, ): self._mnemonic = mnemonic self._operands = operands @@ -33,6 +34,7 @@ class InstructionForm: self._operation = operation self._uops = uops self._breaks_dependency_on_equal_operands = breaks_dependency_on_equal_operands + self._normalized = normalized self._latency = latency self._throughput = throughput self._latency_cp = [] @@ -42,6 +44,10 @@ class InstructionForm: self._port_uops = [] self._flags = [] + def check_normalized(self): + if not self._normalized: + raise AssertionError("Unnormalized instruction") + @property def semantic_operands(self): return self._semantic_operands @@ -114,6 +120,10 @@ class InstructionForm: def breaks_dependency_on_equal_operands(self): return self._breaks_dependency_on_equal_operands + @property + def normalized(self): + return self._normalized + @semantic_operands.setter def semantic_operands(self, semantic_operands): self._semantic_operands = semantic_operands @@ -142,6 +152,10 @@ class InstructionForm: def breaks_dependency_on_equal_operands(self, boolean): 
self._breaks_dependency_on_equal_operands = boolean + @normalized.setter + def normalized(self, normalized): + self._normalized = normalized + @mnemonic.setter def mnemonic(self, mnemonic): self._mnemonic = mnemonic diff --git a/osaca/parser/label.py b/osaca/parser/label.py index 39b1ece..62cbfcd 100644 --- a/osaca/parser/label.py +++ b/osaca/parser/label.py @@ -20,3 +20,8 @@ class LabelOperand(Operand): def __repr__(self): return self.__str__() + + def __eq__(self, other): + if isinstance(other, LabelOperand): + return self._name == other._name + return False diff --git a/osaca/parser/memory.py b/osaca/parser/memory.py index 9e79c3c..96d812d 100644 --- a/osaca/parser/memory.py +++ b/osaca/parser/memory.py @@ -15,6 +15,7 @@ class MemoryOperand(Operand): pre_indexed=False, post_indexed=False, indexed_val=None, + data_type=None, src=None, dst=None, source=False, @@ -30,6 +31,7 @@ class MemoryOperand(Operand): self._pre_indexed = pre_indexed self._post_indexed = post_indexed self._indexed_val = indexed_val + self._data_type = data_type # type of register we store from (`src`) or load to (`dst`) self._src = src self._dst = dst @@ -74,6 +76,14 @@ class MemoryOperand(Operand): def indexed_val(self): return self._indexed_val + @property + def data_type(self): + return self._data_type + + @data_type.setter + def data_type(self, data_type): + self._data_type = data_type + @property def src(self): return self._src diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py index 85a8ad0..7b76b6e 100644 --- a/osaca/parser/parser_AArch64.py +++ b/osaca/parser/parser_AArch64.py @@ -13,6 +13,7 @@ from osaca.parser.identifier import IdentifierOperand from osaca.parser.immediate import ImmediateOperand from osaca.parser.condition import ConditionOperand from osaca.parser.prefetch import PrefetchOperand +from osaca.semantics.hw_model import MachineModel class ParserAArch64(BaseParser): @@ -26,7 +27,58 @@ class ParserAArch64(BaseParser): def __init__(self): 
super().__init__() - self.isa = "aarch64" + + def isa(self): + return "aarch64" + + def start_marker(self): + return [ + InstructionForm( + mnemonic="mov", + operands=[RegisterOperand(name="1", prefix="x"), ImmediateOperand(value=111)] + ), + InstructionForm( + directive_id=DirectiveOperand(name="byte", parameters=["213", "3", "32", "31"]) + ) + ] + + def end_marker(self): + return [ + InstructionForm( + mnemonic="mov", + operands=[RegisterOperand(name="1", prefix="x"), ImmediateOperand(value=222)] + ), + InstructionForm( + directive_id=DirectiveOperand(name="byte", parameters=["213", "3", "32", "31"]) + ) + ] + + def normalize_instruction_form( + self, + instruction_form, + isa_model: MachineModel, + arch_model: MachineModel + ): + """ + If the instruction doesn't exist in the machine model, normalize it by dropping the shape + suffix. + """ + if instruction_form.normalized: + return + instruction_form.normalized = True + + mnemonic = instruction_form.mnemonic + if not mnemonic: + return + model = arch_model.get_instruction(mnemonic, instruction_form.operands) + if not model: + if "." in mnemonic: + # Check for instruction without shape/cc suffix. 
+ suffix_start = mnemonic.index(".") + mnemonic = mnemonic[:suffix_start] + model = arch_model.get_instruction(mnemonic, instruction_form.operands) + if model: + instruction_form.mnemonic = mnemonic def construct_parser(self): """Create parser for ARM AArch64 ISA.""" @@ -589,6 +641,21 @@ class ParserAArch64(BaseParser): name += "[" + str(register.index) + "]" return name + def get_regular_source_operands(self, instruction_form): + """Get source operand of given instruction form assuming regular src/dst behavior.""" + # if there is only one operand, assume it is a source operand + if len(instruction_form.operands) == 1: + return [instruction_form.operands[0]] + return [op for op in instruction_form.operands[1:]] + + def get_regular_destination_operands(self, instruction_form): + """Get destination operand of given instruction form assuming regular src/dst behavior.""" + # if there is only one operand, assume no destination + if len(instruction_form.operands) == 1: + return [] + # return first operand + return instruction_form.operands[:1] + def normalize_imd(self, imd): """Normalize immediate to decimal based representation""" if isinstance(imd, IdentifierOperand): diff --git a/osaca/parser/parser_x86.py b/osaca/parser/parser_x86.py new file mode 100644 index 0000000..e9b1837 --- /dev/null +++ b/osaca/parser/parser_x86.py @@ -0,0 +1,123 @@ +import re +import string + +from osaca.parser import BaseParser + + +class ParserX86(BaseParser): + _instance = None + + # Singleton pattern, as this is created very many times. 
+ def __new__(cls): + if cls._instance is None: + cls._instance = super(ParserX86, cls).__new__(cls) + return cls._instance + + def __init__(self): + super().__init__() + + def isa(self): + return "x86" + + def is_reg_dependend_of(self, reg_a, reg_b): + """Check if ``reg_a`` is dependent on ``reg_b``""" + reg_a_name = reg_a.name.upper() + reg_b_name = reg_b.name.upper() + + # Check if they are the same registers + if reg_a_name == reg_b_name: + return True + # Check vector registers first + if self.is_vector_register(reg_a): + if self.is_vector_register(reg_b): + if reg_a_name[1:] == reg_b_name[1:]: + # Registers in the same vector space + return True + return False + # Check basic GPRs + gpr_groups = { + "A": ["RAX", "EAX", "AX", "AH", "AL"], + "B": ["RBX", "EBX", "BX", "BH", "BL"], + "C": ["RCX", "ECX", "CX", "CH", "CL"], + "D": ["RDX", "EDX", "DX", "DH", "DL"], + "SP": ["RSP", "ESP", "SP", "SPL"], + "SRC": ["RSI", "ESI", "SI", "SIL"], + "DST": ["RDI", "EDI", "DI", "DIL"], + } + if self.is_basic_gpr(reg_a): + if self.is_basic_gpr(reg_b): + for dep_group in gpr_groups.values(): + if reg_a_name in dep_group: + if reg_b_name in dep_group: + return True + return False + + # Check other GPRs + ma = re.match(r"R([0-9]+)[DWB]?", reg_a_name) + mb = re.match(r"R([0-9]+)[DWB]?", reg_b_name) + if ma and mb and ma.group(1) == mb.group(1): + return True + + # No dependencies + return False + + def is_basic_gpr(self, register): + """Check if register is a basic general purpose register (ebi, rax, ...)""" + if any(char.isdigit() for char in register.name) or any( + register.name.lower().startswith(x) for x in ["mm", "xmm", "ymm", "zmm"] + ): + return False + return True + + def is_gpr(self, register): + """Check if register is a general purpose register""" + if register is None: + return False + if self.is_basic_gpr(register): + return True + return re.match(r"R([0-9]+)[DWB]?", register.name, re.IGNORECASE) + + def is_vector_register(self, register): + """Check if register is a 
vector register""" + if register is None or register.name is None: + return False + if register.name.rstrip(string.digits).lower() in [ + "mm", + "xmm", + "ymm", + "zmm", + ]: + return True + return False + + def get_reg_type(self, register): + """Get register type""" + if register is None: + return False + if self.is_gpr(register): + return "gpr" + elif self.is_vector_register(register): + return register.name.rstrip(string.digits).lower() + raise ValueError + + def is_flag_dependend_of(self, flag_a, flag_b): + """Check if ``flag_a`` is dependent on ``flag_b``""" + # we assume flags are independent of each other, e.g., CF can be read while ZF gets written + # TODO validate this assumption + return flag_a.name == flag_b.name + + def get_regular_source_operands(self, instruction_form): + """Get source operand of given instruction form assuming regular src/dst behavior.""" + # if there is only one operand, assume it is a source operand + if len(instruction_form.operands) == 1: + return [instruction_form.operands[0]] + # return all but last operand + return [op for op in instruction_form.operands[0:-1]] + + def get_regular_destination_operands(self, instruction_form): + """Get destination operand of given instruction form assuming regular src/dst behavior.""" + # if there is only one operand, assume no destination + if len(instruction_form.operands) == 1: + return [] + # return last operand + return instruction_form.operands[-1:] diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index c5f0627..d51dad3 100644 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -5,7 +5,7 @@ import re import pyparsing as pp -from osaca.parser import BaseParser +from osaca.parser import ParserX86 from osaca.parser.instruction_form import InstructionForm from osaca.parser.directive import DirectiveOperand from osaca.parser.memory import MemoryOperand @@ -13,10 +13,12 @@ from osaca.parser.label import LabelOperand from osaca.parser.register 
import RegisterOperand from osaca.parser.identifier import IdentifierOperand from osaca.parser.immediate import ImmediateOperand +from osaca.semantics.hw_model import MachineModel -class ParserX86ATT(BaseParser): +class ParserX86ATT(ParserX86): _instance = None + GAS_SUFFIXES = "bswlqt" # Singelton pattern, as this is created very many times def __new__(cls): @@ -26,7 +28,66 @@ class ParserX86ATT(BaseParser): def __init__(self): super().__init__() - self.isa = "x86" + + def start_marker(self): + return [ + [ + InstructionForm( + mnemonic="mov", + operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")] + ), + InstructionForm( + mnemonic="movl", + operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")] + ) + ], + InstructionForm( + directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"]) + ) + ] + + def end_marker(self): + return [ + [ + InstructionForm( + mnemonic="mov", + operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")] + ), + InstructionForm( + mnemonic="movl", + operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")] + ) + ], + InstructionForm( + directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"]) + ) + ] + + def normalize_instruction_form( + self, + instruction_form, + isa_model: MachineModel, + arch_model: MachineModel + ): + """ + If the instruction doesn't exist in the machine model, normalize it by dropping the GAS + suffix. + """ + if instruction_form.normalized: + return + instruction_form.normalized = True + + mnemonic = instruction_form.mnemonic + if not mnemonic: + return + model = arch_model.get_instruction(mnemonic, instruction_form.operands) + if not model: + # Check for instruction without GAS suffix. 
+ if mnemonic[-1] in self.GAS_SUFFIXES: + mnemonic = mnemonic[:-1] + model = arch_model.get_instruction(mnemonic, instruction_form.operands) + if model: + instruction_form.mnemonic = mnemonic def construct_parser(self): """Create parser for x86 AT&T ISA.""" @@ -253,10 +314,10 @@ class ParserX86ATT(BaseParser): if result is None: try: result = self.parse_instruction(line) - except pp.ParseException: + except pp.ParseException as e: raise ValueError( "Could not parse instruction on line {}: {!r}".format(line_number, line) - ) + ) from e instruction_form.mnemonic = result.mnemonic instruction_form.operands = result.operands instruction_form.comment = result.comment @@ -393,90 +454,3 @@ class ParserX86ATT(BaseParser): return imd.value # identifier return imd - - def is_flag_dependend_of(self, flag_a, flag_b): - """Check if ``flag_a`` is dependent on ``flag_b``""" - # we assume flags are independent of each other, e.g., CF can be read while ZF gets written - # TODO validate this assumption - return flag_a.name == flag_b.name - - def is_reg_dependend_of(self, reg_a, reg_b): - """Check if ``reg_a`` is dependent on ``reg_b``""" - reg_a_name = reg_a.name.upper() - reg_b_name = reg_b.name.upper() - - # Check if they are the same registers - if reg_a_name == reg_b_name: - return True - # Check vector registers first - if self.is_vector_register(reg_a): - if self.is_vector_register(reg_b): - if reg_a_name[1:] == reg_b_name[1:]: - # Registers in the same vector space - return True - return False - # Check basic GPRs - gpr_groups = { - "A": ["RAX", "EAX", "AX", "AH", "AL"], - "B": ["RBX", "EBX", "BX", "BH", "BL"], - "C": ["RCX", "ECX", "CX", "CH", "CL"], - "D": ["RDX", "EDX", "DX", "DH", "DL"], - "SP": ["RSP", "ESP", "SP", "SPL"], - "SRC": ["RSI", "ESI", "SI", "SIL"], - "DST": ["RDI", "EDI", "DI", "DIL"], - } - if self.is_basic_gpr(reg_a): - if self.is_basic_gpr(reg_b): - for dep_group in gpr_groups.values(): - if reg_a_name in dep_group: - if reg_b_name in dep_group: - return 
True - return False - - # Check other GPRs - ma = re.match(r"R([0-9]+)[DWB]?", reg_a_name) - mb = re.match(r"R([0-9]+)[DWB]?", reg_b_name) - if ma and mb and ma.group(1) == mb.group(1): - return True - - # No dependencies - return False - - def is_basic_gpr(self, register): - """Check if register is a basic general purpose register (ebi, rax, ...)""" - if any(char.isdigit() for char in register.name) or any( - register.name.lower().startswith(x) for x in ["mm", "xmm", "ymm", "zmm"] - ): - return False - return True - - def is_gpr(self, register): - """Check if register is a general purpose register""" - if register is None: - return False - if self.is_basic_gpr(register): - return True - return re.match(r"R([0-9]+)[DWB]?", register.name, re.IGNORECASE) - - def is_vector_register(self, register): - """Check if register is a vector register""" - if register is None or register.name is None: - return False - if register.name.rstrip(string.digits).lower() in [ - "mm", - "xmm", - "ymm", - "zmm", - ]: - return True - return False - - def get_reg_type(self, register): - """Get register type""" - if register is None: - return False - if self.is_gpr(register): - return "gpr" - elif self.is_vector_register(register): - return register.name.rstrip(string.digits).lower() - raise ValueError diff --git a/osaca/parser/parser_x86intel.py b/osaca/parser/parser_x86intel.py new file mode 100644 index 0000000..f9a1426 --- /dev/null +++ b/osaca/parser/parser_x86intel.py @@ -0,0 +1,830 @@ +#!/usr/bin/env python3 + +import pyparsing as pp +import re +import string +import unicodedata + +from osaca.parser import ParserX86 +from osaca.parser.directive import DirectiveOperand +from osaca.parser.identifier import IdentifierOperand +from osaca.parser.immediate import ImmediateOperand +from osaca.parser.instruction_form import InstructionForm +from osaca.parser.label import LabelOperand +from osaca.parser.memory import MemoryOperand +from osaca.parser.register import RegisterOperand +from 
osaca.semantics.hw_model import MachineModel + +# We assume any non-ASCII characters except control characters and line terminators can be part of +# identifiers; this is based on the assumption that no assembler uses non-ASCII white space and +# syntax characters. +# This approach is described at the end of https://www.unicode.org/reports/tr55/#Whitespace-Syntax. +# It is appropriate for tools, such as this one, which process source code but do not fully validate +# it (in this case, that’s the job of the assembler). +NON_ASCII_PRINTABLE_CHARACTERS = "".join( + chr(cp) for cp in range(0x80, 0x10FFFF + 1) + if unicodedata.category(chr(cp)) not in ("Cc", "Zl", "Zp", "Cs", "Cn") +) + +# References: +# ASM386 Assembly Language Reference, document number 469165-003, https://mirror.math.princeton.edu/pub/oldlinux/Linux.old/Ref-docs/asm-ref.pdf. +# Microsoft Macro Assembler BNF Grammar, https://learn.microsoft.com/en-us/cpp/assembler/masm/masm-bnf-grammar?view=msvc-170. +# Intel Architecture Code Analyzer User's Guide, https://www.intel.com/content/dam/develop/external/us/en/documents/intel-architecture-code-analyzer-3-0-users-guide-157552.pdf. +class ParserX86Intel(ParserX86): + _instance = None + + # Singleton pattern, as this is created very many times. + def __new__(cls): + if cls._instance is None: + cls._instance = super(ParserX86Intel, cls).__new__(cls) + return cls._instance + + def __init__(self): + super().__init__() + self._equ = {} + + # The IACA manual says: "For Microsoft* Visual C++ compiler, 64-bit version, use + # IACA_VC64_START and IACA_VC64_END, instead" (of IACA_START and IACA_END). + # TODO: Inconveniently, the code generated with optimization disabled (/Od) has two + # instructions. We should support both patterns, but then who runs OSACA with /Od? 
+ def start_marker(self): + return [ + InstructionForm( + mnemonic="mov", + operands=[ + MemoryOperand( + base=RegisterOperand(name="GS"), + offset=ImmediateOperand(value=111) + ), + ImmediateOperand(value=111) + ] + ), + ] + + def end_marker(self): + return [ + InstructionForm( + mnemonic="mov", + operands=[ + MemoryOperand( + base=RegisterOperand(name="GS"), + offset=ImmediateOperand(value=222) + ), + ImmediateOperand(value=222) + ] + ), + ] + + def normalize_instruction_form( + self, + instruction_form, + isa_model: MachineModel, + arch_model: MachineModel + ): + """ + If the model indicates that this instruction has a single destination that is the last + operand, move the first operand to the last position. This effectively converts the Intel + syntax to the AT&T one. + """ + if instruction_form.normalized: + return + instruction_form.normalized = True + + mnemonic = instruction_form.mnemonic + if not mnemonic: + return + + # The model may only contain the VEX-encoded instruction and we may have the non-VEX-encoded + # one, or vice-versa. Note that this doesn't work when the arguments differ between VEX- + # encoded and non-VEX-encoded, e.g., for psubq. + if not arch_model.get_instruction( + mnemonic, + len(instruction_form.operands) + ): + if mnemonic[0] == 'v': + unvexed_mnemonic = mnemonic[1:] + if arch_model.get_instruction( + unvexed_mnemonic, + len(instruction_form.operands) + ): + mnemonic = unvexed_mnemonic + else: + vexed_mnemonic = 'v' + mnemonic + if arch_model.get_instruction( + vexed_mnemonic, + len(instruction_form.operands) + ): + mnemonic = vexed_mnemonic + instruction_form.mnemonic = mnemonic + + # We cannot pass the operands because they may not match before the reordering. We just + # pass the arity instead. Also, this must use the ISA model, because that's where the + # source/destination information is found. 
+ model = isa_model.get_instruction(mnemonic, len(instruction_form.operands)) + has_single_destination_at_end = False + has_destination = False + if model: + for o in model.operands: + if o.source: + if has_destination: + has_single_destination_at_end = False + if o.destination: + if has_destination: + has_single_destination_at_end = False + else: + has_destination = True + has_single_destination_at_end = True + else: + # if there is only one operand, assume it is a source operand + has_single_destination_at_end = len(instruction_form.operands) > 1 + + if has_single_destination_at_end: + # It is important to reverse the operands, we cannot just move the first one last. This + # makes a difference for instructions with 3 operands or more, such as roundsd: the + # model files expect the rounding mode (an immediate) first but the Intel syntax has it + # last. + instruction_form.operands.reverse() + + # A hack to help with comparison instruction: if the instruction is in the model, and has + # exactly two sources, swap its operands. + if (model and + not has_destination and + len(instruction_form.operands) == 2 + and not isa_model.get_instruction( + mnemonic, + instruction_form.operands + ) and not arch_model.get_instruction( + mnemonic, + instruction_form.operands + )): + instruction_form.operands.reverse() + + # If the instruction has a well-known data type, append a suffix. + data_type_to_suffix = {"DWORD": "d", "QWORD": "q"} + for o in instruction_form.operands: + if isinstance(o, MemoryOperand) and o.data_type: + suffix = data_type_to_suffix.get(o.data_type, None) + if suffix: + suffixed_mnemonic = mnemonic + suffix + if isa_model.get_instruction( + suffixed_mnemonic, + len(instruction_form.operands) + ) or arch_model.get_instruction( + suffixed_mnemonic, + len(instruction_form.operands) + ): + instruction_form.mnemonic = suffixed_mnemonic + break + + + def construct_parser(self): + """Create parser for x86 Intel ISA.""" + # Numeric literal. 
+ binary_number = pp.Combine( + pp.Word("01") + pp.CaselessLiteral("B") + ) + octal_number = pp.Combine( + pp.Word("01234567") + pp.CaselessLiteral("O") + ) + decimal_number = pp.Combine( + pp.Optional(pp.Literal("-")) + pp.Word(pp.nums) + ) + hex_number = pp.Combine( + pp.Word(pp.hexnums) + pp.CaselessLiteral("H") + ) + float_number = pp.Combine( + pp.Optional(pp.Literal("-")) + pp.Word(pp.nums) + pp.Word(".", pp.nums) + ).setResultsName("value") + integer_number = ( + binary_number ^ octal_number ^ decimal_number ^ hex_number + ).setResultsName("value") + + # Comment. + self.comment = pp.Word(";#", exact=1) + pp.Group( + pp.ZeroOrMore(pp.Word(pp.printables + NON_ASCII_PRINTABLE_CHARACTERS)) + ).setResultsName(self.comment_id) + + # Types. + data_type = ( + pp.CaselessKeyword("BYTE") + | pp.CaselessKeyword("DWORD") + | pp.CaselessKeyword("FWORD") + | pp.CaselessKeyword("MMWORD") + | pp.CaselessKeyword("OWORD") + | pp.CaselessKeyword("QWORD") + | pp.CaselessKeyword("REAL10") + | pp.CaselessKeyword("REAL4") + | pp.CaselessKeyword("REAL8") + | pp.CaselessKeyword("SBYTE") + | pp.CaselessKeyword("SDWORD") + | pp.CaselessKeyword("SQWORD") + | pp.CaselessKeyword("SWORD") + | pp.CaselessKeyword("TBYTE") + | pp.CaselessKeyword("WORD") + | pp.CaselessKeyword("XMMWORD") + | pp.CaselessKeyword("YMMWORD") + ).setResultsName("data_type") + + # Identifier. Note that $ is not mentioned in the ASM386 Assembly Language Reference, + # but it is mentioned in the MASM syntax. < and > apparently show up in C++ mangled names. + # ICC allows ".", at least in labels. + first = pp.Word(pp.alphas + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>", exact=1) + rest = pp.Word(pp.alphanums + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>") + identifier = pp.Group( + pp.Combine(first + pp.Optional(rest)).setResultsName("name") + ).setResultsName("identifier") + + # Register. + # This follows the MASM grammar. 
+ special_register = ( + pp.CaselessKeyword("CR0") + | pp.CaselessKeyword("CR2") + | pp.CaselessKeyword("CR3") + | pp.CaselessKeyword("DR0") + | pp.CaselessKeyword("DR1") + | pp.CaselessKeyword("DR2") + | pp.CaselessKeyword("DR3") + | pp.CaselessKeyword("DR6") + | pp.CaselessKeyword("DR7") + | pp.CaselessKeyword("TR3") + | pp.CaselessKeyword("TR4") + | pp.CaselessKeyword("TR5") + | pp.CaselessKeyword("TR6") + | pp.CaselessKeyword("TR7") + ).setResultsName("name") + gp_register = ( + pp.CaselessKeyword("AX") + | pp.CaselessKeyword("EAX") + | pp.CaselessKeyword("CX") + | pp.CaselessKeyword("ECX") + | pp.CaselessKeyword("DX") + | pp.CaselessKeyword("EDX") + | pp.CaselessKeyword("BX") + | pp.CaselessKeyword("EBX") + | pp.CaselessKeyword("DI") + | pp.CaselessKeyword("EDI") + | pp.CaselessKeyword("SI") + | pp.CaselessKeyword("ESI") + | pp.CaselessKeyword("BP") + | pp.CaselessKeyword("EBP") + | pp.CaselessKeyword("SP") + | pp.CaselessKeyword("ESP") + | pp.CaselessKeyword("R8W") + | pp.CaselessKeyword("R8D") + | pp.CaselessKeyword("R9W") + | pp.CaselessKeyword("R9D") + | pp.CaselessKeyword("R12D") + | pp.CaselessKeyword("R13W") + | pp.CaselessKeyword("R13D") + | pp.CaselessKeyword("R14W") + | pp.CaselessKeyword("R14D") + ).setResultsName("name") + byte_register = ( + pp.CaselessKeyword("AL") + | pp.CaselessKeyword("AH") + | pp.CaselessKeyword("CL") + | pp.CaselessKeyword("CH") + | pp.CaselessKeyword("DL") + | pp.CaselessKeyword("DH") + | pp.CaselessKeyword("BL") + | pp.CaselessKeyword("BH") + | pp.CaselessKeyword("R8B") + | pp.CaselessKeyword("R9B") + | pp.CaselessKeyword("R10B") + | pp.CaselessKeyword("R11B") + | pp.CaselessKeyword("R12B") + | pp.CaselessKeyword("R13B") + ).setResultsName("name") + qword_register = ( + pp.CaselessKeyword("RAX") + | pp.CaselessKeyword("RCX") + | pp.CaselessKeyword("RDX") + | pp.CaselessKeyword("RBX") + | pp.CaselessKeyword("RSP") + | pp.CaselessKeyword("RBP") + | pp.CaselessKeyword("RSI") + | pp.CaselessKeyword("RDI") + | 
pp.CaselessKeyword("R8") + | pp.CaselessKeyword("R9") + | pp.CaselessKeyword("R10") + | pp.CaselessKeyword("R11") + | pp.CaselessKeyword("R12") + | pp.CaselessKeyword("R13") + | pp.CaselessKeyword("R14") + | pp.CaselessKeyword("R15") + ).setResultsName("name") + fpu_register = pp.Combine( + pp.CaselessKeyword("ST") + + pp.Optional(pp.Literal("(") + pp.Word("01234567") + pp.Literal(")")) + ).setResultsName("name") + xmm_register = ( + pp.Combine(pp.CaselessLiteral("XMM") + pp.Word(pp.nums)) + | pp.Combine(pp.CaselessLiteral("XMM1") + pp.Word("012345")) + ) + simd_register = ( + pp.Combine(pp.CaselessLiteral("MM") + pp.Word("01234567")) + | xmm_register + | pp.Combine(pp.CaselessLiteral("YMM") + pp.Word(pp.nums)) + | pp.Combine(pp.CaselessLiteral("YMM1") + pp.Word("012345")) + ).setResultsName("name") + segment_register = ( + pp.CaselessKeyword("CS") + | pp.CaselessKeyword("DS") + | pp.CaselessKeyword("ES") + | pp.CaselessKeyword("FS") + | pp.CaselessKeyword("GS") + | pp.CaselessKeyword("SS") + ).setResultsName("name") + self.register = pp.Group( + special_register + | gp_register + | byte_register + | qword_register + | fpu_register + | simd_register + | segment_register + | pp.CaselessKeyword("RIP") + ).setResultsName(self.register_id) + + # Register expressions. 
+ base_register = self.register + index_register = self.register + scale = pp.Word("1248", exact=1) + post_displacement = pp.Group( + (pp.Literal("+") ^ pp.Literal("-")).setResultsName("sign") + + integer_number | identifier + ).setResultsName(self.immediate_id) + pre_displacement = pp.Group(integer_number + pp.Literal("+") + ).setResultsName(self.immediate_id) + indexed = pp.Group( + index_register.setResultsName("index") + + pp.Optional(pp.Literal("*") + + scale.setResultsName("scale")) + ).setResultsName("indexed") + register_expression = pp.Group( + pp.Literal("[") + + pp.Optional(pp.Group(pre_displacement).setResultsName("pre_displacement")) + + pp.Group( + base_register.setResultsName("base") + ^ pp.Group( + base_register.setResultsName("base") + + pp.Literal("+") + + indexed).setResultsName("base_and_indexed") + ^ indexed + ).setResultsName("non_displacement") + + pp.Optional(pp.Group(post_displacement).setResultsName("post_displacement")) + + pp.Literal("]") + ).setResultsName("register_expression") + + # Immediate. + immediate = pp.Group( + integer_number | float_number | identifier + ).setResultsName(self.immediate_id) + + # Expressions. + # The ASM86 manual has weird expressions on page 130 (displacement outside of the register + # expression, multiple register expressions). Let's ignore those for now, but see + # https://stackoverflow.com/questions/71540754/why-sometimes-use-offset-flatlabel-and-sometimes-not. + address_expression = pp.Group( + self.register.setResultsName("segment") + pp.Literal(":") + immediate + ^ immediate + register_expression + ^ register_expression + ^ identifier + pp.Optional(pp.Literal("+") + immediate) + ).setResultsName("address_expression") + + offset_expression = pp.Group( + pp.CaselessKeyword("OFFSET") + + pp.Group( + pp.CaselessKeyword("GROUP") + | pp.CaselessKeyword("SEGMENT") + | pp.CaselessKeyword("FLAT") + ) + # The MASM grammar has the ":" immediately after "OFFSET", but that's not what MSVC + # outputs. 
+ + pp.Literal(":") + + identifier.setResultsName("identifier") + + pp.Optional(pp.Literal("+") + immediate.setResultsName("displacement")) + ).setResultsName("offset_expression") + ptr_expression = pp.Group( + data_type + pp.CaselessKeyword("PTR") + address_expression + ).setResultsName("ptr_expression") + short_expression = pp.Group( + pp.CaselessKeyword("SHORT") + identifier + ).setResultsName("short_expression") + + # Instructions. + mnemonic = pp.Word( + pp.alphas, pp.alphanums + ).setResultsName("mnemonic") + operand = pp.Group( + self.register + | pp.Group( + offset_expression + | ptr_expression + | short_expression + | address_expression + ).setResultsName(self.memory_id) + | immediate + ) + self.instruction_parser = ( + mnemonic + + pp.Optional(operand.setResultsName("operand1")) + + pp.Optional(pp.Suppress(pp.Literal(","))) + + pp.Optional(operand.setResultsName("operand2")) + + pp.Optional(pp.Suppress(pp.Literal(","))) + + pp.Optional(operand.setResultsName("operand3")) + + pp.Optional(pp.Suppress(pp.Literal(","))) + + pp.Optional(operand.setResultsName("operand4")) + + pp.Optional(self.comment) + ) + + # Label. + self.label = pp.Group( + identifier.setResultsName("name") + + pp.Literal(":") + + pp.Optional(self.instruction_parser) + + pp.Optional(self.comment) + ).setResultsName(self.label_id) + + # Directives. + # The identifiers at the beginnig of a directive cannot start with a "." otherwise we end up + # with ambiguities. + directive_first = pp.Word(pp.alphas + NON_ASCII_PRINTABLE_CHARACTERS + "$?@_<>", exact=1) + directive_rest = pp.Word(pp.alphanums + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>") + directive_identifier = pp.Group( + pp.Combine(directive_first + pp.Optional(directive_rest)).setResultsName("name") + ).setResultsName("identifier") + + # Parameter can be any quoted string or sequence of characters besides ';' (for comments) + # or ',' (parameter delimiter). See ASM386 p. 38. 
+ directive_parameter = ( + pp.quotedString + ^ ( + pp.Word(pp.printables + NON_ASCII_PRINTABLE_CHARACTERS, excludeChars=",;") + + pp.Optional(pp.Suppress(pp.Literal(","))) + ) + ^ pp.Suppress(pp.Literal(",")) + ) + # The directives that don't start with a "." are ambiguous with instructions, so we list + # them explicitly. + # TODO: The directives that are types introduce a nasty ambiguity with instructions. Skip + # them for now, apparently the MSVC output uses the short D? directives. + directive_keywords = ( + pp.CaselessKeyword("ALIAS") + | pp.CaselessKeyword("ALIGN") + | pp.CaselessKeyword("ASSUME") + #| pp.CaselessKeyword("BYTE") + | pp.CaselessKeyword("CATSTR") + | pp.CaselessKeyword("COMM") + | pp.CaselessKeyword("COMMENT") + | pp.CaselessKeyword("DB") + | pp.CaselessKeyword("DD") + | pp.CaselessKeyword("DF") + | pp.CaselessKeyword("DQ") + | pp.CaselessKeyword("DT") + | pp.CaselessKeyword("DW") + #| pp.CaselessKeyword("DWORD") + | pp.CaselessKeyword("ECHO") + | pp.CaselessKeyword("END") + | pp.CaselessKeyword("ENDP") + | pp.CaselessKeyword("ENDS") + | pp.CaselessKeyword("EQU") + | pp.CaselessKeyword("EVEN") + | pp.CaselessKeyword("EXTRN") + | pp.CaselessKeyword("EXTERNDEF") + #| pp.CaselessKeyword("FWORD") + | pp.CaselessKeyword("GROUP") + | pp.CaselessKeyword("INCLUDE") + | pp.CaselessKeyword("INCLUDELIB") + | pp.CaselessKeyword("INSTR") + | pp.CaselessKeyword("INVOKE") + | pp.CaselessKeyword("LABEL") + #| pp.CaselessKeyword("MMWORD") + | pp.CaselessKeyword("OPTION") + | pp.CaselessKeyword("ORG") + | pp.CaselessKeyword("PAGE") + | pp.CaselessKeyword("POPCONTEXT") + | pp.CaselessKeyword("PROC") + | pp.CaselessKeyword("PROTO") + | pp.CaselessKeyword("PUBLIC") + | pp.CaselessKeyword("PUSHCONTEXT") + #| pp.CaselessKeyword("QWORD") + #| pp.CaselessKeyword("REAL10") + #| pp.CaselessKeyword("REAL4") + #| pp.CaselessKeyword("REAL8") + | pp.CaselessKeyword("RECORD") + #| pp.CaselessKeyword("SBYTE") + #| pp.CaselessKeyword("SDWORD") + | 
pp.CaselessKeyword("SEGMENT") + | pp.CaselessKeyword("SIZESTR") + | pp.CaselessKeyword("STRUCT") + | pp.CaselessKeyword("SUBSTR") + | pp.CaselessKeyword("SUBTITLE") + #| pp.CaselessKeyword("SWORD") + #| pp.CaselessKeyword("TBYTE") + | pp.CaselessKeyword("TEXTEQU") + | pp.CaselessKeyword("TITLE") + | pp.CaselessKeyword("TYPEDEF") + | pp.CaselessKeyword("UNION") + #| pp.CaselessKeyword("WORD") + #| pp.CaselessKeyword("XMMWORD") + #| pp.CaselessKeyword("YMMWORD") + ) + self.directive = pp.Group( + pp.Optional(~directive_keywords + directive_identifier) + + ( + pp.Combine(pp.Literal(".") + pp.Word(pp.alphanums + "_")) + | pp.Literal("=") + | directive_keywords + ).setResultsName("name") + + pp.ZeroOrMore(directive_parameter).setResultsName("parameters") + + pp.Optional(self.comment) + ).setResultsName(self.directive_id) + + def parse_line(self, line, line_number=None): + """ + Parse line and return instruction form. + + :param str line: line of assembly code + :param line_number: default None, identifier of instruction form + :type line_number: int, optional + :return: ``dict`` -- parsed asm line (comment, label, directive or instruction form) + """ + instruction_form = InstructionForm(line=line, line_number=line_number) + result = None + + # 1. Parse comment. + try: + result = self.process_operand(self.comment.parseString(line, parseAll=True)) + instruction_form.comment = " ".join(result[self.comment_id]) + except pp.ParseException: + pass + + # 2. Parse label. + if not result: + try: + # Returns tuple with label operand and comment, if any. + result = self.process_operand(self.label.parseString(line, parseAll=True)) + instruction_form.label = result[0].name + if result[1]: + instruction_form.comment = " ".join(result[1]) + except pp.ParseException: + pass + + # 3. Parse directive. + if not result: + try: + # Returns tuple with directive operand and comment, if any. 
+ result = self.process_operand(self.directive.parseString(line, parseAll=True)) + instruction_form.directive = result[0] + if result[1]: + instruction_form.comment = " ".join(result[1]) + except pp.ParseException: + pass + + # 4. Parse instruction. + if not result: + try: + result = self.parse_instruction(line) + except pp.ParseException as e: + raise ValueError( + "Could not parse instruction on line {}: {!r}".format(line_number, line) + ) from e + instruction_form.mnemonic = result.mnemonic + instruction_form.operands = result.operands + instruction_form.comment = result.comment + return instruction_form + + def make_instruction(self, parse_result): + """ + Parse instruction in asm line. + + :param parse_result: tuple resulting from calling `parseString` on the `instruction_parser`. + :returns: `dict` -- parsed instruction form + """ + operands = [] + # Add operands to list + # Check first operand + if "operand1" in parse_result: + operands.append(self.process_operand(parse_result.operand1)) + # Check second operand + if "operand2" in parse_result: + operands.append(self.process_operand(parse_result.operand2)) + # Check third operand + if "operand3" in parse_result: + operands.append(self.process_operand(parse_result.operand3)) + # Check fourth operand + if "operand4" in parse_result: + operands.append(self.process_operand(parse_result.operand4)) + return_dict = InstructionForm( + mnemonic=parse_result.mnemonic, + operands=operands, + label_id=None, + comment_id=" ".join(parse_result[self.comment_id]) + if self.comment_id in parse_result else None, + ) + + return return_dict + + def parse_instruction(self, instruction): + """ + Parse instruction in asm line. + + :param str instruction: Assembly line string. 
+ :returns: `dict` -- parsed instruction form + """ + return self.make_instruction( + self.instruction_parser.parseString(instruction, parseAll=True) + ) + + def parse_register(self, register_string): + """Parse register string""" + try: + return self.process_operand( + self.register.parseString(register_string, parseAll=True) + ) + except pp.ParseException: + return None + + def process_operand(self, operand): + """Post-process operand""" + if self.directive_id in operand: + return self.process_directive(operand[self.directive_id]) + if self.identifier in operand: + return self.process_identifier(operand[self.identifier]) + if self.immediate_id in operand: + return self.process_immediate(operand[self.immediate_id]) + if self.label_id in operand: + return self.process_label(operand[self.label_id]) + if self.memory_id in operand: + return self.process_memory_address(operand[self.memory_id]) + if self.register_id in operand: + return self.process_register(operand[self.register_id]) + return operand + + def process_directive(self, directive): + # TODO: This is putting the identifier in the parameters. No idea if it's right. + parameters = [directive.identifier.name] if "identifier" in directive else [] + parameters.extend(directive.parameters) + directive_new = DirectiveOperand( + name=directive.name, + parameters=parameters or None + ) + # Interpret the "=" directives because the generated assembly is full of symbols that are + # defined there. 
+ if directive.name == "=": + self._equ[parameters[0]] = parameters[1] + return directive_new, directive.get("comment") + + def process_register(self, operand): + return RegisterOperand(name=operand.name) + + def process_register_expression(self, register_expression): + pre_displacement = register_expression.get("pre_displacement") + post_displacement = register_expression.get("post_displacement") + non_displacement = register_expression.get("non_displacement") + base = None + indexed = None + if non_displacement: + base_and_indexed = non_displacement.get("base_and_indexed") + if base_and_indexed: + base = base_and_indexed.get("base") + indexed = base_and_indexed.get("indexed") + else: + base = non_displacement.get("base") + if not base: + indexed = non_displacement.get("indexed") + if indexed: + index = indexed.get("index") + scale = int(indexed.get("scale", "1"), 0) + else: + index = None + scale = 1 + displacement_op = ( + self.process_immediate(pre_displacement.immediate) if pre_displacement else None + ) + displacement_op = ( + self.process_immediate(post_displacement.immediate) + if post_displacement else displacement_op + ) + base_op = RegisterOperand(name=base.name) if base else None + index_op = RegisterOperand(name=index.name) if index else None + new_memory = MemoryOperand(offset=displacement_op, base=base_op, index=index_op, scale=scale) + return new_memory + + def process_address_expression(self, address_expression, data_type=None): + # TODO: It seems that we could have a prefix immediate operand, a displacement in the + # brackets, and an offset. How all of this works together is somewhat mysterious. 
+ immediate_operand = ( + self.process_immediate(address_expression.immediate) + if "immediate" in address_expression else None + ) + register_expression = ( + self.process_register_expression(address_expression.register_expression) + if "register_expression" in address_expression else None + ) + segment = ( + self.process_register(address_expression.segment) + if "segment" in address_expression else None + ) + identifier = ( + self.process_identifier(address_expression.identifier) + if "identifier" in address_expression else None + ) + if register_expression: + if immediate_operand: + register_expression.offset = immediate_operand + if data_type: + register_expression.data_type = data_type + return register_expression + elif segment: + return MemoryOperand(base=segment, offset=immediate_operand, data_type=data_type) + elif identifier: + if immediate_operand: + identifier.offset = immediate_operand + elif not data_type: + # An address expression without a data type or an offset is just an identifier. + # This matters for jumps. + return identifier + return MemoryOperand(offset=identifier, data_type=data_type) + else: + return MemoryOperand(base=immediate_operand, data_type=data_type) + + def process_offset_expression(self, offset_expression): + # TODO: Record that this is an offset expression. + displacement = ( + self.process_immediate(offset_expression.displacement) + if "displacement" in offset_expression else None + ) + identifier = self.process_identifier(offset_expression.identifier) + identifier.offset = displacement + return MemoryOperand(offset=identifier) + + def process_ptr_expression(self, ptr_expression): + # TODO: Do something with the data_type. + return self.process_address_expression( + ptr_expression.address_expression, + ptr_expression.data_type + ) + + def process_short_expression(self, short_expression): + # TODO: Do something with the fact that it is short. 
+ return LabelOperand(name=short_expression.identifier.name) + + def process_memory_address(self, memory_address): + """Post-process memory address operand""" + if "address_expression" in memory_address: + return self.process_address_expression(memory_address.address_expression) + elif "offset_expression" in memory_address: + return self.process_offset_expression(memory_address.offset_expression) + elif "ptr_expression" in memory_address: + return self.process_ptr_expression(memory_address.ptr_expression) + elif "short_expression" in memory_address: + return self.process_short_expression(memory_address.short_expression) + return memory_address + + def process_label(self, label): + """Post-process label asm line""" + # Remove duplicated 'name' level due to identifier. Note that there is no place to put the + # comment, if any. + label["name"] = label["name"]["name"] + return (LabelOperand(name=label.name), + self.make_instruction(label) if "mnemonic" in label else None) + + def process_immediate(self, immediate): + """Post-process immediate operand""" + if "identifier" in immediate: + # Actually an identifier, change declaration. + return self.process_identifier(immediate.identifier) + new_immediate = ImmediateOperand(value=immediate.get("sign", "") + immediate.value) + new_immediate.value = self.normalize_imd(new_immediate) + return new_immediate + + def process_identifier(self, identifier): + if identifier.name in self._equ: + # Actually an immediate, change declaration. + new_immediate = ImmediateOperand( + identifier=identifier.name, + value=self._equ[identifier.name] + ) + new_immediate.value = self.normalize_imd(new_immediate) + return new_immediate + return IdentifierOperand(name=identifier.name) + + def normalize_imd(self, imd): + """Normalize immediate to decimal based representation""" + if isinstance(imd.value, str): + if '.' in imd.value: + return float(imd.value) + # Now parse depending on the base. 
+ base = {'B': 2, 'O': 8, 'H': 16}.get(imd.value[-1], 10) + value = 0 + negative = imd.value[0] == '-' + positive = imd.value[0] == '+' + start = +(negative or positive) + stop = len(imd.value) if base == 10 else -1 + for c in imd.value[start:stop]: + value = value * base + int(c, base) + return -value if negative else value + else: + return imd.value diff --git a/osaca/semantics/arch_semantics.py b/osaca/semantics/arch_semantics.py index e87e5e7..5e485d2 100644 --- a/osaca/semantics/arch_semantics.py +++ b/osaca/semantics/arch_semantics.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 """Semantics opbject responsible for architecture specific semantic operations""" +from dis import Instruction import sys import warnings from itertools import chain @@ -14,12 +15,24 @@ from osaca.parser.register import RegisterOperand class ArchSemantics(ISASemantics): - GAS_SUFFIXES = "bswlqt" - - def __init__(self, machine_model: MachineModel, path_to_yaml=None): - super().__init__(machine_model.get_ISA().lower(), path_to_yaml=path_to_yaml) + def __init__(self, parser, machine_model: MachineModel, path_to_yaml=None): + super().__init__(parser, path_to_yaml=path_to_yaml) self._machine_model = machine_model - self._isa = machine_model.get_ISA().lower() + + def normalize_instruction_form(self, instruction_form): + self.parser.normalize_instruction_form( + instruction_form, + self.isa_model, + self._machine_model + ) + + def normalize_instruction_forms(self, instruction_forms): + for instruction_form in instruction_forms: + self.normalize_instruction_form(instruction_form) + + def _check_normalized(self, instruction_forms): + for instruction_form in instruction_forms: + instruction_form.check_normalized() # SUMMARY FUNCTION def add_semantics(self, kernel): @@ -29,6 +42,7 @@ class ArchSemantics(ISASemantics): :param list kernel: kernel to apply semantics """ + self._check_normalized(kernel) for instruction_form in kernel: self.assign_src_dst(instruction_form) 
self.assign_tp_lt(instruction_form) @@ -41,6 +55,7 @@ class ArchSemantics(ISASemantics): :param list kernel: kernel to apply optimal port utilization """ + self._check_normalized(kernel) INC = 0.01 kernel.reverse() port_list = self._machine_model.get_ports() @@ -137,6 +152,7 @@ class ArchSemantics(ISASemantics): def set_hidden_loads(self, kernel): """Hide loads behind stores if architecture supports hidden loads (depricated)""" + self._check_normalized(kernel) loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr.flags] stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr.flags] # Filter instructions including load and store @@ -176,6 +192,7 @@ class ArchSemantics(ISASemantics): # mark instruction form with semantic flags def assign_tp_lt(self, instruction_form): """Assign throughput and latency to an instruction form.""" + instruction_form.check_normalized() flags = [] port_number = len(self._machine_model["ports"]) if instruction_form.mnemonic is None: @@ -189,25 +206,6 @@ class ArchSemantics(ISASemantics): instruction_data = self._machine_model.get_instruction( instruction_form.mnemonic, instruction_form.operands ) - if ( - not instruction_data - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # check for instruction without GAS suffix - instruction_data = self._machine_model.get_instruction( - instruction_form.mnemonic[:-1], instruction_form.operands - ) - if ( - instruction_data is None - and self._isa == "aarch64" - and "." 
in instruction_form.mnemonic - ): - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - instruction_data = self._machine_model.get_instruction( - instruction_form.mnemonic[:suffix_start], instruction_form.operands - ) if instruction_data: # instruction form in DB ( @@ -232,25 +230,6 @@ class ArchSemantics(ISASemantics): instruction_data_reg = self._machine_model.get_instruction( instruction_form.mnemonic, operands ) - if ( - not instruction_data_reg - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # check for instruction without GAS suffix - instruction_data_reg = self._machine_model.get_instruction( - instruction_form.mnemonic[:-1], operands - ) - if ( - instruction_data_reg is None - and self._isa == "aarch64" - and "." in instruction_form.mnemonic - ): - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - instruction_data_reg = self._machine_model.get_instruction( - instruction_form.mnemonic[:suffix_start], operands - ) if instruction_data_reg: assign_unknown = False reg_type = self._parser.get_reg_type( @@ -310,7 +289,7 @@ class ArchSemantics(ISASemantics): # - all mem operands in src_dst are pre-/post_indexed # since it is no mem store if ( - self._isa == "aarch64" + self._parser.isa() == "aarch64" and not isinstance( instruction_form.semantic_operands["destination"], MemoryOperand, @@ -406,6 +385,7 @@ class ArchSemantics(ISASemantics): def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags): """Apply performance data to instruction if it was found in the archDB""" + instruction_form.check_normalized() throughput = instruction_data.throughput port_pressure = self._machine_model.average_port_pressure(instruction_data.port_pressure) instruction_form.port_uops = instruction_data.port_pressure @@ -441,12 +421,12 @@ class ArchSemantics(ISASemantics): def convert_op_to_reg(self, reg_type, 
regtype="0"): """Create register operand for a memory addressing operand""" - if self._isa == "x86": + if self._parser.isa() == "x86": if reg_type == "gpr": register = RegisterOperand(name="r" + str(int(regtype) + 9)) else: register = RegisterOperand(name=reg_type + regtype) - elif self._isa == "aarch64": + elif self._parser.isa() == "aarch64": register = RegisterOperand(name=regtype, prefix=reg_type) return register diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index 45f9f2b..5befd52 100644 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -11,7 +11,6 @@ from pathlib import Path import ruamel.yaml from osaca import __version__, utils -from osaca.parser import ParserX86ATT from osaca.parser.instruction_form import InstructionForm from osaca.parser.operand import Operand from osaca.parser.memory import MemoryOperand @@ -79,7 +78,7 @@ class MachineModel(object): else: yaml = self._create_yaml_object() # otherwise load - with open(self._path, "r") as f: + with open(self._path, "r", encoding="utf8") as f: if not lazy: self._data = yaml.load(f) else: @@ -286,23 +285,38 @@ class MachineModel(object): ###################################################### def get_instruction(self, name, operands): - """Find and return instruction data from name and operands.""" + """Find and return instruction data from name and operands/arity.""" # For use with dict instead of list as DB if name is None: return None name_matched_iforms = self._data["instruction_forms_dict"].get(name.upper(), []) try: - return next( - instruction_form - for instruction_form in name_matched_iforms - if self._match_operands( - instruction_form.operands, - operands, + # If `operands` is an integer, it represents the arity of the instruction. This is + # useful to reorder the operands in the Intel syntax because in their original order + # they may not match the model. 
+ if isinstance(operands, int): + arity = operands + return next( + ( + instruction_form + for instruction_form in name_matched_iforms + if len(instruction_form.operands) == arity + ), + None + ) + else: + return next( + ( + instruction_form + for instruction_form in name_matched_iforms + if self._match_operands( + instruction_form.operands, + operands + ) + ), + None ) - ) - except StopIteration: - return None except TypeError as e: print("\nname: {}\noperands: {}".format(name, operands)) raise TypeError from e @@ -878,6 +892,7 @@ class MachineModel(object): return True def _is_x86_reg_type(self, i_reg, reg, consider_masking=False): + from osaca.parser import ParserX86 """Check if register type match.""" if reg is None: if i_reg is None: @@ -895,7 +910,7 @@ class MachineModel(object): if i_reg_name == self.WILDCARD or reg.name == self.WILDCARD: return True # differentiate between vector registers (mm, xmm, ymm, zmm) and others (gpr) - parser_x86 = ParserX86ATT() + parser_x86 = ParserX86() if parser_x86.is_vector_register(reg): if reg.name.rstrip(string.digits).lower() == i_reg_name: # Consider masking and zeroing for AVX512 diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py index a84602d..40ba118 100644 --- a/osaca/semantics/isa_semantics.py +++ b/osaca/semantics/isa_semantics.py @@ -2,7 +2,6 @@ from itertools import chain from osaca import utils -from osaca.parser import ParserAArch64, ParserX86ATT from osaca.parser.memory import MemoryOperand from osaca.parser.operand import Operand from osaca.parser.register import RegisterOperand @@ -26,20 +25,23 @@ class INSTR_FLAGS: class ISASemantics(object): - GAS_SUFFIXES = "bswlqt" - - def __init__(self, isa, path_to_yaml=None): - self._isa = isa.lower() - path = path_to_yaml or utils.find_datafile("isa/" + self._isa + ".yml") + def __init__(self, parser, path_to_yaml=None): + path = path_to_yaml or utils.find_datafile("isa/" + parser.isa() + ".yml") self._isa_model = 
MachineModel(path_to_yaml=path) - if self._isa == "x86": - self._parser = ParserX86ATT() - elif self._isa == "aarch64": - self._parser = ParserAArch64() + self._parser = parser + + @property + def parser(self): + return self._parser + + @property + def isa_model(self): + return self._isa_model def process(self, instruction_forms): """Process a list of instruction forms.""" for i in instruction_forms: + i.check_normalized() self.assign_src_dst(i) # get ;parser result and assign operands to @@ -48,6 +50,7 @@ class ISASemantics(object): # - source/destination def assign_src_dst(self, instruction_form): """Update instruction form dictionary with source, destination and flag information.""" + instruction_form.check_normalized() # if the instruction form doesn't have operands or is None, there's nothing to do if instruction_form.operands is None or instruction_form.mnemonic is None: instruction_form.semantic_operands = {"source": [], "destination": [], "src_dst": []} @@ -57,21 +60,6 @@ class ISASemantics(object): isa_data = self._isa_model.get_instruction( instruction_form.mnemonic, instruction_form.operands ) - if ( - isa_data is None - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # Check for instruction without GAS suffix - isa_data = self._isa_model.get_instruction( - instruction_form.mnemonic[:-1], instruction_form.operands - ) - if isa_data is None and self._isa == "aarch64" and "." 
in instruction_form.mnemonic: - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - isa_data = self._isa_model.get_instruction( - instruction_form.mnemonic[:suffix_start], instruction_form.operands - ) operands = instruction_form.operands op_dict = {} @@ -88,36 +76,17 @@ class ISASemantics(object): isa_data_reg = self._isa_model.get_instruction( instruction_form.mnemonic, operands_reg ) - if ( - isa_data_reg is None - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # Check for instruction without GAS suffix - isa_data_reg = self._isa_model.get_instruction( - instruction_form.mnemonic[:-1], operands_reg - ) - if ( - isa_data_reg is None - and self._isa == "aarch64" - and "." in instruction_form.mnemonic - ): - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - isa_data_reg = self._isa_model.get_instruction( - instruction_form.mnemonic[:suffix_start], operands_reg - ) if isa_data_reg: assign_default = False op_dict = self._apply_found_ISA_data(isa_data_reg, operands) if assign_default: # no irregular operand structure, apply default - op_dict["source"] = self._get_regular_source_operands(instruction_form) - op_dict["destination"] = self._get_regular_destination_operands(instruction_form) + op_dict["source"] = self._parser.get_regular_source_operands(instruction_form) + op_dict["destination"] = self._parser.get_regular_destination_operands(instruction_form) op_dict["src_dst"] = [] # post-process pre- and post-indexing for aarch64 memory operands - if self._isa == "aarch64": + if self._parser.isa() == "aarch64": for operand in [op for op in op_dict["source"] if isinstance(op, MemoryOperand)]: post_indexed = operand.post_indexed pre_indexed = operand.pre_indexed @@ -161,6 +130,7 @@ class ISASemantics(object): Empty dict if no changes of registers occured. None for registers with unknown changes. 
If only_postindexed is True, only considers changes due to post_indexed memory references. """ + instruction_form.check_normalized() if instruction_form.mnemonic is None: return {} dest_reg_names = [ @@ -174,21 +144,6 @@ class ISASemantics(object): isa_data = self._isa_model.get_instruction( instruction_form.mnemonic, instruction_form.operands ) - if ( - isa_data is None - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # Check for instruction without GAS suffix - isa_data = self._isa_model.get_instruction( - instruction_form.mnemonic[:-1], instruction_form.operands - ) - if isa_data is None and self._isa == "aarch64" and "." in instruction_form.mnemonic: - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - isa_data = self._isa_model.get_instruction( - instruction_form.mnemonic[:suffix_start], instruction_form.operands - ) if only_postindexed: for o in instruction_form.operands: @@ -301,6 +256,7 @@ class ISASemantics(object): def _has_load(self, instruction_form): """Check if instruction form performs a LOAD""" + instruction_form.check_normalized() for operand in chain( instruction_form.semantic_operands["source"], instruction_form.semantic_operands["src_dst"], @@ -311,6 +267,7 @@ class ISASemantics(object): def _has_store(self, instruction_form): """Check if instruction form perfroms a STORE""" + instruction_form.check_normalized() for operand in chain( instruction_form.semantic_operands["destination"], instruction_form.semantic_operands["src_dst"], @@ -319,33 +276,6 @@ class ISASemantics(object): return True return False - def _get_regular_source_operands(self, instruction_form): - """Get source operand of given instruction form assuming regular src/dst behavior.""" - # if there is only one operand, assume it is a source operand - if len(instruction_form.operands) == 1: - return [instruction_form.operands[0]] - if self._isa == "x86": - # return all but last operand - 
return [op for op in instruction_form.operands[0:-1]] - elif self._isa == "aarch64": - return [op for op in instruction_form.operands[1:]] - else: - raise ValueError("Unsupported ISA {}.".format(self._isa)) - - def _get_regular_destination_operands(self, instruction_form): - """Get destination operand of given instruction form assuming regular src/dst behavior.""" - # if there is only one operand, assume no destination - if len(instruction_form.operands) == 1: - return [] - if self._isa == "x86": - # return last operand - return instruction_form.operands[-1:] - if self._isa == "aarch64": - # return first operand - return instruction_form.operands[:1] - else: - raise ValueError("Unsupported ISA {}.".format(self._isa)) - def substitute_mem_address(self, operands): """Create memory wildcard for all memory operands""" return [ diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py index c9d64a5..d8d48ac 100644 --- a/osaca/semantics/kernel_dg.py +++ b/osaca/semantics/kernel_dg.py @@ -38,7 +38,8 @@ class KernelDG(nx.DiGraph): self.kernel, timeout, flag_dependencies ) - def _extend_path(self, dst_list, kernel, dg, offset): + @classmethod + def _extend_path(cls, dst_list, kernel, dg, offset): for instr in kernel: generator_path = nx.algorithms.simple_paths.all_simple_paths( dg, instr.line_number, instr.line_number + offset @@ -138,7 +139,7 @@ class KernelDG(nx.DiGraph): all_paths = manager.list() processes = [ Process( - target=self._extend_path, + target=KernelDG._extend_path, args=(all_paths, instr_section, dg, offset), ) for instr_section in instrs @@ -164,9 +165,7 @@ class KernelDG(nx.DiGraph): # terminate running processes for p in processes: if p.is_alive(): - # Python 3.6 does not support Process.kill(). 
- # Can be changed to `p.kill()` after EoL (01/22) of Py3.6 - os.kill(p.pid, signal.SIGKILL) + p.kill() p.join() all_paths = list(all_paths) else: @@ -186,11 +185,11 @@ class KernelDG(nx.DiGraph): for s, d in nx.utils.pairwise(path): edge_lat = dg.edges[s, d]["latency"] # map source node back to original line numbers - if s >= offset: + if s > offset: s -= offset lat_path.append((s, edge_lat)) lat_sum += edge_lat - if d >= offset: + if d > offset: d -= offset lat_path.sort() @@ -413,7 +412,7 @@ class KernelDG(nx.DiGraph): addr_change = 0 if isinstance(src.offset, ImmediateOperand) and src.offset.value is not None: addr_change += src.offset.value - if mem.offset: + if isinstance(mem.offset, ImmediateOperand) and mem.offset.value is not None: addr_change -= mem.offset.value if mem.base and src.base: base_change = register_changes.get( diff --git a/osaca/semantics/marker_utils.py b/osaca/semantics/marker_utils.py index 5f2eb4a..1de6a87 100644 --- a/osaca/semantics/marker_utils.py +++ b/osaca/semantics/marker_utils.py @@ -1,29 +1,36 @@ #!/usr/bin/env python3 from collections import OrderedDict +from enum import Enum +from functools import partial -from osaca.parser import ParserAArch64, ParserX86ATT, get_parser -from osaca.parser.register import RegisterOperand +from osaca.parser.instruction_form import InstructionForm +from osaca.parser.directive import DirectiveOperand from osaca.parser.identifier import IdentifierOperand from osaca.parser.immediate import ImmediateOperand +from osaca.parser.memory import MemoryOperand +from osaca.parser.register import RegisterOperand COMMENT_MARKER = {"start": "OSACA-BEGIN", "end": "OSACA-END"} +# State of marker matching. +# No: we have determined that the code doesn't match the marker. +# Partial: so far the code matches the marker, but we have not reached the end of the marker yet. +# Full: the code matches all instructions in the marker. 
+class Matching(Enum): + No = 0 + Partial = 1 + Full = 2 -def reduce_to_section(kernel, isa): + +def reduce_to_section(kernel, parser): """ Finds OSACA markers in given kernel and returns marked section :param list kernel: kernel to check - :param str isa: ISA of given kernel + :param BaseParser parser: parser used to produce the kernel :returns: `list` -- marked section of kernel as list of instruction forms """ - isa = isa.lower() - if isa == "x86": - start, end = find_marked_kernel_x86ATT(kernel) - elif isa == "aarch64": - start, end = find_marked_kernel_AArch64(kernel) - else: - raise ValueError("ISA not supported.") + start, end = find_marked_section(kernel, parser, COMMENT_MARKER) if start == -1: start = 0 if end == -1: @@ -31,109 +38,21 @@ def reduce_to_section(kernel, isa): return kernel[start:end] -def find_marked_kernel_AArch64(lines): - """ - Find marked section for AArch64 - - :param list lines: kernel - :returns: `tuple of int` -- start and end line of marked section - """ - nop_bytes = [213, 3, 32, 31] - return find_marked_section( - lines, - ParserAArch64(), - ["mov"], - "x1", - [111, 222], - nop_bytes, - reverse=True, - comments=COMMENT_MARKER, - ) - - -def find_marked_kernel_x86ATT(lines): - """ - Find marked section for x86 - - :param list lines: kernel - :returns: `tuple of int` -- start and end line of marked section - """ - nop_bytes = [100, 103, 144] - return find_marked_section( - lines, - ParserX86ATT(), - ["mov", "movl"], - "ebx", - [111, 222], - nop_bytes, - comments=COMMENT_MARKER, - ) - - -def get_marker(isa, comment=""): - """Return tuple of start and end marker lines.""" - isa = isa.lower() - if isa == "x86": - start_marker_raw = ( - "movl $111, %ebx # OSACA START MARKER\n" - ".byte 100 # OSACA START MARKER\n" - ".byte 103 # OSACA START MARKER\n" - ".byte 144 # OSACA START MARKER\n" - ) - if comment: - start_marker_raw += "# {}\n".format(comment) - end_marker_raw = ( - "movl $222, %ebx # OSACA END MARKER\n" - ".byte 100 # OSACA END 
MARKER\n" - ".byte 103 # OSACA END MARKER\n" - ".byte 144 # OSACA END MARKER\n" - ) - elif isa == "aarch64": - start_marker_raw = ( - "mov x1, #111 // OSACA START MARKER\n" - ".byte 213,3,32,31 // OSACA START MARKER\n" - ) - if comment: - start_marker_raw += "// {}\n".format(comment) - # After loop - end_marker_raw = ( - "mov x1, #222 // OSACA END MARKER\n" - ".byte 213,3,32,31 // OSACA END MARKER\n" - ) - - parser = get_parser(isa) - start_marker = parser.parse_file(start_marker_raw) - end_marker = parser.parse_file(end_marker_raw) - - return start_marker, end_marker - - -def find_marked_section( - lines, parser, mov_instr, mov_reg, mov_vals, nop_bytes, reverse=False, comments=None -): +def find_marked_section(lines, parser, comments=None): """ Return indexes of marked section :param list lines: kernel :param parser: parser to use for checking :type parser: :class:`~parser.BaseParser` - :param mov_instr: all MOV instruction possible for the marker - :type mov_instr: `list of str` - :param mov_reg: register used for the marker - :type mov_reg: `str` - :param mov_vals: values needed to be moved to ``mov_reg`` for valid marker - :type mov_vals: `list of int` - :param nop_bytes: bytes representing opcode of NOP - :type nop_bytes: `list of int` - :param reverse: indicating if ISA syntax requires reverse operand order, defaults to `False` - :type reverse: boolean, optional :param comments: dictionary with start and end markers in comment format, defaults to None :type comments: dict, optional :returns: `tuple of int` -- start and end line of marked section """ - # TODO match to instructions returned by get_marker index_start = -1 index_end = -1 + start_marker = parser.start_marker() + end_marker = parser.end_marker() for i, line in enumerate(lines): try: if line.mnemonic is None and comments is not None and line.comment is not None: @@ -141,59 +60,151 @@ def find_marked_section( index_start = i + 1 elif comments["end"] == line.comment: index_end = i - elif ( - 
line.mnemonic in mov_instr - and len(lines) > i + 1 - and lines[i + 1].directive is not None - ): - source = line.operands[0 if not reverse else 1] - destination = line.operands[1 if not reverse else 0] - # instruction pair matches, check for operands - if ( - isinstance(source, ImmediateOperand) - and parser.normalize_imd(source) == mov_vals[0] - and isinstance(destination, RegisterOperand) - and parser.get_full_reg_name(destination) == mov_reg - ): - # operands of first instruction match start, check for second one - match, line_count = match_bytes(lines, i + 1, nop_bytes) - if match: - # return first line after the marker - index_start = i + 1 + line_count - elif ( - isinstance(source, ImmediateOperand) - and parser.normalize_imd(source) == mov_vals[1] - and isinstance(destination, RegisterOperand) - and parser.get_full_reg_name(destination) == mov_reg - ): - # operand of first instruction match end, check for second one - match, line_count = match_bytes(lines, i + 1, nop_bytes) - if match: - # return line of the marker - index_end = i - except TypeError: - print(i, line) + if index_start == -1: + matching_lines = match_lines(parser, lines[i:], start_marker) + if matching_lines: + # Return the first line after the marker. + index_start = i + matching_lines + if index_end == -1: + if match_lines(parser, lines[i:], end_marker): + index_end = i + except TypeError as e: + print(i, e, line) if index_start != -1 and index_end != -1: break return index_start, index_end -def match_bytes(lines, index, byte_list): - """Match bytes directives of markers""" - # either all bytes are in one line or in separate ones - extracted_bytes = [] - line_count = 0 - while ( - index < len(lines) - and lines[index].directive is not None - and lines[index].directive.name == "byte" +# This function and the following ones traverse the syntactic tree produced by the parser and try to +# match it to the marker.
This is necessary because the IACA markers are significantly different on +# MSVC x86 than on other ISA/compilers. Therefore, simple string matching is not sufficient. Also, +# the syntax of numeric literals depends on the parser and should not be known to this module. +# The matching only checks for a limited number of properties (and the marker doesn't specify the +# rest). +def match_lines(parser, lines, marker): + """ + Returns the number of `lines` matched by the `marker`. + + :param list of `InstructionForm` lines: parsed assembly code. + :param list of `InstructionForm` marker: pattern to match against the `lines`. + :return int: the length of the match in the parsed code, 0 if there is no match. + """ + marker_iter = iter(marker) + marker_line = next(marker_iter) + for matched_lines, line in enumerate(lines): + if isinstance(marker_line, list): + # No support for partial matching in lists. + for marker_alternative in marker_line: + matching = match_line(parser, line, marker_alternative) + if matching == Matching.Full: + break + else: + return 0 + marker_line = next(marker_iter, None) + else: + matching = match_line(parser, line, marker_line) + if matching == Matching.No: + return 0 + elif matching == Matching.Partial: + # Try the same marker line again. The call to `match_line` consumed some of the + # directive parameters. + pass + elif matching == Matching.Full: + # Move to the next marker line, the current one has been fully matched. + marker_line = next(marker_iter, None) + # If we have reached the last marker line, the parsed code matches the marker. + if not marker_line: + return matched_lines + 1 + +def match_line(parser, line, marker_line): + """ + Returns whether `line` matches `marker_line`. + + :param `InstructionForm` line: parsed assembly code. + :param `InstructionForm` marker_line: pattern to match against `line`. + :return: Matching.
In case of partial match, `marker_line` is modified and should be reused for + matching the next line in the parsed assembly code. + """ + if ( + line.mnemonic + and marker_line.mnemonic + and line.mnemonic == marker_line.mnemonic + and match_operands(line.operands, marker_line.operands) ): + return Matching.Full + if ( + line.directive + and marker_line.directive + and line.directive.name == marker_line.directive.name + ): + return match_parameters(parser, line.directive.parameters, marker_line.directive.parameters) + else: + return Matching.No + +def match_operands(line_operands, marker_line_operands): + if len(line_operands) != len(marker_line_operands): + return False + return all( + match_operand(line_operand, marker_line_operand) + for line_operand, marker_line_operand in + zip(line_operands, marker_line_operands) + ) + +def match_operand(line_operand, marker_line_operand): + if ( + isinstance(line_operand, ImmediateOperand) + and isinstance(marker_line_operand, ImmediateOperand) + and line_operand.value == marker_line_operand.value + ): + return True + if ( + isinstance(line_operand, RegisterOperand) + and isinstance(marker_line_operand, RegisterOperand) + and line_operand.name.lower() == marker_line_operand.name.lower() + ): + return True + if ( + isinstance(line_operand, MemoryOperand) + and isinstance(marker_line_operand, MemoryOperand) + and match_operand(line_operand.base, marker_line_operand.base) + and match_operand(line_operand.offset, marker_line_operand.offset) + ): + return True + return False + +def match_parameters(parser, line_parameters, marker_line_parameters): + """ + Returns whether `line_parameters` matches `marker_line_parameters`. + + :param list of strings line_parameters: parameters of a directive in the parsed assembly code.
+ :param list of strings marker_line_parameters: parameters of a directive in the marker. + :return: Matching. In case of partial match, `marker_line_parameters` is modified and should be + reused for matching the next line in the parsed assembly code. + """ + line_parameter_count = len(line_parameters) + marker_line_parameter_count = len(marker_line_parameters) + + # The elements of `marker_line_parameters` are consumed as they are matched. + for line_parameter in line_parameters: + if not marker_line_parameters: + break + marker_line_parameter = marker_line_parameters[0] + if not match_parameter(parser, line_parameter, marker_line_parameter): + return Matching.No + marker_line_parameters.pop(0) + if marker_line_parameters: + return Matching.Partial + else: + return Matching.Full + +def match_parameter(parser, line_parameter, marker_line_parameter): + if line_parameter.lower() == marker_line_parameter.lower(): + return True + else: + # If the parameters don't match verbatim, check if they represent the same immediate value.
+ line_immediate = ImmediateOperand(value=line_parameter) + marker_line_immediate = ImmediateOperand(value=marker_line_parameter) + return parser.normalize_imd(line_immediate) == parser.normalize_imd(marker_line_immediate) def find_jump_labels(lines): diff --git a/tests/test_base_parser.py b/tests/test_base_parser.py index 9794ce9..f62d134 100755 --- a/tests/test_base_parser.py +++ b/tests/test_base_parser.py @@ -20,6 +20,8 @@ class TestBaseParser(unittest.TestCase): pass with open(self._find_file("triad_x86_iaca.s")) as f: self.triad_code = f.read() + with open(self._find_file("triad_x86_intel.asm")) as f: + self.triad_code_intel = f.read() with open(self._find_file("triad_arm_iaca.s")) as f: self.triad_code_arm = f.read() with open(self._find_file("kernel_x86.s")) as f: @@ -68,10 +70,11 @@ class TestBaseParser(unittest.TestCase): self.parser.normalize_imd(imd_hex_1) def test_detect_ISA(self): - self.assertEqual(BaseParser.detect_ISA(self.triad_code), "x86") - self.assertEqual(BaseParser.detect_ISA(self.triad_code_arm), "aarch64") - self.assertEqual(BaseParser.detect_ISA(self.x86_code), "x86") - self.assertEqual(BaseParser.detect_ISA(self.aarch64_code), "aarch64") + self.assertEqual(BaseParser.detect_ISA(self.triad_code), ("x86", "ATT")) + self.assertEqual(BaseParser.detect_ISA(self.triad_code_intel), ("x86", "INTEL")) + self.assertEqual(BaseParser.detect_ISA(self.triad_code_arm), ("aarch64", None)) + self.assertEqual(BaseParser.detect_ISA(self.x86_code), ("x86", "ATT")) + self.assertEqual(BaseParser.detect_ISA(self.aarch64_code), ("aarch64", None)) ################## # Helper functions diff --git a/tests/test_files/gs_x86_icc.asm b/tests/test_files/gs_x86_icc.asm new file mode 100644 index 0000000..3da0cec --- /dev/null +++ b/tests/test_files/gs_x86_icc.asm @@ -0,0 +1,227 @@ +# Produced with ICC 2021.10.0 with -O3 -xcore-avx512, https://godbolt.org/z/87bYseh8r +..B1.1: # Preds ..B1.0 + push rbp #5.32 + mov rbp, rsp #5.32 + and rsp, -128 #5.32 + push r15 #5.32 + 
push rbx #5.32 + sub rsp, 112 #5.32 + mov edi, 3 #5.32 + mov rsi, 0x64199d9ffe #5.32 + call __intel_new_feature_proc_init #5.32 +..B1.34: # Preds ..B1.1 + vstmxcsr DWORD PTR [rsp] #5.32 + xor edi, edi #11.7 + or DWORD PTR [rsp], 32832 #5.32 + vldmxcsr DWORD PTR [rsp] #5.32 + call time #11.7 +..B1.2: # Preds ..B1.34 + mov edi, eax #11.1 + call srand #11.1 +..B1.3: # Preds ..B1.2 + mov edi, 1600 #13.16 + call malloc #13.16 +..B1.35: # Preds ..B1.3 + mov rsi, rax #13.16 +..B1.4: # Preds ..B1.35 + xor eax, eax #14.1 + mov rbx, rsi #14.1 + mov r15, rax #14.1 +..B1.5: # Preds ..B1.6 ..B1.4 + mov edi, 1600 #15.22 + call malloc #15.22 +..B1.6: # Preds ..B1.5 + mov QWORD PTR [rbx+r15*8], rax #15.5 + inc r15 #14.1 + cmp r15, 200 #14.1 + jb ..B1.5 # Prob 82% #14.1 +..B1.7: # Preds ..B1.6 + xor eax, eax #17.1 + mov rsi, rbx # + mov r15, rax #19.44 + mov QWORD PTR [rsp], r13 #19.44[spill] + mov QWORD PTR [8+rsp], r14 #19.44[spill] +..B1.8: # Preds ..B1.11 ..B1.7 + mov r13, QWORD PTR [8+rbx+r15*8] #19.5 + xor r14d, r14d #18.3 +..B1.9: # Preds ..B1.10 ..B1.8 + call rand #19.26 +..B1.37: # Preds ..B1.9 + mov r8d, eax #19.26 +..B1.10: # Preds ..B1.37 + mov eax, 351843721 #19.33 + mov ecx, r8d #19.33 + imul r8d #19.33 + sar ecx, 31 #19.33 + vxorpd xmm0, xmm0, xmm0 #19.33 + sar edx, 13 #19.33 + sub edx, ecx #19.33 + imul edi, edx, -100000 #19.33 + add r8d, edi #19.33 + vcvtsi2sd xmm0, xmm0, r8d #19.33 + vdivsd xmm1, xmm0, QWORD PTR .L_2il0floatpacket.0[rip] #19.44 + vmovsd QWORD PTR [8+r13+r14*8], xmm1 #19.5 + inc r14 #18.3 + cmp r14, 198 #18.3 + jb ..B1.9 # Prob 82% #18.3 +..B1.11: # Preds ..B1.10 + inc r15 #17.1 + cmp r15, 198 #17.1 + jb ..B1.8 # Prob 91% #17.1 +..B1.12: # Preds ..B1.11 + mov r13, QWORD PTR [rsp] #[spill] + mov rsi, rbx # + mov r14, QWORD PTR [8+rsp] #[spill] + xor ecx, ecx #23.1 + vmovsd xmm0, QWORD PTR .L_2il0floatpacket.1[rip] #10.14 + xor dil, dil #10.14 + mov edx, 196 #10.14 +..B1.13: # Preds ..B1.27 ..B1.12 + mov rax, QWORD PTR [8+rsi+rcx*8] #25.5 + mov r8, 
rax #25.5 + lea r9, QWORD PTR [8+rax] #25.5 + sub r8, r9 #25.5 + cmp r8, 1584 #24.3 + jge ..B1.15 # Prob 50% #24.3 +..B1.14: # Preds ..B1.13 + neg r8 #26.7 + cmp r8, 1584 #24.3 + jl ..B1.22 # Prob 50% #24.3 +..B1.15: # Preds ..B1.13 ..B1.14 + lea r8, QWORD PTR [16+rax] #27.9 + sub r9, r8 #27.9 + cmp r9, 1584 #24.3 + jge ..B1.17 # Prob 50% #24.3 +..B1.16: # Preds ..B1.15 + neg r9 #25.5 + cmp r9, 1584 #24.3 + jl ..B1.22 # Prob 50% #24.3 +..B1.17: # Preds ..B1.15 ..B1.16 + vmovsd xmm1, QWORD PTR [rax] #27.9 + mov bl, dil #24.3 + mov r9, QWORD PTR [rsi+rcx*8] #27.21 + xor r11d, r11d #25.5 + mov r10, QWORD PTR [16+rsi+rcx*8] #26.19 + mov r8, QWORD PTR [8+rsi+rcx*8] #27.9 +..B1.18: # Preds ..B1.18 ..B1.17 + vmovsd xmm2, QWORD PTR [8+r11+r10] #26.19 + inc bl #24.3 + vaddsd xmm3, xmm2, QWORD PTR [16+r11+r8] #25.5 + vaddsd xmm4, xmm3, QWORD PTR [8+r11+r9] #25.5 + vaddsd xmm1, xmm4, xmm1 #25.5 + vmulsd xmm8, xmm0, xmm1 #27.21 + vmovsd QWORD PTR [8+r11+r8], xmm8 #25.5 + vmovsd xmm5, QWORD PTR [16+r11+r10] #26.19 + vaddsd xmm6, xmm5, QWORD PTR [24+r11+r8] #26.19 + vaddsd xmm7, xmm6, QWORD PTR [16+r11+r9] #27.9 + vaddsd xmm9, xmm7, xmm8 #27.21 + vmulsd xmm13, xmm0, xmm9 #27.21 + vmovsd QWORD PTR [16+r11+r8], xmm13 #25.5 + vmovsd xmm10, QWORD PTR [24+r11+r10] #26.19 + vaddsd xmm11, xmm10, QWORD PTR [32+r11+r8] #26.19 + vaddsd xmm12, xmm11, QWORD PTR [24+r11+r9] #27.9 + vaddsd xmm14, xmm12, xmm13 #27.21 + vmulsd xmm18, xmm0, xmm14 #27.21 + vmovsd QWORD PTR [24+r11+r8], xmm18 #25.5 + vmovsd xmm15, QWORD PTR [32+r11+r10] #26.19 + vaddsd xmm16, xmm15, QWORD PTR [40+r11+r8] #26.19 + vaddsd xmm17, xmm16, QWORD PTR [32+r11+r9] #27.9 + vaddsd xmm19, xmm17, xmm18 #27.21 + vmulsd xmm1, xmm0, xmm19 #27.21 + vmovsd QWORD PTR [32+r11+r8], xmm1 #25.5 + add r11, 32 #24.3 + cmp bl, 49 #24.3 + jb ..B1.18 # Prob 27% #24.3 +..B1.19: # Preds ..B1.18 + mov r11, rdx #24.3 +..B1.20: # Preds ..B1.20 ..B1.19 + vmovsd xmm1, QWORD PTR [r8+r11*8] #26.7 + vaddsd xmm2, xmm1, QWORD PTR [8+r10+r11*8] #26.19 + 
vaddsd xmm3, xmm2, QWORD PTR [16+r8+r11*8] #27.9 + vaddsd xmm4, xmm3, QWORD PTR [8+r9+r11*8] #27.21 + vmulsd xmm5, xmm0, xmm4 #27.21 + vmovsd QWORD PTR [8+r8+r11*8], xmm5 #25.5 + inc r11 #24.3 + cmp r11, 198 #24.3 + jb ..B1.20 # Prob 66% #24.3 + jmp ..B1.27 # Prob 100% #24.3 +..B1.22: # Preds ..B1.14 ..B1.16 + mov r9, QWORD PTR [rsi+rcx*8] #27.21 + mov bl, dil #24.3 + mov r10, QWORD PTR [16+rsi+rcx*8] #26.19 + xor r11d, r11d #25.5 + mov r8, QWORD PTR [8+rsi+rcx*8] #26.7 +..B1.23: # Preds ..B1.23 ..B1.22 + inc bl #24.3 + vmovsd xmm1, QWORD PTR [r11+r8] #26.7 + vaddsd xmm2, xmm1, QWORD PTR [8+r11+r10] #26.19 + vaddsd xmm3, xmm2, QWORD PTR [16+r11+r8] #27.9 + vaddsd xmm4, xmm3, QWORD PTR [8+r11+r9] #27.21 + vmulsd xmm5, xmm0, xmm4 #27.21 + vmovsd QWORD PTR [8+r11+r8], xmm5 #25.5 + vaddsd xmm6, xmm5, QWORD PTR [16+r11+r10] #26.19 + vaddsd xmm7, xmm6, QWORD PTR [24+r11+r8] #27.9 + vaddsd xmm8, xmm7, QWORD PTR [16+r11+r9] #27.21 + vmulsd xmm9, xmm0, xmm8 #27.21 + vmovsd QWORD PTR [16+r11+r8], xmm9 #25.5 + vaddsd xmm10, xmm9, QWORD PTR [24+r11+r10] #26.19 + vaddsd xmm11, xmm10, QWORD PTR [32+r11+r8] #27.9 + vaddsd xmm12, xmm11, QWORD PTR [24+r11+r9] #27.21 + vmulsd xmm13, xmm0, xmm12 #27.21 + vmovsd QWORD PTR [24+r11+r8], xmm13 #25.5 + vaddsd xmm14, xmm13, QWORD PTR [32+r11+r10] #26.19 + vaddsd xmm15, xmm14, QWORD PTR [40+r11+r8] #27.9 + vaddsd xmm16, xmm15, QWORD PTR [32+r11+r9] #27.21 + vmulsd xmm17, xmm0, xmm16 #27.21 + vmovsd QWORD PTR [32+r11+r8], xmm17 #25.5 + add r11, 32 #24.3 + cmp bl, 49 #24.3 + jb ..B1.23 # Prob 27% #24.3 +..B1.24: # Preds ..B1.23 + mov r11, rdx #24.3 +..B1.25: # Preds ..B1.25 ..B1.24 + vmovsd xmm1, QWORD PTR [r8+r11*8] #26.7 + vaddsd xmm2, xmm1, QWORD PTR [8+r10+r11*8] #26.19 + vaddsd xmm3, xmm2, QWORD PTR [16+r8+r11*8] #27.9 + vaddsd xmm4, xmm3, QWORD PTR [8+r9+r11*8] #27.21 + vmulsd xmm5, xmm0, xmm4 #27.21 + vmovsd QWORD PTR [8+r8+r11*8], xmm5 #25.5 + inc r11 #24.3 + cmp r11, 198 #24.3 + jb ..B1.25 # Prob 66% #24.3 +..B1.27: # Preds ..B1.25 
..B1.20 + mov r8, QWORD PTR [16+rsi+rcx*8] #30.3 + inc rcx #23.1 + mov rax, QWORD PTR [1592+rax] #30.15 + mov QWORD PTR [8+r8], rax #30.3 + cmp rcx, 198 #23.1 + jb ..B1.13 # Prob 91% #23.1 +..B1.28: # Preds ..B1.27 + mov rax, QWORD PTR [1584+rsi] #33.4 + vmovsd xmm0, QWORD PTR [1584+rax] #33.4 + vucomisd xmm0, QWORD PTR .L_2il0floatpacket.2[rip] #33.29 + jp ..B1.29 # Prob 0% #33.29 + je ..B1.30 # Prob 5% #33.29 +..B1.29: # Preds ..B1.28 ..B1.30 + xor eax, eax #34.1 + add rsp, 112 #34.1 + pop rbx #34.1 + pop r15 #34.1 + mov rsp, rbp #34.1 + pop rbp #34.1 + ret #34.1 +..B1.30: # Preds ..B1.28 + mov rax, QWORD PTR [rsi] #33.39 + mov edi, offset flat: .L_2__STRING.0 #33.39 + vmovsd xmm0, QWORD PTR [rax] #33.39 + mov eax, 1 #33.39 + call printf #33.39 + jmp ..B1.29 # Prob 100% #33.39 +.L_2il0floatpacket.0: + .long 0x00000000,0x408f4000 +.L_2il0floatpacket.1: + .long 0x7ae147ae,0x3ff3ae14 +.L_2il0floatpacket.2: + .long 0xfc8f3238,0x3ff3c0c1 +.L_2__STRING.0: + .long 681509 diff --git a/tests/test_files/kernel_x86_intel.asm b/tests/test_files/kernel_x86_intel.asm new file mode 100644 index 0000000..eb76ea1 --- /dev/null +++ b/tests/test_files/kernel_x86_intel.asm @@ -0,0 +1,9 @@ +; https://godbolt.org/z/o49jjojnx /std:c++latest /O1 /fp:contract /arch:AVX2 +$LL13@foo: + vmovsd xmm1, QWORD PTR [rax] + vmovsd xmm0, QWORD PTR [rcx+rax] + vfmadd213sd xmm1, xmm0, QWORD PTR [rdx+rax] + vmovsd QWORD PTR [r8+rax], xmm1 + lea rax, QWORD PTR [rax+8] + sub rbx, 1 + jne SHORT $LL13@foo diff --git a/tests/test_files/kernel_x86_intel_memdep.asm b/tests/test_files/kernel_x86_intel_memdep.asm new file mode 100644 index 0000000..e94caeb --- /dev/null +++ b/tests/test_files/kernel_x86_intel_memdep.asm @@ -0,0 +1,19 @@ +; Translated from kernel_x86_memdep.s +L4: + vmovsd [rax+8], xmm0 + add rax, 8 + vmovsd [rax+rcx*8+8], xmm0 + vaddsd xmm0, xmm0, [rax] + sub rax, -8 + vaddsd xmm0, xmm0, [rax-8] + dec rcx + vaddsd xmm0, xmm0, [rax+rcx*8+8] + mov rdx, rcx + vaddsd xmm0, xmm0, [rax+rdx*8+8] + 
vmulsd xmm0, xmm0, xmm1 + add rax, 8 + cmp rsi, rax + jne L4 +; Added to test LOAD dependencies + shl rax, 5 + subsd xmm10, QWORD PTR [rax+r8] diff --git a/tests/test_files/triad_x86_intel.asm b/tests/test_files/triad_x86_intel.asm new file mode 100644 index 0000000..3de2eae --- /dev/null +++ b/tests/test_files/triad_x86_intel.asm @@ -0,0 +1,124 @@ +; Listing generated by Microsoft (R) Optimizing Compiler Version 19.41.34123.0 + +include listing.inc + +INCLUDELIB MSVCRTD +INCLUDELIB OLDNAMES + +msvcjmc SEGMENT +__FAC6D534_triad@c DB 01H +msvcjmc ENDS +PUBLIC kernel +PUBLIC __JustMyCode_Default +EXTRN dummy:PROC +EXTRN _RTC_InitBase:PROC +EXTRN _RTC_Shutdown:PROC +EXTRN __CheckForDebuggerJustMyCode:PROC +EXTRN _fltused:DWORD +; COMDAT pdata +pdata SEGMENT +$pdata$kernel DD imagerel $LN9 + DD imagerel $LN9+194 + DD imagerel $unwind$kernel +pdata ENDS +; COMDAT rtc$TMZ +rtc$TMZ SEGMENT +_RTC_Shutdown.rtc$TMZ DQ FLAT:_RTC_Shutdown +rtc$TMZ ENDS +; COMDAT rtc$IMZ +rtc$IMZ SEGMENT +_RTC_InitBase.rtc$IMZ DQ FLAT:_RTC_InitBase +rtc$IMZ ENDS +; COMDAT xdata +xdata SEGMENT +$unwind$kernel DD 025052301H + DD 011e2323H + DD 070170025H + DD 05016H +xdata ENDS +; Function compile flags: /Odt +; COMDAT __JustMyCode_Default +_TEXT SEGMENT +__JustMyCode_Default PROC ; COMDAT + ret 0 +__JustMyCode_Default ENDP +_TEXT ENDS +; Function compile flags: /Odtp /RTCsu /ZI +; COMDAT kernel +_TEXT SEGMENT +r$1 = 4 +i$2 = 36 +a$ = 288 +b$ = 296 +c$ = 304 +s$ = 312 +repeat$ = 320 +cur_elements$ = 328 +kernel PROC ; COMDAT +; File C:\Users\phl.bastiani\Projects\OSACA\validation\kernels\triad.c +; Line 16 +$LN9: + movsd QWORD PTR [rsp+32], xmm3 + mov QWORD PTR [rsp+24], r8 + mov QWORD PTR [rsp+16], rdx + mov QWORD PTR [rsp+8], rcx + push rbp + push rdi + sub rsp, 296 ; 00000128H + lea rbp, QWORD PTR [rsp+32] + lea rcx, OFFSET FLAT:__FAC6D534_triad@c + call __CheckForDebuggerJustMyCode + npad 1 +; Line 17 + mov DWORD PTR r$1[rbp], 0 + jmp SHORT $LN4@kernel +$LN2@kernel: + mov eax, DWORD PTR 
r$1[rbp] + inc eax + mov DWORD PTR r$1[rbp], eax +$LN4@kernel: + mov eax, DWORD PTR repeat$[rbp] + cmp DWORD PTR r$1[rbp], eax + jge SHORT $LN3@kernel +; Line 18 + mov DWORD PTR i$2[rbp], 0 + jmp SHORT $LN7@kernel +$LN5@kernel: + mov eax, DWORD PTR i$2[rbp] + inc eax + mov DWORD PTR i$2[rbp], eax +$LN7@kernel: + mov eax, DWORD PTR cur_elements$[rbp] + cmp DWORD PTR i$2[rbp], eax + jge SHORT $LN6@kernel +; Line 19 + movsxd rax, DWORD PTR i$2[rbp] + movsxd rcx, DWORD PTR i$2[rbp] + mov rdx, QWORD PTR c$[rbp] + movsd xmm0, QWORD PTR s$[rbp] + mulsd xmm0, QWORD PTR [rdx+rcx*8] + mov rcx, QWORD PTR b$[rbp] + movsd xmm1, QWORD PTR [rcx+rax*8] + addsd xmm1, xmm0 + movaps xmm0, xmm1 + movsxd rax, DWORD PTR i$2[rbp] + mov rcx, QWORD PTR a$[rbp] + movsd QWORD PTR [rcx+rax*8], xmm0 +; Line 20 + jmp SHORT $LN5@kernel +$LN6@kernel: +; Line 21 + mov rcx, QWORD PTR a$[rbp] + call dummy + npad 1 +; Line 22 + jmp SHORT $LN2@kernel +$LN3@kernel: +; Line 23 + lea rsp, QWORD PTR [rbp+264] + pop rdi + pop rbp + ret 0 +kernel ENDP +_TEXT ENDS +END diff --git a/tests/test_files/triad_x86_intel_iaca.asm b/tests/test_files/triad_x86_intel_iaca.asm new file mode 100644 index 0000000..447406c --- /dev/null +++ b/tests/test_files/triad_x86_intel_iaca.asm @@ -0,0 +1,139 @@ +; Listing generated by Microsoft (R) Optimizing Compiler Version 19.41.34123.0 + +include listing.inc + +INCLUDELIB MSVCRTD +INCLUDELIB OLDNAMES + +msvcjmc SEGMENT +__68D132EB_concurrencysal@h DB 01H +__4DC47379_sal@h DB 01H +__B6ADDB23_vadefs@h DB 01H +__A2A1025A_vcruntime@h DB 01H +__0EF3BC42_intrin0@inl@h DB 01H +__5EC35D46_setjmp@h DB 01H +__368E74E0_mmintrin@h DB 01H +__735960E1_corecrt@h DB 01H +__211DB995_corecrt_malloc@h DB 01H +__7CD62D9E_malloc@h DB 01H +__22746E0E_xmmintrin@h DB 01H +__4716E7C2_emmintrin@h DB 01H +__98B78F4B_pmmintrin@h DB 01H +__286EFCC9_tmmintrin@h DB 01H +__0155E94A_smmintrin@h DB 01H +__64376086_nmmintrin@h DB 01H +__B18C9AC8_wmmintrin@h DB 01H +__7A18D7CF_zmmintrin@h DB 01H 
+__4D0C7505_immintrin@h DB 01H +__F7CF9440_ammintrin@h DB 01H +__78F5E131_intrin@h DB 01H +__6A584D4A_iacaMarks@h DB 01H +__FAC6D534_triad@c DB 01H +msvcjmc ENDS +PUBLIC kernel +PUBLIC __JustMyCode_Default +EXTRN dummy:PROC +EXTRN __CheckForDebuggerJustMyCode:PROC +EXTRN _fltused:DWORD +; COMDAT pdata +pdata SEGMENT +$pdata$kernel DD imagerel $LN18 + DD imagerel $LN18+182 + DD imagerel $unwind$kernel +pdata ENDS +; COMDAT voltbl +voltbl SEGMENT +_volmd DB 05bH + DB 079H +voltbl ENDS +; COMDAT xdata +xdata SEGMENT +$unwind$kernel DD 0c2001H + DD 026820H + DD 0b7419H + DD 0a6419H + DD 095419H + DD 083419H + DD 0e0155219H +xdata ENDS +; Function compile flags: /Odt +; COMDAT __JustMyCode_Default +_TEXT SEGMENT +__JustMyCode_Default PROC ; COMDAT + ret 0 +__JustMyCode_Default ENDP +_TEXT ENDS +; Function compile flags: /Ogspy +; COMDAT kernel +_TEXT SEGMENT +a$ = 64 +b$ = 72 +c$ = 80 +s$ = 88 +repeat$ = 96 +cur_elements$ = 104 +kernel PROC ; COMDAT +; File C:\Users\phl.bastiani\Projects\OSACA\validation\kernels\triad.c +; Line 22 +$LN18: + mov rax, rsp + mov QWORD PTR [rax+8], rbx + mov QWORD PTR [rax+16], rbp + mov QWORD PTR [rax+24], rsi + mov QWORD PTR [rax+32], rdi + push r14 + sub rsp, 48 ; 00000030H + mov rbp, rcx + movaps XMMWORD PTR [rax-24], xmm6 + lea rcx, OFFSET FLAT:__FAC6D534_triad@c + movaps xmm6, xmm3 + mov r14, r8 + mov rdi, rdx + call __CheckForDebuggerJustMyCode + mov eax, DWORD PTR repeat$[rsp] + movsxd rsi, DWORD PTR cur_elements$[rsp] + test eax, eax + jle SHORT $LN3@kernel + mov ebx, eax +$LL4@kernel: +; Line 24 + test rsi, rsi + jle SHORT $LN6@kernel + mov rcx, r14 + mov rdx, rbp + sub rcx, rdi + mov rax, rdi + sub rdx, rdi + mov r8, rsi +$LL7@kernel: +; Line 26 + mov BYTE PTR gs:111, 111 ; 0000006fH +; Line 28 + movaps xmm0, xmm6 + mulsd xmm0, QWORD PTR [rax+rcx] + addsd xmm0, QWORD PTR [rax] + movsd QWORD PTR [rdx+rax], xmm0 + add rax, 8 +; Line 30 + mov BYTE PTR gs:222, 222 ; 000000deH + sub r8, 1 + jne SHORT $LL7@kernel +$LN6@kernel: +; Line 
33 + mov rcx, rbp + call dummy + sub rbx, 1 + jne SHORT $LL4@kernel +$LN3@kernel: +; Line 35 + mov rbx, QWORD PTR [rsp+64] + mov rbp, QWORD PTR [rsp+72] + mov rsi, QWORD PTR [rsp+80] + mov rdi, QWORD PTR [rsp+88] + movaps xmm6, XMMWORD PTR [rsp+32] + add rsp, 48 ; 00000030H + pop r14 + ret 0 +kernel ENDP +_TEXT ENDS +END diff --git a/tests/test_frontend.py b/tests/test_frontend.py index 1436bd0..0029d6a 100755 --- a/tests/test_frontend.py +++ b/tests/test_frontend.py @@ -114,7 +114,7 @@ class TestFrontend(unittest.TestCase): self.assertEqual(line.line_number, analysis_dict["Kernel"][i]["LineNumber"]) def test_dict_output_AArch64(self): - reduced_kernel = reduce_to_section(self.kernel_AArch64, self.semantics_tx2._isa) + reduced_kernel = reduce_to_section(self.kernel_AArch64, self.semantics_tx2._isa, None) dg = KernelDG( reduced_kernel, self.parser_AArch64, diff --git a/tests/test_marker_utils.py b/tests/test_marker_utils.py index 49da8e8..691280a 100755 --- a/tests/test_marker_utils.py +++ b/tests/test_marker_utils.py @@ -12,37 +12,47 @@ from osaca.semantics import ( find_jump_labels, find_basic_loop_bodies, ) -from osaca.parser import ParserAArch64, ParserX86ATT +from osaca.parser import ParserAArch64, ParserX86ATT, ParserX86Intel class TestMarkerUtils(unittest.TestCase): @classmethod def setUpClass(self): self.parser_AArch = ParserAArch64() - self.parser_x86 = ParserX86ATT() + self.parser_x86_att = ParserX86ATT() + self.parser_x86_intel = ParserX86Intel() with open(self._find_file("triad_arm_iaca.s")) as f: triad_code_arm = f.read() with open(self._find_file("triad_x86_iaca.s")) as f: - triad_code_x86 = f.read() + triad_code_x86_att = f.read() + with open(self._find_file("triad_x86_intel_iaca.asm")) as f: + triad_code_x86_intel = f.read() self.parsed_AArch = self.parser_AArch.parse_file(triad_code_arm) - self.parsed_x86 = self.parser_x86.parse_file(triad_code_x86) + self.parsed_x86_att = self.parser_x86_att.parse_file(triad_code_x86_att) + self.parsed_x86_intel = 
self.parser_x86_intel.parse_file(triad_code_x86_intel) ################# # Test ################# def test_marker_detection_AArch64(self): - kernel = reduce_to_section(self.parsed_AArch, "AArch64") + kernel = reduce_to_section(self.parsed_AArch, ParserAArch64()) self.assertEqual(len(kernel), 138) self.assertEqual(kernel[0].line_number, 307) self.assertEqual(kernel[-1].line_number, 444) - def test_marker_detection_x86(self): - kernel = reduce_to_section(self.parsed_x86, "x86") + def test_marker_detection_x86_att(self): + kernel = reduce_to_section(self.parsed_x86_att, ParserX86ATT()) self.assertEqual(len(kernel), 9) self.assertEqual(kernel[0].line_number, 146) self.assertEqual(kernel[-1].line_number, 154) + def test_marker_detection_x86_intel(self): + kernel = reduce_to_section(self.parsed_x86_intel, ParserX86Intel()) + self.assertEqual(len(kernel), 7) + self.assertEqual(kernel[0].line_number, 111) + self.assertEqual(kernel[-1].line_number, 117) + def test_marker_matching_AArch64(self): # preparation bytes_1_line = ".byte 213,3,32,31\n" @@ -108,7 +118,7 @@ class TestMarkerUtils(unittest.TestCase): bytes_end=bytes_var_2, ): sample_parsed = self.parser_AArch.parse_file(sample_code) - sample_kernel = reduce_to_section(sample_parsed, "AArch64") + sample_kernel = reduce_to_section(sample_parsed, ParserAArch64()) self.assertEqual(len(sample_kernel), kernel_length) kernel_start = len( list( @@ -179,8 +189,8 @@ class TestMarkerUtils(unittest.TestCase): mov_end=mov_end_var, bytes_end=bytes_var_2, ): - sample_parsed = self.parser_x86.parse_file(sample_code) - sample_kernel = reduce_to_section(sample_parsed, "x86") + sample_parsed = self.parser_x86_att.parse_file(sample_code) + sample_kernel = reduce_to_section(sample_parsed, ParserX86ATT()) self.assertEqual(len(sample_kernel), kernel_length) kernel_start = len( list( @@ -190,7 +200,7 @@ class TestMarkerUtils(unittest.TestCase): ) ) ) - parsed_kernel = self.parser_x86.parse_file( + parsed_kernel = 
self.parser_x86_att.parse_file( kernel, start_line=kernel_start ) self.assertEqual(sample_kernel, parsed_kernel) @@ -222,7 +232,7 @@ class TestMarkerUtils(unittest.TestCase): for test_name, pro, kernel, epi in samples: code = pro + kernel + epi parsed = self.parser_AArch.parse_file(code) - test_kernel = reduce_to_section(parsed, "AArch64") + test_kernel = reduce_to_section(parsed, ParserAArch64()) if kernel: kernel_length = len(kernel.strip().split("\n")) else: @@ -230,7 +240,7 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( len(test_kernel), kernel_length, - msg="Invalid exctracted kernel length on {!r} sample".format(test_name), + msg="Invalid extracted kernel length on {!r} sample".format(test_name), ) if pro: kernel_start = len((pro).strip().split("\n")) @@ -240,7 +250,7 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( test_kernel, parsed_kernel, - msg="Invalid exctracted kernel on {!r}".format(test_name), + msg="Invalid extracted kernel on {!r}".format(test_name), ) def test_marker_special_cases_x86(self): @@ -269,8 +279,8 @@ class TestMarkerUtils(unittest.TestCase): for test_name, pro, kernel, epi in samples: code = pro + kernel + epi - parsed = self.parser_x86.parse_file(code) - test_kernel = reduce_to_section(parsed, "x86") + parsed = self.parser_x86_att.parse_file(code) + test_kernel = reduce_to_section(parsed, ParserX86ATT()) if kernel: kernel_length = len(kernel.strip().split("\n")) else: @@ -278,23 +288,23 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( len(test_kernel), kernel_length, - msg="Invalid exctracted kernel length on {!r} sample".format(test_name), + msg="Invalid extracted kernel length on {!r} sample".format(test_name), ) if pro: kernel_start = len((pro).strip().split("\n")) else: kernel_start = 0 - parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start) + parsed_kernel = self.parser_x86_att.parse_file(kernel, start_line=kernel_start) self.assertEqual( test_kernel, 
parsed_kernel, - msg="Invalid exctracted kernel on {!r}".format(test_name), + msg="Invalid extracted kernel on {!r}".format(test_name), ) def test_find_jump_labels(self): self.assertEqual( - find_jump_labels(self.parsed_x86), + find_jump_labels(self.parsed_x86_att), OrderedDict( [ (".LFB24", 10), @@ -358,7 +368,7 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( [ (k, v[0].line_number, v[-1].line_number) - for k, v in find_basic_blocks(self.parsed_x86).items() + for k, v in find_basic_blocks(self.parsed_x86_att).items() ], [ (".LFB24", 11, 56), @@ -422,7 +432,7 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( [ (k, v[0].line_number, v[-1].line_number) - for k, v in find_basic_loop_bodies(self.parsed_x86).items() + for k, v in find_basic_loop_bodies(self.parsed_x86_att).items() ], [(".L4", 66, 74), (".L10", 146, 154), (".L28", 290, 300)], ) diff --git a/tests/test_parser_x86intel.py b/tests/test_parser_x86intel.py new file mode 100644 index 0000000..5d52b9a --- /dev/null +++ b/tests/test_parser_x86intel.py @@ -0,0 +1,365 @@ +#!/usr/bin/env python3 +""" +Unit tests for x86 Intel assembly parser +""" + +import os +import unittest + +from pyparsing import ParseException + +from osaca.parser import ParserX86Intel, InstructionForm +from osaca.parser.directive import DirectiveOperand +from osaca.parser.identifier import IdentifierOperand +from osaca.parser.immediate import ImmediateOperand +from osaca.parser.label import LabelOperand +from osaca.parser.memory import MemoryOperand +from osaca.parser.register import RegisterOperand + + +class TestParserX86Intel(unittest.TestCase): + @classmethod + def setUpClass(self): + self.parser = ParserX86Intel() + with open(self._find_file("triad_x86_intel.asm")) as f: + self.triad_code = f.read() + with open(self._find_file("triad_x86_intel_iaca.asm")) as f: + self.triad_iaca_code = f.read() + with open(self._find_file("gs_x86_icc.asm")) as f: + self.gs_icc_code = f.read() + + ################## + # Test + 
################## + + def test_comment_parser(self): + self.assertEqual(self._get_comment(self.parser, "; some comments"), "some comments") + self.assertEqual(self._get_comment(self.parser, "\t\t;AA BB CC \t end \t"), "AA BB CC end") + self.assertEqual( + self._get_comment(self.parser, "\t;; comment ;; comment"), + "; comment ;; comment", + ) + + def test_label_parser(self): + self.assertEqual(self._get_label(self.parser, "main:")[0].name, "main") + self.assertEqual(self._get_label(self.parser, "$$B1?10:")[0].name, "$$B1?10") + self.assertEqual( + self._get_label(self.parser, "$LN9:\tcall\t__CheckForDebuggerJustMyCode")[0].name, + "$LN9" + ) + self.assertEqual( + self._get_label(self.parser, "$LN9:\tcall\t__CheckForDebuggerJustMyCode")[1], + InstructionForm( + mnemonic="call", + operands=[ + {"identifier": {"name": "__CheckForDebuggerJustMyCode"}}, + ], + directive_id=None, + comment_id=None, + label_id=None, + line=None, + line_number=None, + ) + ) + + def test_directive_parser(self): + self.assertEqual(self._get_directive(self.parser, "\t.allocstack 16")[0], + DirectiveOperand(name=".allocstack", + parameters=["16"])) + self.assertEqual(self._get_directive(self.parser, "INCLUDELIB MSVCRTD")[0], + DirectiveOperand(name="INCLUDELIB", + parameters=["MSVCRTD"])) + self.assertEqual(self._get_directive(self.parser, "msvcjmc\tSEGMENT")[0], + DirectiveOperand(name="SEGMENT", + parameters=["msvcjmc"])) + self.assertEqual(self._get_directive(self.parser, "EXTRN\t_RTC_InitBase:PROC")[0], + DirectiveOperand(name="EXTRN", + parameters=["_RTC_InitBase:PROC"])) + self.assertEqual(self._get_directive(self.parser, "$pdata$kernel DD imagerel $LN9")[0], + DirectiveOperand(name="DD", + parameters=["$pdata$kernel", "imagerel", "$LN9"])) + self.assertEqual(self._get_directive(self.parser, "repeat$ = 320")[0], + DirectiveOperand(name="=", + parameters=["repeat$", "320"])) + + def test_parse_instruction(self): + instr1 = "\tsub\trsp, 296\t\t\t\t; 00000128H" + instr2 = " fst ST(3)\t; 
Good ol' x87." + instr3 = "\tmulsd\txmm0, QWORD PTR [rdx+rcx*8]" + instr4 = "\tmov\teax, DWORD PTR cur_elements$[rbp]" + instr5 = "\tmov\tQWORD PTR [rsp+24], r8" + instr6 = "\tjmp\tSHORT $LN2@kernel" + instr7 = "\tlea\trcx, OFFSET FLAT:__FAC6D534_triad@c" + instr8 = "\tmov\tBYTE PTR gs:111, al" + instr9 = "\tlea\tr8, QWORD PTR [r8*4]" + instr10 = "\tmovsd\txmm1, QWORD PTR boost@@XZ@4V456@A+16" + instr11 = "\tlea\trcx, OFFSET FLAT:??_R0N@8+8" + instr12 = "\tvfmadd213sd xmm0, xmm1, QWORD PTR __real@bfc5555555555555" + instr13 = "\tjmp\t$LN18@operator" + + parsed_1 = self.parser.parse_instruction(instr1) + parsed_2 = self.parser.parse_instruction(instr2) + parsed_3 = self.parser.parse_instruction(instr3) + parsed_4 = self.parser.parse_instruction(instr4) + parsed_5 = self.parser.parse_instruction(instr5) + parsed_6 = self.parser.parse_instruction(instr6) + parsed_7 = self.parser.parse_instruction(instr7) + parsed_8 = self.parser.parse_instruction(instr8) + parsed_9 = self.parser.parse_instruction(instr9) + parsed_10 = self.parser.parse_instruction(instr10) + parsed_11 = self.parser.parse_instruction(instr11) + parsed_12 = self.parser.parse_instruction(instr12) + parsed_13 = self.parser.parse_instruction(instr13) + + self.assertEqual(parsed_1.mnemonic, "sub") + self.assertEqual(parsed_1.operands[0], + RegisterOperand(name="RSP")) + self.assertEqual(parsed_1.operands[1], + ImmediateOperand(value=296)) + self.assertEqual(parsed_1.comment, "00000128H") + + self.assertEqual(parsed_2.mnemonic, "fst") + self.assertEqual(parsed_2.operands[0], + RegisterOperand(name="ST(3)")) + self.assertEqual(parsed_2.comment, "Good ol' x87.") + + self.assertEqual(parsed_3.mnemonic, "mulsd") + self.assertEqual(parsed_3.operands[0], + RegisterOperand(name="XMM0")) + self.assertEqual(parsed_3.operands[1], + MemoryOperand(base=RegisterOperand(name="RDX"), + index=RegisterOperand(name="RCX"), + scale=8)) + + self.assertEqual(parsed_4.mnemonic, "mov") + self.assertEqual(parsed_4.operands[0], + 
RegisterOperand(name="EAX")) + self.assertEqual(parsed_4.operands[1], + MemoryOperand(offset=ImmediateOperand( + identifier="cur_elements$", + value=104 + ), + base=RegisterOperand(name="RBP"))) + + self.assertEqual(parsed_5.mnemonic, "mov") + self.assertEqual(parsed_5.operands[0], + MemoryOperand(offset=ImmediateOperand(value=24), + base=RegisterOperand(name="RSP"))) + self.assertEqual(parsed_5.operands[1], + RegisterOperand(name="R8")) + + self.assertEqual(parsed_6.mnemonic, "jmp") + self.assertEqual(parsed_6.operands[0], + LabelOperand(name="$LN2@kernel")) + + self.assertEqual(parsed_7.mnemonic, "lea") + self.assertEqual(parsed_7.operands[0], + RegisterOperand(name="RCX")) + self.assertEqual(parsed_7.operands[1], + MemoryOperand(offset=IdentifierOperand(name="__FAC6D534_triad@c"))) + + self.assertEqual(parsed_8.mnemonic, "mov") + self.assertEqual(parsed_8.operands[0], + MemoryOperand( + base=RegisterOperand(name="GS"), + offset=ImmediateOperand(value=111))) + self.assertEqual(parsed_8.operands[1], + RegisterOperand(name="AL")) + + self.assertEqual(parsed_9.mnemonic, "lea") + self.assertEqual(parsed_9.operands[0], + RegisterOperand(name="R8")) + self.assertEqual(parsed_9.operands[1], + MemoryOperand(base=None, + index=RegisterOperand(name="R8"), + scale=4)) + + self.assertEqual(parsed_10.mnemonic, "movsd") + self.assertEqual(parsed_10.operands[0], + RegisterOperand(name="XMM1")) + self.assertEqual(parsed_10.operands[1], + MemoryOperand(offset=IdentifierOperand(name="boost@@XZ@4V456@A", + offset=ImmediateOperand(value=16)))) + + self.assertEqual(parsed_11.mnemonic, "lea") + self.assertEqual(parsed_11.operands[0], + RegisterOperand(name="RCX")) + self.assertEqual(parsed_11.operands[1], + MemoryOperand(offset=IdentifierOperand(name="??_R0N@8", + offset=ImmediateOperand(value=8)))) + + self.assertEqual(parsed_12.mnemonic, "vfmadd213sd") + self.assertEqual(parsed_12.operands[0], + RegisterOperand(name="XMM0")) + self.assertEqual(parsed_12.operands[1], + 
RegisterOperand(name="XMM1")) + self.assertEqual(parsed_12.operands[2], + MemoryOperand(offset=IdentifierOperand(name="__real@bfc5555555555555"))) + + self.assertEqual(parsed_13.mnemonic, "jmp") + self.assertEqual(parsed_13.operands[0], + IdentifierOperand(name="$LN18@operator")) + + def test_parse_line(self): + line_comment = "; -- Begin main" + line_instruction = "\tret\t0" + + instruction_form_1 = InstructionForm( + mnemonic=None, + operands=[], + directive_id=None, + comment_id="-- Begin main", + label_id=None, + line="; -- Begin main", + line_number=1, + ) + instruction_form_2 = InstructionForm( + mnemonic="ret", + operands=[ + {"immediate": {"value": 0}}, + ], + directive_id=None, + comment_id=None, + label_id=None, + line="\tret\t0", + line_number=2, + ) + + parsed_1 = self.parser.parse_line(line_comment, 1) + parsed_2 = self.parser.parse_line(line_instruction, 2) + + self.assertEqual(parsed_1, instruction_form_1) + self.assertEqual(parsed_2, instruction_form_2) + + def test_parse_register(self): + register_str_1 = "rax" + register_str_2 = "r9" + register_str_3 = "xmm1" + register_str_4 = "ST(4)" + + parsed_reg_1 = RegisterOperand(name="RAX") + parsed_reg_2 = RegisterOperand(name="R9") + parsed_reg_3 = RegisterOperand(name="XMM1") + parsed_reg_4 = RegisterOperand(name="ST(4)") + + self.assertEqual(self.parser.parse_register(register_str_1), parsed_reg_1) + self.assertEqual(self.parser.parse_register(register_str_2), parsed_reg_2) + self.assertEqual(self.parser.parse_register(register_str_3), parsed_reg_3) + self.assertEqual(self.parser.parse_register(register_str_4), parsed_reg_4) + + def test_parse_file1(self): + parsed = self.parser.parse_file(self.triad_code) + self.assertEqual(parsed[0].line_number, 1) + # Check specifically that the values of the symbols defined by "=" were correctly + # propagated. 
+ self.assertEqual(parsed[69], + InstructionForm(mnemonic="mov", + operands=[MemoryOperand( + base=RegisterOperand("RBP"), + offset=ImmediateOperand( + value=4, + identifier="r$1" + ) + ), + ImmediateOperand(value=0)], + line="\tmov\tDWORD PTR r$1[rbp], 0", + line_number=73)) + # Check a few lines to make sure that we produced something reasonable. + self.assertEqual(parsed[60], + InstructionForm(mnemonic="mov", + operands=[MemoryOperand(base=RegisterOperand("RSP"), + offset=ImmediateOperand(value=8)), + RegisterOperand(name="RCX")], + line="\tmov\tQWORD PTR [rsp+8], rcx", + line_number=64)) + self.assertEqual(parsed[120], + InstructionForm(directive_id=DirectiveOperand(name="END"), + line="END", + line_number=124)) + self.assertEqual(len(parsed), 121) + + def test_parse_file2(self): + parsed = self.parser.parse_file(self.triad_iaca_code) + self.assertEqual(parsed[0].line_number, 1) + # Check a few lines to make sure that we produced something reasonable. + self.assertEqual(parsed[68], + InstructionForm(directive_id=DirectiveOperand(name="=", + parameters=["s$", "88"]), + line="s$ = 88", + line_number=72)) + self.assertEqual(parsed[135], + InstructionForm(directive_id=DirectiveOperand(name="END"), + line="END", + line_number=139)) + self.assertEqual(len(parsed), 136) + + def test_parse_file3(self): + parsed = self.parser.parse_file(self.gs_icc_code) + self.assertEqual(parsed[0].line_number, 1) + # Check a few lines to make sure that we produced something reasonable. 
+ self.assertEqual(parsed[113], + InstructionForm(mnemonic="vmovsd", + operands=[RegisterOperand("XMM5"), + MemoryOperand(base=RegisterOperand("R11"), + index=RegisterOperand("R10"), + scale=1, + offset=ImmediateOperand(value=16))], + comment_id="26.19", + line=" vmovsd xmm5, QWORD PTR [16+r11+r10]" + + " #26.19", + line_number=114)) + self.assertEqual(parsed[226], + InstructionForm(directive_id=DirectiveOperand(name=".long", + parameters=["681509"]), + line=" .long 681509", + line_number=227)) + self.assertEqual(len(parsed), 227) + + def test_normalize_imd(self): + imd_binary = ImmediateOperand(value="1001111B") + imd_octal = ImmediateOperand(value="117O") + imd_decimal = ImmediateOperand(value="79") + imd_hex = ImmediateOperand(value="4fH") + imd_float = ImmediateOperand(value="-79.34") + self.assertEqual( + self.parser.normalize_imd(imd_binary), + self.parser.normalize_imd(imd_octal), + ) + self.assertEqual( + self.parser.normalize_imd(imd_octal), + self.parser.normalize_imd(imd_decimal), + ) + self.assertEqual( + self.parser.normalize_imd(imd_decimal), + self.parser.normalize_imd(imd_hex), + ) + self.assertEqual(self.parser.normalize_imd(ImmediateOperand(value="-79")), -79) + self.assertEqual(self.parser.normalize_imd(imd_float), -79.34) + + ################## + # Helper functions + ################## + def _get_comment(self, parser, comment): + return " ".join( + parser.process_operand(parser.comment.parseString(comment, parseAll=True))[ + "comment" + ] + ) + + def _get_label(self, parser, label): + return parser.process_operand(parser.label.parseString(label, parseAll=True)) + + def _get_directive(self, parser, directive): + return parser.process_operand(parser.directive.parseString(directive, parseAll=True)) + + @staticmethod + def _find_file(name): + testdir = os.path.dirname(__file__) + name = os.path.join(testdir, "test_files", name) + assert os.path.exists(name) + return name + + +if __name__ == "__main__": + suite = 
unittest.TestLoader().loadTestsFromTestCase(TestParserX86Intel) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/tests/test_semantics.py b/tests/test_semantics.py index cc8b1b6..7ff0748 100755 --- a/tests/test_semantics.py +++ b/tests/test_semantics.py @@ -10,7 +10,7 @@ from copy import deepcopy import networkx as nx from osaca.osaca import get_unmatched_instruction_ratio -from osaca.parser import ParserAArch64, ParserX86ATT +from osaca.parser import ParserAArch64, ParserX86ATT, ParserX86Intel from osaca.semantics import ( INSTR_FLAGS, ArchSemantics, @@ -32,7 +32,8 @@ class TestSemanticTools(unittest.TestCase): @classmethod def setUpClass(cls): # set up parser and kernels - cls.parser_x86 = ParserX86ATT() + cls.parser_x86_att = ParserX86ATT() + cls.parser_x86_intel = ParserX86Intel() cls.parser_AArch64 = ParserAArch64() with open(cls._find_file("kernel_x86.s")) as f: cls.code_x86 = f.read() @@ -40,6 +41,10 @@ class TestSemanticTools(unittest.TestCase): cls.code_x86_memdep = f.read() with open(cls._find_file("kernel_x86_long_LCD.s")) as f: cls.code_x86_long_LCD = f.read() + with open(cls._find_file("kernel_x86_intel.asm")) as f: + cls.code_x86_intel = f.read() + with open(cls._find_file("kernel_x86_intel_memdep.asm")) as f: + cls.code_x86_intel_memdep = f.read() with open(cls._find_file("kernel_aarch64_memdep.s")) as f: cls.code_aarch64_memdep = f.read() with open(cls._find_file("kernel_aarch64.s")) as f: @@ -48,24 +53,41 @@ class TestSemanticTools(unittest.TestCase): cls.code_AArch64_SVE = f.read() with open(cls._find_file("kernel_aarch64_deps.s")) as f: cls.code_AArch64_deps = f.read() - cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86") + cls.kernel_x86 = reduce_to_section( + cls.parser_x86_att.parse_file(cls.code_x86), + cls.parser_x86_att + ) cls.kernel_x86_memdep = reduce_to_section( - cls.parser_x86.parse_file(cls.code_x86_memdep), "x86" + cls.parser_x86_att.parse_file(cls.code_x86_memdep), + cls.parser_x86_att ) 
cls.kernel_x86_long_LCD = reduce_to_section( - cls.parser_x86.parse_file(cls.code_x86_long_LCD), "x86" + cls.parser_x86_att.parse_file(cls.code_x86_long_LCD), + cls.parser_x86_att + ) + cls.kernel_x86_intel = reduce_to_section( + cls.parser_x86_intel.parse_file(cls.code_x86_intel), + cls.parser_x86_intel + ) + cls.kernel_x86_intel_memdep = reduce_to_section( + cls.parser_x86_intel.parse_file(cls.code_x86_intel_memdep), + cls.parser_x86_intel ) cls.kernel_AArch64 = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64" + cls.parser_AArch64.parse_file(cls.code_AArch64), + cls.parser_AArch64 ) cls.kernel_aarch64_memdep = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64" + cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), + cls.parser_AArch64 ) cls.kernel_aarch64_SVE = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64" + cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), + cls.parser_AArch64 ) cls.kernel_aarch64_deps = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_AArch64_deps), "aarch64" + cls.parser_AArch64.parse_file(cls.code_AArch64_deps), + cls.parser_AArch64 ) # set up machine models @@ -78,40 +100,64 @@ class TestSemanticTools(unittest.TestCase): cls.machine_model_a64fx = MachineModel( path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml") ) - cls.semantics_x86 = ISASemantics("x86") + cls.semantics_x86 = ISASemantics(cls.parser_x86_att) cls.semantics_csx = ArchSemantics( + cls.parser_x86_att, cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"), ) - cls.semantics_aarch64 = ISASemantics("aarch64") + cls.semantics_x86_intel = ISASemantics(cls.parser_x86_intel) + cls.semantics_csx_intel = ArchSemantics( + cls.parser_x86_intel, + cls.machine_model_csx, + path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"), + ) + cls.semantics_aarch64 = ISASemantics(cls.parser_AArch64) cls.semantics_tx2 = ArchSemantics( + 
cls.parser_AArch64, cls.machine_model_tx2, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"), ) cls.semantics_a64fx = ArchSemantics( + cls.parser_AArch64, cls.machine_model_a64fx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"), ) cls.machine_model_zen = MachineModel(arch="zen1") + cls.semantics_csx.normalize_instruction_forms(cls.kernel_x86) for i in range(len(cls.kernel_x86)): cls.semantics_csx.assign_src_dst(cls.kernel_x86[i]) cls.semantics_csx.assign_tp_lt(cls.kernel_x86[i]) + cls.semantics_csx.normalize_instruction_forms(cls.kernel_x86_memdep) for i in range(len(cls.kernel_x86_memdep)): cls.semantics_csx.assign_src_dst(cls.kernel_x86_memdep[i]) cls.semantics_csx.assign_tp_lt(cls.kernel_x86_memdep[i]) + cls.semantics_csx.normalize_instruction_forms(cls.kernel_x86_long_LCD) for i in range(len(cls.kernel_x86_long_LCD)): cls.semantics_csx.assign_src_dst(cls.kernel_x86_long_LCD[i]) cls.semantics_csx.assign_tp_lt(cls.kernel_x86_long_LCD[i]) + cls.semantics_csx_intel.normalize_instruction_forms(cls.kernel_x86_intel) + for i in range(len(cls.kernel_x86_intel)): + cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel[i]) + cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel[i]) + cls.semantics_csx_intel.normalize_instruction_forms(cls.kernel_x86_intel_memdep) + for i in range(len(cls.kernel_x86_intel_memdep)): + cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel_memdep[i]) + cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel_memdep[i]) + cls.semantics_tx2.normalize_instruction_forms(cls.kernel_AArch64) for i in range(len(cls.kernel_AArch64)): cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i]) cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i]) + cls.semantics_tx2.normalize_instruction_forms(cls.kernel_aarch64_memdep) for i in range(len(cls.kernel_aarch64_memdep)): cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i]) cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i]) + 
cls.semantics_a64fx.normalize_instruction_forms(cls.kernel_aarch64_SVE) for i in range(len(cls.kernel_aarch64_SVE)): cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i]) cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i]) + cls.semantics_a64fx.normalize_instruction_forms(cls.kernel_aarch64_deps) for i in range(len(cls.kernel_aarch64_deps)): cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_deps[i]) cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_deps[i]) @@ -123,7 +169,7 @@ class TestSemanticTools(unittest.TestCase): def test_creation_by_name(self): try: tmp_mm = MachineModel(arch="CSX") - ArchSemantics(tmp_mm) + ArchSemantics(self.parser_x86_att, tmp_mm) except ValueError: self.fail() @@ -254,7 +300,7 @@ class TestSemanticTools(unittest.TestCase): test_mm_arm.add_port("dummyPort") # test dump of DB - with open("/dev/null", "w") as dev_null: + with open(os.devnull, "w") as dev_null: test_mm_x86.dump(stream=dev_null) test_mm_arm.dump(stream=dev_null) @@ -266,6 +312,14 @@ class TestSemanticTools(unittest.TestCase): self.assertTrue("destination" in instruction_form.semantic_operands) self.assertTrue("src_dst" in instruction_form.semantic_operands) + def test_src_dst_assignment_x86_intel(self): + for instruction_form in self.kernel_x86_intel: + with self.subTest(instruction_form=instruction_form): + if instruction_form.semantic_operands is not None: + self.assertTrue("source" in instruction_form.semantic_operands) + self.assertTrue("destination" in instruction_form.semantic_operands) + self.assertTrue("src_dst" in instruction_form.semantic_operands) + def test_src_dst_assignment_AArch64(self): for instruction_form in self.kernel_AArch64: with self.subTest(instruction_form=instruction_form): @@ -284,6 +338,16 @@ class TestSemanticTools(unittest.TestCase): self.assertIsInstance(instruction_form.port_pressure, list) self.assertEqual(len(instruction_form.port_pressure), port_num) + def test_tp_lt_assignment_x86_intel(self): + 
self.assertTrue("ports" in self.machine_model_csx) + port_num = len(self.machine_model_csx["ports"]) + for instruction_form in self.kernel_x86_intel: + with self.subTest(instruction_form=instruction_form): + self.assertTrue(instruction_form.throughput is not None) + self.assertTrue(instruction_form.latency is not None) + self.assertIsInstance(instruction_form.port_pressure, list) + self.assertEqual(len(instruction_form.port_pressure), port_num) + def test_tp_lt_assignment_AArch64(self): self.assertTrue("ports" in self.machine_model_tx2) port_num = len(self.machine_model_tx2["ports"]) @@ -294,8 +358,7 @@ class TestSemanticTools(unittest.TestCase): self.assertIsInstance(instruction_form.port_pressure, list) self.assertEqual(len(instruction_form.port_pressure), port_num) - def test_optimal_throughput_assignment(self): - # x86 + def test_optimal_throughput_assignment_x86(self): kernel_fixed = deepcopy(self.kernel_x86) self.semantics_csx.add_semantics(kernel_fixed) self.assertEqual(get_unmatched_instruction_ratio(kernel_fixed), 0) @@ -308,11 +371,13 @@ class TestSemanticTools(unittest.TestCase): self.assertTrue(max(tp_optimal) <= max(tp_fixed)) # test multiple port assignment options test_mm_x86 = MachineModel(path_to_yaml=self._find_file("test_db_x86.yml")) - tmp_semantics = ArchSemantics(test_mm_x86) + tmp_semantics = ArchSemantics(self.parser_x86_att, test_mm_x86) tmp_code_1 = "fantasyinstr1 %rax, %rax\n" tmp_code_2 = "fantasyinstr1 %rax, %rax\nfantasyinstr2 %rbx, %rbx\n" - tmp_kernel_1 = self.parser_x86.parse_file(tmp_code_1) - tmp_kernel_2 = self.parser_x86.parse_file(tmp_code_2) + tmp_kernel_1 = self.parser_x86_att.parse_file(tmp_code_1) + tmp_kernel_2 = self.parser_x86_att.parse_file(tmp_code_2) + tmp_semantics.normalize_instruction_forms(tmp_kernel_1) + tmp_semantics.normalize_instruction_forms(tmp_kernel_2) tmp_semantics.add_semantics(tmp_kernel_1) tmp_semantics.add_semantics(tmp_kernel_2) tmp_semantics.assign_optimal_throughput(tmp_kernel_1) @@ -322,7 +387,36 
@@ class TestSemanticTools(unittest.TestCase): self.assertEqual(k1i1_pp, [0.33, 0.0, 0.33, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33, 0.0, 0.0]) self.assertEqual(k2i1_pp, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]) - # arm + def test_optimal_throughput_assignment_x86_intel(self): + kernel_fixed = deepcopy(self.kernel_x86_intel) + self.semantics_csx_intel.add_semantics(kernel_fixed) + self.assertEqual(get_unmatched_instruction_ratio(kernel_fixed), 0) + + kernel_optimal = deepcopy(kernel_fixed) + self.semantics_csx_intel.assign_optimal_throughput(kernel_optimal) + tp_fixed = self.semantics_csx_intel.get_throughput_sum(kernel_fixed) + tp_optimal = self.semantics_csx_intel.get_throughput_sum(kernel_optimal) + self.assertNotEqual(tp_fixed, tp_optimal) + self.assertTrue(max(tp_optimal) <= max(tp_fixed)) + # test multiple port assignment options + test_mm_x86 = MachineModel(path_to_yaml=self._find_file("test_db_x86.yml")) + tmp_semantics = ArchSemantics(self.parser_x86_intel, test_mm_x86) + tmp_code_1 = "fantasyinstr1 rax, rax\n" + tmp_code_2 = "fantasyinstr1 rax, rax\nfantasyinstr2 rbx, rbx\n" + tmp_kernel_1 = self.parser_x86_intel.parse_file(tmp_code_1) + tmp_kernel_2 = self.parser_x86_intel.parse_file(tmp_code_2) + tmp_semantics.normalize_instruction_forms(tmp_kernel_1) + tmp_semantics.normalize_instruction_forms(tmp_kernel_2) + tmp_semantics.add_semantics(tmp_kernel_1) + tmp_semantics.add_semantics(tmp_kernel_2) + tmp_semantics.assign_optimal_throughput(tmp_kernel_1) + tmp_semantics.assign_optimal_throughput(tmp_kernel_2) + k1i1_pp = [round(x, 2) for x in tmp_kernel_1[0].port_pressure] + k2i1_pp = [round(x, 2) for x in tmp_kernel_2[0].port_pressure] + self.assertEqual(k1i1_pp, [0.33, 0.0, 0.33, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33, 0.0, 0.0]) + self.assertEqual(k2i1_pp, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]) + + def test_optimal_throughput_assignment_AArch64(self): kernel_fixed = deepcopy(self.kernel_AArch64) self.semantics_tx2.add_semantics(kernel_fixed) 
@@ -343,7 +437,12 @@ class TestSemanticTools(unittest.TestCase): # 3 # 5_______>9 # - dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx) + dg = KernelDG( + self.kernel_x86, + self.parser_x86_att, + self.machine_model_csx, + self.semantics_csx + ) self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=3))), 1) self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=3)), 6) @@ -358,12 +457,44 @@ class TestSemanticTools(unittest.TestCase): with self.assertRaises(ValueError): dg.get_dependent_instruction_forms() # test dot creation - dg.export_graph(filepath="/dev/null") + dg.export_graph(filepath=os.devnull) + + def test_kernelDG_x86_intel(self): + # + # 3 + # \___>5__>6 + # / / + # 4 / + # / + # 5.1 + # + dg = KernelDG( + self.kernel_x86_intel, + self.parser_x86_intel, + self.machine_model_csx, + self.semantics_csx_intel + ) + self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) + self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=3))), 1) + self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=3)), 5) + self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=4))), 1) + self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=4)), 5) + self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=5))), 1) + self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=5)), 6) + self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=5.1))), 1) + self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=5.1)), 5) + self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=6)), []) + self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=7)), []) + self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=8)), []) + with self.assertRaises(ValueError): + 
dg.get_dependent_instruction_forms() + # test dot creation + dg.export_graph(filepath=os.devnull) def test_memdependency_x86(self): dg = KernelDG( self.kernel_x86_memdep, - self.parser_x86, + self.parser_x86_att, self.machine_model_csx, self.semantics_csx, ) @@ -373,7 +504,22 @@ class TestSemanticTools(unittest.TestCase): with self.assertRaises(ValueError): dg.get_dependent_instruction_forms() # test dot creation - dg.export_graph(filepath="/dev/null") + dg.export_graph(filepath=os.devnull) + + def test_memdependency_x86_intel(self): + dg = KernelDG( + self.kernel_x86_intel_memdep, + self.parser_x86_intel, + self.machine_model_csx, + self.semantics_csx_intel, + ) + self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) + self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8}) + self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {10, 12}) + with self.assertRaises(ValueError): + dg.get_dependent_instruction_forms() + # test dot creation + dg.export_graph(filepath=os.devnull) def test_kernelDG_AArch64(self): dg = KernelDG( @@ -404,7 +550,7 @@ class TestSemanticTools(unittest.TestCase): with self.assertRaises(ValueError): dg.get_dependent_instruction_forms() # test dot creation - dg.export_graph(filepath="/dev/null") + dg.export_graph(filepath=os.devnull) def test_kernelDG_SVE(self): KernelDG( @@ -420,11 +566,15 @@ class TestSemanticTools(unittest.TestCase): path_to_yaml=self._find_file("hidden_load_machine_model.yml") ) self.assertTrue(machine_model_hld.has_hidden_loads()) - semantics_hld = ArchSemantics(machine_model_hld) - kernel_hld = self.parser_x86.parse_file(self.code_x86) - kernel_hld_2 = self.parser_x86.parse_file(self.code_x86) - kernel_hld_2 = self.parser_x86.parse_file(self.code_x86)[-3:] - kernel_hld_3 = self.parser_x86.parse_file(self.code_x86)[5:8] + semantics_hld = ArchSemantics(self.parser_x86_att, machine_model_hld) + kernel_hld = self.parser_x86_att.parse_file(self.code_x86) + kernel_hld_2 = 
self.parser_x86_att.parse_file(self.code_x86) + kernel_hld_2 = self.parser_x86_att.parse_file(self.code_x86)[-3:] + kernel_hld_3 = self.parser_x86_att.parse_file(self.code_x86)[5:8] + + semantics_hld.normalize_instruction_forms(kernel_hld) + semantics_hld.normalize_instruction_forms(kernel_hld_2) + semantics_hld.normalize_instruction_forms(kernel_hld_3) semantics_hld.add_semantics(kernel_hld) semantics_hld.add_semantics(kernel_hld_2) @@ -438,7 +588,12 @@ class TestSemanticTools(unittest.TestCase): self.assertEqual(num_hidden_loads_3, 1) def test_cyclic_dag(self): - dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx) + dg = KernelDG( + self.kernel_x86, + self.parser_x86_att, + self.machine_model_csx, + self.semantics_csx + ) dg.dg.add_edge(100, 101, latency=1.0) dg.dg.add_edge(101, 102, latency=2.0) dg.dg.add_edge(102, 100, latency=3.0) @@ -503,7 +658,45 @@ class TestSemanticTools(unittest.TestCase): def test_loop_carried_dependency_x86(self): lcd_id = "8" lcd_id2 = "5" - dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx) + dg = KernelDG( + self.kernel_x86, + self.parser_x86_att, + self.machine_model_csx, + self.semantics_csx + ) + lc_deps = dg.get_loopcarried_dependencies() + # self.assertEqual(len(lc_deps), 2) + # ID 8 + self.assertEqual( + lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"] + ) + self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1) + self.assertEqual( + lc_deps[lcd_id]["dependencies"][0][0], + dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"], + ) + # w/ flag dependencies: ID 9 w/ len=2 + # w/o flag dependencies: ID 5 w/ len=1 + # TODO discuss + self.assertEqual( + lc_deps[lcd_id2]["root"], + dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"], + ) + self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1) + self.assertEqual( + lc_deps[lcd_id2]["dependencies"][0][0], + 
dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"], + ) + + def test_loop_carried_dependency_x86_intel(self): + lcd_id = "8" + lcd_id2 = "7" + dg = KernelDG( + self.kernel_x86_intel, + self.parser_x86_intel, + self.machine_model_csx, + self.semantics_csx_intel + ) lc_deps = dg.get_loopcarried_dependencies() # self.assertEqual(len(lc_deps), 2) # ID 8 @@ -532,7 +725,7 @@ class TestSemanticTools(unittest.TestCase): start_time = time.perf_counter() KernelDG( self.kernel_x86_long_LCD, - self.parser_x86, + self.parser_x86_att, self.machine_model_csx, self.semantics_x86, timeout=10, @@ -542,7 +735,7 @@ class TestSemanticTools(unittest.TestCase): start_time = time.perf_counter() KernelDG( self.kernel_x86_long_LCD, - self.parser_x86, + self.parser_x86_att, self.machine_model_csx, self.semantics_x86, timeout=2, @@ -556,22 +749,32 @@ class TestSemanticTools(unittest.TestCase): def test_is_read_is_written_x86(self): # independent form HW model - dag = KernelDG(self.kernel_x86, self.parser_x86, None, None) + dag = KernelDG(self.kernel_x86, self.parser_x86_att, None, None) reg_rcx = RegisterOperand(name="rcx") reg_ymm1 = RegisterOperand(name="ymm1") - instr_form_r_c = self.parser_x86.parse_line("vmovsd %xmm0, (%r15,%rcx,8)") + instr_form_r_c = self.parser_x86_att.parse_line("vmovsd %xmm0, (%r15,%rcx,8)") + self.semantics_csx.normalize_instruction_form(instr_form_r_c) self.semantics_csx.assign_src_dst(instr_form_r_c) - instr_form_non_r_c = self.parser_x86.parse_line("movl %xmm0, (%r15,%rax,8)") + instr_form_non_r_c = self.parser_x86_att.parse_line("movl %xmm0, (%r15,%rax,8)") + self.semantics_csx.normalize_instruction_form(instr_form_non_r_c) self.semantics_csx.assign_src_dst(instr_form_non_r_c) - instr_form_w_c = self.parser_x86.parse_line("movi $0x05ACA, %rcx") + instr_form_w_c = self.parser_x86_att.parse_line("movi $0x05ACA, %rcx") + self.semantics_csx.normalize_instruction_form(instr_form_w_c) self.semantics_csx.assign_src_dst(instr_form_w_c) - instr_form_rw_ymm_1 = 
self.parser_x86.parse_line("vinsertf128 $0x1, %xmm1, %ymm0, %ymm1") + instr_form_rw_ymm_1 = self.parser_x86_att.parse_line( + "vinsertf128 $0x1, %xmm1, %ymm0, %ymm1" + ) + self.semantics_csx.normalize_instruction_form(instr_form_rw_ymm_1) self.semantics_csx.assign_src_dst(instr_form_rw_ymm_1) - instr_form_rw_ymm_2 = self.parser_x86.parse_line("vinsertf128 $0x1, %xmm0, %ymm1, %ymm1") + instr_form_rw_ymm_2 = self.parser_x86_att.parse_line( + "vinsertf128 $0x1, %xmm0, %ymm1, %ymm1" + ) + self.semantics_csx.normalize_instruction_form(instr_form_rw_ymm_2) self.semantics_csx.assign_src_dst(instr_form_rw_ymm_2) - instr_form_r_ymm = self.parser_x86.parse_line("vmovapd %ymm1, %ymm0") + instr_form_r_ymm = self.parser_x86_att.parse_line("vmovapd %ymm1, %ymm0") + self.semantics_csx.normalize_instruction_form(instr_form_r_ymm) self.semantics_csx.assign_src_dst(instr_form_r_ymm) self.assertTrue(dag.is_read(reg_rcx, instr_form_r_c)) self.assertFalse(dag.is_read(reg_rcx, instr_form_non_r_c)) @@ -585,6 +788,47 @@ class TestSemanticTools(unittest.TestCase): self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2)) self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm)) + def test_is_read_is_written_x86_intel(self): + # independent form HW model + dag = KernelDG(self.kernel_x86_intel, self.parser_x86_intel, None, None) + reg_rcx = RegisterOperand(name="rcx") + reg_ymm1 = RegisterOperand(name="ymm1") + + instr_form_r_c = self.parser_x86_intel.parse_line("vmovsd QWORD PTR [r15+rcx*8], xmm0") + self.semantics_csx_intel.normalize_instruction_form(instr_form_r_c) + self.semantics_csx_intel.assign_src_dst(instr_form_r_c) + instr_form_non_r_c = self.parser_x86_intel.parse_line("mov QWORD PTR [r15+rax*8], xmm0") + self.semantics_csx_intel.normalize_instruction_form(instr_form_non_r_c) + self.semantics_csx_intel.assign_src_dst(instr_form_non_r_c) + instr_form_w_c = self.parser_x86_intel.parse_line("mov rcx, H05ACA") + 
self.semantics_csx_intel.normalize_instruction_form(instr_form_w_c) + self.semantics_csx_intel.assign_src_dst(instr_form_w_c) + + instr_form_rw_ymm_1 = self.parser_x86_intel.parse_line( + "vinsertf128 ymm1, ymm0, xmm1, 1" + ) + self.semantics_csx_intel.normalize_instruction_form(instr_form_rw_ymm_1) + self.semantics_csx_intel.assign_src_dst(instr_form_rw_ymm_1) + instr_form_rw_ymm_2 = self.parser_x86_intel.parse_line( + "vinsertf128 ymm1, ymm1, xmm0, 1" + ) + self.semantics_csx_intel.normalize_instruction_form(instr_form_rw_ymm_2) + self.semantics_csx_intel.assign_src_dst(instr_form_rw_ymm_2) + instr_form_r_ymm = self.parser_x86_intel.parse_line("vmovapd ymm0, ymm1") + self.semantics_csx_intel.normalize_instruction_form(instr_form_r_ymm) + self.semantics_csx_intel.assign_src_dst(instr_form_r_ymm) + self.assertTrue(dag.is_read(reg_rcx, instr_form_r_c)) + self.assertFalse(dag.is_read(reg_rcx, instr_form_non_r_c)) + self.assertFalse(dag.is_read(reg_rcx, instr_form_w_c)) + self.assertTrue(dag.is_written(reg_rcx, instr_form_w_c)) + self.assertFalse(dag.is_written(reg_rcx, instr_form_r_c)) + self.assertTrue(dag.is_read(reg_ymm1, instr_form_rw_ymm_1)) + self.assertTrue(dag.is_read(reg_ymm1, instr_form_rw_ymm_2)) + self.assertTrue(dag.is_read(reg_ymm1, instr_form_r_ymm)) + self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1)) + self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2)) + self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm)) + def test_is_read_is_written_AArch64(self): # independent form HW model dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None, None) @@ -597,20 +841,28 @@ class TestSemanticTools(unittest.TestCase): regs_gp = [reg_w1, reg_x1] instr_form_r_1 = self.parser_AArch64.parse_line("stp q1, q3, [x12, #192]") + self.semantics_tx2.normalize_instruction_form(instr_form_r_1) self.semantics_tx2.assign_src_dst(instr_form_r_1) instr_form_r_2 = self.parser_AArch64.parse_line("fadd v2.2d, v1.2d, v0.2d") + 
self.semantics_tx2.normalize_instruction_form(instr_form_r_2) self.semantics_tx2.assign_src_dst(instr_form_r_2) instr_form_w_1 = self.parser_AArch64.parse_line("ldr d1, [x1, #:got_lo12:q2c]") + self.semantics_tx2.normalize_instruction_form(instr_form_w_1) self.semantics_tx2.assign_src_dst(instr_form_w_1) instr_form_non_w_1 = self.parser_AArch64.parse_line("ldr x1, [x1, #:got_lo12:q2c]") + self.semantics_tx2.normalize_instruction_form(instr_form_non_w_1) self.semantics_tx2.assign_src_dst(instr_form_non_w_1) instr_form_rw_1 = self.parser_AArch64.parse_line("fmul v1.2d, v1.2d, v0.2d") + self.semantics_tx2.normalize_instruction_form(instr_form_rw_1) self.semantics_tx2.assign_src_dst(instr_form_rw_1) instr_form_rw_2 = self.parser_AArch64.parse_line("ldp q2, q4, [x1, #64]!") + self.semantics_tx2.normalize_instruction_form(instr_form_rw_2) self.semantics_tx2.assign_src_dst(instr_form_rw_2) instr_form_rw_3 = self.parser_AArch64.parse_line("str x4, [x1], #64") + self.semantics_tx2.normalize_instruction_form(instr_form_rw_3) self.semantics_tx2.assign_src_dst(instr_form_rw_3) instr_form_non_rw_1 = self.parser_AArch64.parse_line("adds x1, x11") + self.semantics_tx2.normalize_instruction_form(instr_form_non_rw_1) self.semantics_tx2.assign_src_dst(instr_form_non_rw_1) for reg in regs: diff --git a/validation/kernels/striad.c b/validation/kernels/striad.c index 49b1feb..3c7ab43 100644 --- a/validation/kernels/striad.c +++ b/validation/kernels/striad.c @@ -7,6 +7,12 @@ #endif #endif +#define USE_IACA 0 + +#if USE_IACA +#include "intel\iacaMarks.h" +#endif + #define DTYPE double void dummy(void *); @@ -15,9 +21,15 @@ void kernel(DTYPE* a, DTYPE* b, DTYPE* c, DTYPE* d, const int repeat, const int #ifndef MAIN { for(int r=0; r < repeat; r++) { +#if USE_IACA + IACA_VC64_START +#endif for(int i=0; i