diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 94b2666..6636024 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: "3.10" + python-version: "3.x" - name: Install Python dependencies run: python -m pip install black flake8 diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml index 03ddd5f..c217ff1 100644 --- a/.github/workflows/test-n-publish.yml +++ b/.github/workflows/test-n-publish.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -18,11 +18,13 @@ jobs: run: | python -m pip install wheel python -m pip install --upgrade pip + python -m pip install setuptools python -m pip install codecov requests python -m pip install bs4 sudo apt-get -y install graphviz libgraphviz-dev pkg-config python -m pip install pygraphviz - python -m pip install "kerncraft>=0.8.16" + #python -m pip install "kerncraft>=0.8.16" + python -m pip install git+https://github.com/RRZE-HPC/kerncraft.git@7caff4e2ecdbef595013041ba0131e37ed33c72c python -m pip install -e . - name: Test run: | diff --git a/.gitignore b/.gitignore index 6ef37fa..426ace6 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,7 @@ venv.bak/ # mypy .mypy_cache/ + +# Visual Studio +.vs +x64/ diff --git a/osaca/data/model_importer.py b/osaca/data/model_importer.py index d10555e..f1ab348 100644 --- a/osaca/data/model_importer.py +++ b/osaca/data/model_importer.py @@ -111,7 +111,8 @@ def extract_model(tree, arch, skip_mem=True): print("Skipping...", file=sys.stderr) return None mm = MachineModel(isa=isa) - parser = get_parser(isa) + # The model uses the AT&T syntax. 
+ parser = get_parser(isa, "ATT") for instruction_tag in tree.findall(".//instruction"): ignore = False diff --git a/osaca/osaca.py b/osaca/osaca.py index d0fb49d..4605d9c 100644 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -11,7 +11,7 @@ from ruamel.yaml import YAML from osaca.db_interface import import_benchmark_output, sanity_check from osaca.frontend import Frontend -from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT +from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT, ParserX86Intel from osaca.semantics import ( INSTR_FLAGS, ArchSemantics, @@ -47,6 +47,10 @@ DEFAULT_ARCHS = { "aarch64": "V2", "x86": "SPR", } +SUPPORTED_SYNTAXES = [ + "ATT", + "INTEL", +] # Stolen from pip @@ -108,6 +112,12 @@ def create_parser(parser=None): "ZEN4, TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a " "default uarch for x86/AArch64.", ) + parser.add_argument( + "--syntax", + type=str, + help="Define the assembly syntax (ATT, Intel) for x86. If no syntax is given, OSACA " + "tries to determine automatically the syntax to use.", + ) parser.add_argument( "--fixed", action="store_true", @@ -232,6 +242,14 @@ def check_arguments(args, parser): parser.error( "Microarchitecture not supported. Please see --help for all valid architecture codes." ) + if args.syntax and args.arch and MachineModel.get_isa_for_arch(args.arch) != "x86": + parser.error("Syntax can only be explicitly specified for an x86 microarchitecture") + if args.syntax: + args.syntax = args.syntax.upper() + if args.syntax not in SUPPORTED_SYNTAXES: + parser.error( + "Assembly syntax not supported. Please see --help for all valid assembly syntaxes." + ) if "import_data" in args and args.import_data not in supported_import_files: parser.error( "Microbenchmark not supported for data import. 
Please see --help for all valid " @@ -310,30 +328,56 @@ def inspect(args, output_file=sys.stdout): code = args.file.read() # Detect ISA if necessary - arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)] - print_arch_warning = False if args.arch else True - isa = MachineModel.get_isa_for_arch(arch) + detected_isa, detected_syntax = BaseParser.detect_ISA(code) + detected_arch = DEFAULT_ARCHS[detected_isa] + + print_arch_warning = not args.arch verbose = args.verbose ignore_unknown = args.ignore_unknown - # Parse file - parser = get_asm_parser(arch) - try: - parsed_code = parser.parse_file(code) - except Exception as e: - # probably the wrong parser based on heuristic - if args.arch is None: - # change ISA and try again - arch = ( - DEFAULT_ARCHS["x86"] - if BaseParser.detect_ISA(code) == "aarch64" - else DEFAULT_ARCHS["aarch64"] - ) - isa = MachineModel.get_isa_for_arch(arch) - parser = get_asm_parser(arch) + # If the arch/syntax is explicitly specified, that's the only thing we'll try. Otherwise, we'll + # look at all the possible archs/syntaxes, but with our detected arch/syntax last in the list, + # thus tried first. + if args.arch: + archs_to_try = [args.arch] + else: + archs_to_try = list(DEFAULT_ARCHS.values()) + archs_to_try.remove(detected_arch) + archs_to_try.append(detected_arch) + if args.syntax: + syntaxes_to_try = [args.syntax] + else: + syntaxes_to_try = SUPPORTED_SYNTAXES + [None] + syntaxes_to_try.remove(detected_syntax) + syntaxes_to_try.append(detected_syntax) + + # Filter the cross-product of archs and syntaxes to eliminate the combinations that don't make + # sense. + combinations_to_try = [ + (arch, syntax) + for arch in archs_to_try + for syntax in syntaxes_to_try + if (syntax is not None) == (MachineModel.get_isa_for_arch(arch) == "x86") + ] + + # Parse file. 
+ message = "" + single_combination = len(combinations_to_try) == 1 + while True: + arch, syntax = combinations_to_try.pop() + parser = get_asm_parser(arch, syntax) + try: parsed_code = parser.parse_file(code) - else: - raise e + break + except Exception as e: + message += f"\nWith arch {arch} and syntax {syntax} got error: {e}." + # Either the wrong parser based on heuristic, or a bona fide syntax error (or + # unsupported syntax). For ease of debugging, we emit the entire exception trace if + # we tried a single arch/syntax combination. If we tried multiple combinations, we + # don't emit the traceback as it would apply to the latest combination tried, which is + # probably the less interesting. + if not combinations_to_try: + raise SyntaxError(message) from e if single_combination else None # Reduce to marked kernel or chosen section and add semantics if args.lines: @@ -341,13 +385,14 @@ def inspect(args, output_file=sys.stdout): kernel = [line for line in parsed_code if line.line_number in line_range] print_length_warning = False else: - kernel = reduce_to_section(parsed_code, isa) + kernel = reduce_to_section(parsed_code, parser) # Print warning if kernel has no markers and is larger than threshold (100) print_length_warning = ( True if len(kernel) == len(parsed_code) and len(kernel) > 100 else False ) machine_model = MachineModel(arch=arch) - semantics = ArchSemantics(machine_model) + semantics = ArchSemantics(parser, machine_model) + semantics.normalize_instruction_forms(kernel) semantics.add_semantics(kernel) # Do optimal schedule for kernel throughput if wished if not args.fixed: @@ -417,7 +462,7 @@ def run(args, output_file=sys.stdout): @lru_cache() -def get_asm_parser(arch) -> BaseParser: +def get_asm_parser(arch, syntax="ATT") -> BaseParser: """ Helper function to create the right parser for a specific architecture. 
@@ -427,7 +472,7 @@ def get_asm_parser(arch) -> BaseParser: """ isa = MachineModel.get_isa_for_arch(arch) if isa == "x86": - return ParserX86ATT() + return ParserX86ATT() if syntax == "ATT" else ParserX86Intel() elif isa == "aarch64": return ParserAArch64() diff --git a/osaca/parser/__init__.py b/osaca/parser/__init__.py index 3b5e8ba..492b998 100644 --- a/osaca/parser/__init__.py +++ b/osaca/parser/__init__.py @@ -1,11 +1,13 @@ """ Collection of parsers supported by OSACA. -Only the parser below will be exported, so please add new parsers to __all__. +Only the parsers below will be exported, so please add new parsers to __all__. """ from .base_parser import BaseParser +from .parser_x86 import ParserX86 from .parser_x86att import ParserX86ATT +from .parser_x86intel import ParserX86Intel from .parser_AArch64 import ParserAArch64 from .instruction_form import InstructionForm from .operand import Operand @@ -14,15 +16,17 @@ __all__ = [ "Operand", "InstructionForm", "BaseParser", + "ParserX86", "ParserX86ATT", + "ParserX86Intel", "ParserAArch64", "get_parser", ] -def get_parser(isa): +def get_parser(isa, syntax="ATT"): if isa.lower() == "x86": - return ParserX86ATT() + return ParserX86ATT() if syntax.upper() == "ATT" else ParserX86Intel() elif isa.lower() == "aarch64": return ParserAArch64() else: diff --git a/osaca/parser/base_parser.py b/osaca/parser/base_parser.py index 3ac2124..ce383de 100644 --- a/osaca/parser/base_parser.py +++ b/osaca/parser/base_parser.py @@ -25,20 +25,57 @@ class BaseParser(object): self.construct_parser() self._parser_constructed = True + def isa(self): + # Done in derived classes + raise NotImplementedError + + # The marker functions return lists of `InstructionForm` that are used to find the IACA markers + # in the parsed code. In addition to just a list, the marker may have a structure like + # [I1, [I2, I3], I4, ...] 
where the nested list indicates that at least one of I2 and I3 must + # match the second instruction in the fragment of parsed code. + # If an instruction form is a `DirectiveOperand`, the match may happen over several directive + # operands in the parsed code, provided that the directives have the same name and the + # parameters are in sequence with respect to the pattern. This provides an easy way to describe + # a sequence of bytes irrespective of the way it was grouped in the assembly source. + # Note that markers must be matched *before* normalization. + def start_marker(self): + # Done in derived classes + raise NotImplementedError + + def end_marker(self): + # Done in derived classes + raise NotImplementedError + + # Performs all the normalization needed to match the instruction to the ISA/arch model. This + # method must set the `normalized` property of the instruction and must be idempotent. + def normalize_instruction_form(self, instruction_form, isa_model, arch_model): + raise NotImplementedError + @staticmethod def detect_ISA(file_content): - """Detect the ISA of the assembly based on the used registers and return the ISA code.""" + """ + Detect the ISA of the assembly based on the used registers and return the ISA code. + + :param str file_content: assembly code. + :return: a tuple isa, syntax describing the architecture and the assembly syntax, + if appropriate. If there is no notion of syntax, the second element is None. + """ # Check for the amount of registers in the code to determine the ISA # 1) Check for xmm, ymm, zmm, rax, rbx, rcx, and rdx registers in x86 + # AT&T syntax. There is a % before each register name. heuristics_x86ATT = [r"%[xyz]mm[0-9]", r"%[er][abcd]x[0-9]"] - # 2) check for v and z vector registers and x/w general-purpose registers + # 2) Same as above, but for the Intel syntax. There is no % before the register names.
+ heuristics_x86Intel = [r"[^%][xyz]mm[0-9]", r"[^%][er][abcd]x[0-9]"] + # 3) check for v and z vector registers and x/w general-purpose registers heuristics_aarch64 = [r"[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]", r"[wx][0-9]"] - matches = {"x86": 0, "aarch64": 0} + matches = {("x86", "ATT"): 0, ("x86", "INTEL"): 0, ("aarch64", None): 0} for h in heuristics_x86ATT: - matches["x86"] += len(re.findall(h, file_content)) + matches[("x86", "ATT")] += len(re.findall(h, file_content)) + for h in heuristics_x86Intel: + matches[("x86", "INTEL")] += len(re.findall(h, file_content)) for h in heuristics_aarch64: - matches["aarch64"] += len(re.findall(h, file_content)) + matches[("aarch64", None)] += len(re.findall(h, file_content)) return max(matches.items(), key=operator.itemgetter(1))[0] @@ -94,6 +131,14 @@ class BaseParser(object): def get_full_reg_name(self, register): raise NotImplementedError + # Must be called on a *normalized* instruction. + def get_regular_source_operands(self, instruction_form): + raise NotImplementedError + + # Must be called on a *normalized* instruction. 
+ def get_regular_destination_operands(self, instruction_form): + raise NotImplementedError + def normalize_imd(self, imd): raise NotImplementedError diff --git a/osaca/parser/identifier.py b/osaca/parser/identifier.py index e5c0209..87c3d76 100644 --- a/osaca/parser/identifier.py +++ b/osaca/parser/identifier.py @@ -41,3 +41,12 @@ class IdentifierOperand(Operand): def __repr__(self): return self.__str__() + + def __eq__(self, other): + if isinstance(other, IdentifierOperand): + return ( + self._name == other._name + and self._offset == other._offset + and self._relocation == other._relocation + ) + return False diff --git a/osaca/parser/instruction_form.py b/osaca/parser/instruction_form.py index d32bc34..5a04c7a 100644 --- a/osaca/parser/instruction_form.py +++ b/osaca/parser/instruction_form.py @@ -19,6 +19,7 @@ class InstructionForm: port_pressure=None, operation=None, breaks_dependency_on_equal_operands=False, + normalized=False, ): self._mnemonic = mnemonic self._operands = operands @@ -33,6 +34,7 @@ class InstructionForm: self._operation = operation self._uops = uops self._breaks_dependency_on_equal_operands = breaks_dependency_on_equal_operands + self._normalized = normalized self._latency = latency self._throughput = throughput self._latency_cp = [] @@ -42,6 +44,10 @@ class InstructionForm: self._port_uops = [] self._flags = [] + def check_normalized(self): + if not self._normalized: + raise AssertionError("Unnormalized instruction") + @property def semantic_operands(self): return self._semantic_operands @@ -114,6 +120,10 @@ class InstructionForm: def breaks_dependency_on_equal_operands(self): return self._breaks_dependency_on_equal_operands + @property + def normalized(self): + return self._normalized + @semantic_operands.setter def semantic_operands(self, semantic_operands): self._semantic_operands = semantic_operands @@ -142,6 +152,10 @@ class InstructionForm: def breaks_dependency_on_equal_operands(self, boolean): 
self._breaks_dependency_on_equal_operands = boolean + @normalized.setter + def normalized(self, normalized): + self._normalized = normalized + @mnemonic.setter def mnemonic(self, mnemonic): self._mnemonic = mnemonic diff --git a/osaca/parser/label.py b/osaca/parser/label.py index 39b1ece..62cbfcd 100644 --- a/osaca/parser/label.py +++ b/osaca/parser/label.py @@ -20,3 +20,8 @@ class LabelOperand(Operand): def __repr__(self): return self.__str__() + + def __eq__(self, other): + if isinstance(other, LabelOperand): + return self._name == other._name + return False diff --git a/osaca/parser/memory.py b/osaca/parser/memory.py index 9e79c3c..96d812d 100644 --- a/osaca/parser/memory.py +++ b/osaca/parser/memory.py @@ -15,6 +15,7 @@ class MemoryOperand(Operand): pre_indexed=False, post_indexed=False, indexed_val=None, + data_type=None, src=None, dst=None, source=False, @@ -30,6 +31,7 @@ class MemoryOperand(Operand): self._pre_indexed = pre_indexed self._post_indexed = post_indexed self._indexed_val = indexed_val + self._data_type = data_type # type of register we store from (`src`) or load to (`dst`) self._src = src self._dst = dst @@ -74,6 +76,14 @@ class MemoryOperand(Operand): def indexed_val(self): return self._indexed_val + @property + def data_type(self): + return self._data_type + + @data_type.setter + def data_type(self, data_type): + self._data_type = data_type + @property def src(self): return self._src diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py index f19db97..12f44b2 100644 --- a/osaca/parser/parser_AArch64.py +++ b/osaca/parser/parser_AArch64.py @@ -26,7 +26,53 @@ class ParserAArch64(BaseParser): def __init__(self): super().__init__() - self.isa = "aarch64" + + def isa(self): + return "aarch64" + + def start_marker(self): + return [ + InstructionForm( + mnemonic="mov", + operands=[RegisterOperand(name="1", prefix="x"), ImmediateOperand(value=111)], + ), + InstructionForm( + directive_id=DirectiveOperand(name="byte", 
parameters=["213", "3", "32", "31"]) + ), + ] + + def end_marker(self): + return [ + InstructionForm( + mnemonic="mov", + operands=[RegisterOperand(name="1", prefix="x"), ImmediateOperand(value=222)], + ), + InstructionForm( + directive_id=DirectiveOperand(name="byte", parameters=["213", "3", "32", "31"]) + ), + ] + + def normalize_instruction_form(self, instruction_form, isa_model, arch_model): + """ + If the instruction doesn't exist in the machine model, normalize it by dropping the shape + suffix. + """ + if instruction_form.normalized: + return + instruction_form.normalized = True + + mnemonic = instruction_form.mnemonic + if not mnemonic: + return + model = arch_model.get_instruction(mnemonic, instruction_form.operands) + if not model: + if "." in mnemonic: + # Check for instruction without shape/cc suffix. + suffix_start = mnemonic.index(".") + mnemonic = mnemonic[:suffix_start] + model = arch_model.get_instruction(mnemonic, instruction_form.operands) + if model: + instruction_form.mnemonic = mnemonic def construct_parser(self): """Create parser for ARM AArch64 ISA.""" @@ -592,6 +638,21 @@ class ParserAArch64(BaseParser): name += "[" + str(register.index) + "]" return name + def get_regular_source_operands(self, instruction_form): + """Get source operand of given instruction form assuming regular src/dst behavior.""" + # if there is only one operand, assume it is a source operand + if len(instruction_form.operands) == 1: + return [instruction_form.operands[0]] + return [op for op in instruction_form.operands[1:]] + + def get_regular_destination_operands(self, instruction_form): + """Get destination operand of given instruction form assuming regular src/dst behavior.""" + # if there is only one operand, assume no destination + if len(instruction_form.operands) == 1: + return [] + # return first operand + return instruction_form.operands[:1] + def normalize_imd(self, imd): """Normalize immediate to decimal based representation""" if isinstance(imd, 
IdentifierOperand): diff --git a/osaca/parser/parser_x86.py b/osaca/parser/parser_x86.py new file mode 100644 index 0000000..e9b1837 --- /dev/null +++ b/osaca/parser/parser_x86.py @@ -0,0 +1,123 @@ +import re +import string + +from osaca.parser import BaseParser + + +class ParserX86(BaseParser): + _instance = None + + # Singleton pattern, as this is created very many times. + def __new__(cls): + if cls._instance is None: + cls._instance = super(ParserX86, cls).__new__(cls) + return cls._instance + + def __init__(self): + super().__init__() + + def isa(self): + return "x86" + + def is_reg_dependend_of(self, reg_a, reg_b): + """Check if ``reg_a`` is dependent on ``reg_b``""" + reg_a_name = reg_a.name.upper() + reg_b_name = reg_b.name.upper() + + # Check if they are the same registers + if reg_a_name == reg_b_name: + return True + # Check vector registers first + if self.is_vector_register(reg_a): + if self.is_vector_register(reg_b): + if reg_a_name[1:] == reg_b_name[1:]: + # Registers in the same vector space + return True + return False + # Check basic GPRs + gpr_groups = { + "A": ["RAX", "EAX", "AX", "AH", "AL"], + "B": ["RBX", "EBX", "BX", "BH", "BL"], + "C": ["RCX", "ECX", "CX", "CH", "CL"], + "D": ["RDX", "EDX", "DX", "DH", "DL"], + "SP": ["RSP", "ESP", "SP", "SPL"], + "SRC": ["RSI", "ESI", "SI", "SIL"], + "DST": ["RDI", "EDI", "DI", "DIL"], + } + if self.is_basic_gpr(reg_a): + if self.is_basic_gpr(reg_b): + for dep_group in gpr_groups.values(): + if reg_a_name in dep_group: + if reg_b_name in dep_group: + return True + return False + + # Check other GPRs + ma = re.match(r"R([0-9]+)[DWB]?", reg_a_name) + mb = re.match(r"R([0-9]+)[DWB]?", reg_b_name) + if ma and mb and ma.group(1) == mb.group(1): + return True + + # No dependencies + return False + + def is_basic_gpr(self, register): + """Check if register is a basic general purpose register (ebi, rax, ...)""" + if any(char.isdigit() for char in register.name) or any( + register.name.lower().startswith(x) for x 
in ["mm", "xmm", "ymm", "zmm"] + ): + return False + return True + + def is_gpr(self, register): + """Check if register is a general purpose register""" + if register is None: + return False + if self.is_basic_gpr(register): + return True + return re.match(r"R([0-9]+)[DWB]?", register.name, re.IGNORECASE) + + def is_vector_register(self, register): + """Check if register is a vector register""" + if register is None or register.name is None: + return False + if register.name.rstrip(string.digits).lower() in [ + "mm", + "xmm", + "ymm", + "zmm", + ]: + return True + return False + + def get_reg_type(self, register): + """Get register type""" + if register is None: + return False + if self.is_gpr(register): + return "gpr" + elif self.is_vector_register(register): + return register.name.rstrip(string.digits).lower() + raise ValueError + + def is_flag_dependend_of(self, flag_a, flag_b): + """Check if ``flag_a`` is dependent on ``flag_b``""" + # we assume flags are independent of each other, e.g., CF can be read while ZF gets written + # TODO validate this assumption + return flag_a.name == flag_b.name + + def get_regular_source_operands(self, instruction_form): + """Get source operand of given instruction form assuming regular src/dst behavior.""" + # if there is only one operand, assume it is a source operand + if len(instruction_form.operands) == 1: + return [instruction_form.operands[0]] + # return all but last operand + return [op for op in instruction_form.operands[0:-1]] + + def get_regular_destination_operands(self, instruction_form): + """Get destination operand of given instruction form assuming regular src/dst behavior.""" + # if there is only one operand, assume no destination + if len(instruction_form.operands) == 1: + return [] + # return last operand + return instruction_form.operands[-1:] diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index c5f0627..54f5125 100644 --- a/osaca/parser/parser_x86att.py +++ 
b/osaca/parser/parser_x86att.py @@ -1,11 +1,8 @@ #!/usr/bin/env python3 -import string -import re - import pyparsing as pp -from osaca.parser import BaseParser +from osaca.parser import ParserX86 from osaca.parser.instruction_form import InstructionForm from osaca.parser.directive import DirectiveOperand from osaca.parser.memory import MemoryOperand @@ -15,8 +12,9 @@ from osaca.parser.identifier import IdentifierOperand from osaca.parser.immediate import ImmediateOperand -class ParserX86ATT(BaseParser): +class ParserX86ATT(ParserX86): _instance = None + GAS_SUFFIXES = "bswlqt" # Singelton pattern, as this is created very many times def __new__(cls): @@ -26,7 +24,61 @@ class ParserX86ATT(BaseParser): def __init__(self): super().__init__() - self.isa = "x86" + + def start_marker(self): + return [ + [ + InstructionForm( + mnemonic="mov", + operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")], + ), + InstructionForm( + mnemonic="movl", + operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")], + ), + ], + InstructionForm( + directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"]) + ), + ] + + def end_marker(self): + return [ + [ + InstructionForm( + mnemonic="mov", + operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")], + ), + InstructionForm( + mnemonic="movl", + operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")], + ), + ], + InstructionForm( + directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"]) + ), + ] + + def normalize_instruction_form(self, instruction_form, isa_model, arch_model): + """ + If the instruction doesn't exist in the machine model, normalize it by dropping the GAS + suffix. 
+ """ + if instruction_form.normalized: + return + instruction_form.normalized = True + + mnemonic = instruction_form.mnemonic + if not mnemonic: + return + model = arch_model.get_instruction(mnemonic, instruction_form.operands) + if not model: + # Check for instruction without GAS suffix. + if mnemonic[-1] in self.GAS_SUFFIXES: + mnemonic = mnemonic[:-1] + model = arch_model.get_instruction(mnemonic, instruction_form.operands) + if model: + instruction_form.mnemonic = mnemonic def construct_parser(self): """Create parser for x86 AT&T ISA.""" @@ -253,10 +305,10 @@ class ParserX86ATT(BaseParser): if result is None: try: result = self.parse_instruction(line) - except pp.ParseException: + except pp.ParseException as e: raise ValueError( "Could not parse instruction on line {}: {!r}".format(line_number, line) - ) + ) from e instruction_form.mnemonic = result.mnemonic instruction_form.operands = result.operands instruction_form.comment = result.comment @@ -393,90 +445,3 @@ class ParserX86ATT(BaseParser): return imd.value # identifier return imd - - def is_flag_dependend_of(self, flag_a, flag_b): - """Check if ``flag_a`` is dependent on ``flag_b``""" - # we assume flags are independent of each other, e.g., CF can be read while ZF gets written - # TODO validate this assumption - return flag_a.name == flag_b.name - - def is_reg_dependend_of(self, reg_a, reg_b): - """Check if ``reg_a`` is dependent on ``reg_b``""" - reg_a_name = reg_a.name.upper() - reg_b_name = reg_b.name.upper() - - # Check if they are the same registers - if reg_a_name == reg_b_name: - return True - # Check vector registers first - if self.is_vector_register(reg_a): - if self.is_vector_register(reg_b): - if reg_a_name[1:] == reg_b_name[1:]: - # Registers in the same vector space - return True - return False - # Check basic GPRs - gpr_groups = { - "A": ["RAX", "EAX", "AX", "AH", "AL"], - "B": ["RBX", "EBX", "BX", "BH", "BL"], - "C": ["RCX", "ECX", "CX", "CH", "CL"], - "D": ["RDX", "EDX", "DX", "DH", 
"DL"], - "SP": ["RSP", "ESP", "SP", "SPL"], - "SRC": ["RSI", "ESI", "SI", "SIL"], - "DST": ["RDI", "EDI", "DI", "DIL"], - } - if self.is_basic_gpr(reg_a): - if self.is_basic_gpr(reg_b): - for dep_group in gpr_groups.values(): - if reg_a_name in dep_group: - if reg_b_name in dep_group: - return True - return False - - # Check other GPRs - ma = re.match(r"R([0-9]+)[DWB]?", reg_a_name) - mb = re.match(r"R([0-9]+)[DWB]?", reg_b_name) - if ma and mb and ma.group(1) == mb.group(1): - return True - - # No dependencies - return False - - def is_basic_gpr(self, register): - """Check if register is a basic general purpose register (ebi, rax, ...)""" - if any(char.isdigit() for char in register.name) or any( - register.name.lower().startswith(x) for x in ["mm", "xmm", "ymm", "zmm"] - ): - return False - return True - - def is_gpr(self, register): - """Check if register is a general purpose register""" - if register is None: - return False - if self.is_basic_gpr(register): - return True - return re.match(r"R([0-9]+)[DWB]?", register.name, re.IGNORECASE) - - def is_vector_register(self, register): - """Check if register is a vector register""" - if register is None or register.name is None: - return False - if register.name.rstrip(string.digits).lower() in [ - "mm", - "xmm", - "ymm", - "zmm", - ]: - return True - return False - - def get_reg_type(self, register): - """Get register type""" - if register is None: - return False - if self.is_gpr(register): - return "gpr" - elif self.is_vector_register(register): - return register.name.rstrip(string.digits).lower() - raise ValueError diff --git a/osaca/parser/parser_x86intel.py b/osaca/parser/parser_x86intel.py new file mode 100644 index 0000000..8802b59 --- /dev/null +++ b/osaca/parser/parser_x86intel.py @@ -0,0 +1,807 @@ +#!/usr/bin/env python3 + +import pyparsing as pp +import unicodedata + +from osaca.parser import ParserX86 +from osaca.parser.directive import DirectiveOperand +from osaca.parser.identifier import 
IdentifierOperand +from osaca.parser.immediate import ImmediateOperand +from osaca.parser.instruction_form import InstructionForm +from osaca.parser.label import LabelOperand +from osaca.parser.memory import MemoryOperand +from osaca.parser.register import RegisterOperand + +# We assume any non-ASCII characters except control characters and line terminators can be part of +# identifiers; this is based on the assumption that no assembler uses non-ASCII white space and +# syntax characters. +# This approach is described at the end of https://www.unicode.org/reports/tr55/#Whitespace-Syntax. +# It is appropriate for tools, such as this one, which process source code but do not fully validate +# it (in this case, that’s the job of the assembler). +NON_ASCII_PRINTABLE_CHARACTERS = "".join( + chr(cp) + for cp in range(0x80, 0x10FFFF + 1) + if unicodedata.category(chr(cp)) not in ("Cc", "Zl", "Zp", "Cs", "Cn") +) + + +# References: +# ASM386 Assembly Language Reference, document number 469165-003, https://mirror.math.princeton.edu/pub/oldlinux/Linux.old/Ref-docs/asm-ref.pdf. +# Microsoft Macro Assembler BNF Grammar, https://learn.microsoft.com/en-us/cpp/assembler/masm/masm-bnf-grammar?view=msvc-170. +# Intel Architecture Code Analyzer User's Guide, https://www.intel.com/content/dam/develop/external/us/en/documents/intel-architecture-code-analyzer-3-0-users-guide-157552.pdf. +class ParserX86Intel(ParserX86): + _instance = None + + # Singleton pattern, as this is created very many times. + def __new__(cls): + if cls._instance is None: + cls._instance = super(ParserX86Intel, cls).__new__(cls) + return cls._instance + + def __init__(self): + super().__init__() + self._equ = {} + + # The IACA manual says: "For Microsoft* Visual C++ compiler, 64-bit version, use + # IACA_VC64_START and IACA_VC64_END, instead" (of IACA_START and IACA_END). + # TODO: Inconveniently, the code generated with optimization disabled (/Od) has two + # instructions.
We should support both patterns, but then who runs OSACA with /Od? + def start_marker(self): + return [ + InstructionForm( + mnemonic="mov", + operands=[ + MemoryOperand( + base=RegisterOperand(name="GS"), offset=ImmediateOperand(value=111) + ), + ImmediateOperand(value=111), + ], + ), + ] + + def end_marker(self): + return [ + InstructionForm( + mnemonic="mov", + operands=[ + MemoryOperand( + base=RegisterOperand(name="GS"), offset=ImmediateOperand(value=222) + ), + ImmediateOperand(value=222), + ], + ), + ] + + def normalize_instruction_form(self, instruction_form, isa_model, arch_model): + """ + If the model indicates that this instruction has a single destination that is the last + operand, move the first operand to the last position. This effectively converts the Intel + syntax to the AT&T one. + """ + if instruction_form.normalized: + return + instruction_form.normalized = True + + mnemonic = instruction_form.mnemonic + if not mnemonic: + return + + # The model may only contain the VEX-encoded instruction and we may have the non-VEX-encoded + # one, or vice-versa. Note that this doesn't work when the arguments differ between VEX- + # encoded and non-VEX-encoded, e.g., for psubq. + if not arch_model.get_instruction(mnemonic, len(instruction_form.operands)): + if mnemonic[0] == "v": + unvexed_mnemonic = mnemonic[1:] + if arch_model.get_instruction(unvexed_mnemonic, len(instruction_form.operands)): + mnemonic = unvexed_mnemonic + else: + vexed_mnemonic = "v" + mnemonic + if arch_model.get_instruction(vexed_mnemonic, len(instruction_form.operands)): + mnemonic = vexed_mnemonic + instruction_form.mnemonic = mnemonic + + # We cannot pass the operands because they may not match before the reordering. We just + # pass the arity instead. Also, this must use the ISA model, because that's where the + # source/destination information is found. 
+ model = isa_model.get_instruction(mnemonic, len(instruction_form.operands)) + has_single_destination_at_end = False + has_destination = False + if model: + for o in model.operands: + if o.source: + if has_destination: + has_single_destination_at_end = False + if o.destination: + if has_destination: + has_single_destination_at_end = False + else: + has_destination = True + has_single_destination_at_end = True + else: + # if there is only one operand, assume it is a source operand + has_single_destination_at_end = len(instruction_form.operands) > 1 + + if has_single_destination_at_end: + # It is important to reverse the operands, we cannot just move the first one last. This + # makes a difference for instructions with 3 operands or more, such as roundsd: the + # model files expect the rounding mode (an immediate) first but the Intel syntax has it + # last. + instruction_form.operands.reverse() + + # A hack to help with comparison instruction: if the instruction is in the model, and has + # exactly two sources, swap its operands. + if ( + model + and not has_destination + and len(instruction_form.operands) == 2 + and not isa_model.get_instruction(mnemonic, instruction_form.operands) + and not arch_model.get_instruction(mnemonic, instruction_form.operands) + ): + instruction_form.operands.reverse() + + # If the instruction has a well-known data type, append a suffix. + data_type_to_suffix = {"DWORD": "d", "QWORD": "q"} + for o in instruction_form.operands: + if isinstance(o, MemoryOperand) and o.data_type: + suffix = data_type_to_suffix.get(o.data_type, None) + if suffix: + suffixed_mnemonic = mnemonic + suffix + if isa_model.get_instruction( + suffixed_mnemonic, len(instruction_form.operands) + ) or arch_model.get_instruction( + suffixed_mnemonic, len(instruction_form.operands) + ): + instruction_form.mnemonic = suffixed_mnemonic + break + + def construct_parser(self): + """Create parser for x86 Intel ISA.""" + # Numeric literal. 
+ binary_number = pp.Combine(pp.Word("01") + pp.CaselessLiteral("B")) + octal_number = pp.Combine(pp.Word("01234567") + pp.CaselessLiteral("O")) + decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)) + hex_number = pp.Combine(pp.Word(pp.hexnums) + pp.CaselessLiteral("H")) + float_number = pp.Combine( + pp.Optional(pp.Literal("-")) + pp.Word(pp.nums) + pp.Word(".", pp.nums) + ).setResultsName("value") + integer_number = ( + binary_number ^ octal_number ^ decimal_number ^ hex_number + ).setResultsName("value") + + # Comment. + self.comment = pp.Word(";#", exact=1) + pp.Group( + pp.ZeroOrMore(pp.Word(pp.printables + NON_ASCII_PRINTABLE_CHARACTERS)) + ).setResultsName(self.comment_id) + + # Types. + data_type = ( + pp.CaselessKeyword("BYTE") + | pp.CaselessKeyword("DWORD") + | pp.CaselessKeyword("FWORD") + | pp.CaselessKeyword("MMWORD") + | pp.CaselessKeyword("OWORD") + | pp.CaselessKeyword("QWORD") + | pp.CaselessKeyword("REAL10") + | pp.CaselessKeyword("REAL4") + | pp.CaselessKeyword("REAL8") + | pp.CaselessKeyword("SBYTE") + | pp.CaselessKeyword("SDWORD") + | pp.CaselessKeyword("SQWORD") + | pp.CaselessKeyword("SWORD") + | pp.CaselessKeyword("TBYTE") + | pp.CaselessKeyword("WORD") + | pp.CaselessKeyword("XMMWORD") + | pp.CaselessKeyword("YMMWORD") + ).setResultsName("data_type") + + # Identifier. Note that $ is not mentioned in the ASM386 Assembly Language Reference, + # but it is mentioned in the MASM syntax. < and > apparently show up in C++ mangled names. + # ICC allows ".", at least in labels. + first = pp.Word(pp.alphas + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>", exact=1) + rest = pp.Word(pp.alphanums + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>") + identifier = pp.Group( + pp.Combine(first + pp.Optional(rest)).setResultsName("name") + ).setResultsName("identifier") + + # Register. + # This follows the MASM grammar. 
+ special_register = ( + pp.CaselessKeyword("CR0") + | pp.CaselessKeyword("CR2") + | pp.CaselessKeyword("CR3") + | pp.CaselessKeyword("DR0") + | pp.CaselessKeyword("DR1") + | pp.CaselessKeyword("DR2") + | pp.CaselessKeyword("DR3") + | pp.CaselessKeyword("DR6") + | pp.CaselessKeyword("DR7") + | pp.CaselessKeyword("TR3") + | pp.CaselessKeyword("TR4") + | pp.CaselessKeyword("TR5") + | pp.CaselessKeyword("TR6") + | pp.CaselessKeyword("TR7") + ).setResultsName("name") + gp_register = ( + pp.CaselessKeyword("AX") + | pp.CaselessKeyword("EAX") + | pp.CaselessKeyword("CX") + | pp.CaselessKeyword("ECX") + | pp.CaselessKeyword("DX") + | pp.CaselessKeyword("EDX") + | pp.CaselessKeyword("BX") + | pp.CaselessKeyword("EBX") + | pp.CaselessKeyword("DI") + | pp.CaselessKeyword("EDI") + | pp.CaselessKeyword("SI") + | pp.CaselessKeyword("ESI") + | pp.CaselessKeyword("BP") + | pp.CaselessKeyword("EBP") + | pp.CaselessKeyword("SP") + | pp.CaselessKeyword("ESP") + | pp.CaselessKeyword("R8W") + | pp.CaselessKeyword("R8D") + | pp.CaselessKeyword("R9W") + | pp.CaselessKeyword("R9D") + | pp.CaselessKeyword("R12D") + | pp.CaselessKeyword("R13W") + | pp.CaselessKeyword("R13D") + | pp.CaselessKeyword("R14W") + | pp.CaselessKeyword("R14D") + ).setResultsName("name") + byte_register = ( + pp.CaselessKeyword("AL") + | pp.CaselessKeyword("AH") + | pp.CaselessKeyword("CL") + | pp.CaselessKeyword("CH") + | pp.CaselessKeyword("DL") + | pp.CaselessKeyword("DH") + | pp.CaselessKeyword("BL") + | pp.CaselessKeyword("BH") + | pp.CaselessKeyword("R8B") + | pp.CaselessKeyword("R9B") + | pp.CaselessKeyword("R10B") + | pp.CaselessKeyword("R11B") + | pp.CaselessKeyword("R12B") + | pp.CaselessKeyword("R13B") + ).setResultsName("name") + qword_register = ( + pp.CaselessKeyword("RAX") + | pp.CaselessKeyword("RCX") + | pp.CaselessKeyword("RDX") + | pp.CaselessKeyword("RBX") + | pp.CaselessKeyword("RSP") + | pp.CaselessKeyword("RBP") + | pp.CaselessKeyword("RSI") + | pp.CaselessKeyword("RDI") + | 
pp.CaselessKeyword("R8") + | pp.CaselessKeyword("R9") + | pp.CaselessKeyword("R10") + | pp.CaselessKeyword("R11") + | pp.CaselessKeyword("R12") + | pp.CaselessKeyword("R13") + | pp.CaselessKeyword("R14") + | pp.CaselessKeyword("R15") + ).setResultsName("name") + fpu_register = pp.Combine( + pp.CaselessKeyword("ST") + + pp.Optional(pp.Literal("(") + pp.Word("01234567") + pp.Literal(")")) + ).setResultsName("name") + xmm_register = pp.Combine(pp.CaselessLiteral("XMM") + pp.Word(pp.nums)) | pp.Combine( + pp.CaselessLiteral("XMM1") + pp.Word("012345") + ) + simd_register = ( + pp.Combine(pp.CaselessLiteral("MM") + pp.Word("01234567")) + | xmm_register + | pp.Combine(pp.CaselessLiteral("YMM") + pp.Word(pp.nums)) + | pp.Combine(pp.CaselessLiteral("YMM1") + pp.Word("012345")) + ).setResultsName("name") + segment_register = ( + pp.CaselessKeyword("CS") + | pp.CaselessKeyword("DS") + | pp.CaselessKeyword("ES") + | pp.CaselessKeyword("FS") + | pp.CaselessKeyword("GS") + | pp.CaselessKeyword("SS") + ).setResultsName("name") + self.register = pp.Group( + special_register + | gp_register + | byte_register + | qword_register + | fpu_register + | simd_register + | segment_register + | pp.CaselessKeyword("RIP") + ).setResultsName(self.register_id) + + # Register expressions. 
+ base_register = self.register + index_register = self.register + scale = pp.Word("1248", exact=1) + + base = base_register.setResultsName("base") + displacement = pp.Group( + pp.Group(integer_number ^ identifier).setResultsName(self.immediate_id) + ).setResultsName("displacement") + short_indexed = index_register.setResultsName("index") + long_indexed = ( + index_register.setResultsName("index") + + pp.Literal("*") + + scale.setResultsName("scale") + ) + indexed = pp.Group(short_indexed ^ long_indexed).setResultsName("indexed") + operator = pp.Word("+-", exact=1) + operator_index = pp.Word("+-", exact=1).setResultsName("operator_idx") + operator_displacement = pp.Word("+-", exact=1).setResultsName("operator_disp") + + # Syntax: + # `base` always preceedes `indexed`. + # `short_indexed` is only allowed if it follows `base`, not alone. + # `displacement` can go anywhere. + # It's easier to list all the alternatives than to represent these rules using complicated + # `Optional` and what not. + register_expression = pp.Group( + pp.Literal("[") + + ( + base + ^ (base + operator_displacement + displacement) + ^ (base + operator_displacement + displacement + operator_index + indexed) + ^ (base + operator_index + indexed) + ^ (base + operator_index + indexed + operator_displacement + displacement) + ^ (displacement + operator + base) + ^ (displacement + operator + base + operator_index + indexed) + ^ ( + displacement + + operator_index + + pp.Group(long_indexed).setResultsName("indexed") + ) + ^ pp.Group(long_indexed).setResultsName("indexed") + ^ ( + pp.Group(long_indexed).setResultsName("indexed") + + operator_displacement + + displacement + ) + ) + + pp.Literal("]") + ).setResultsName("register_expression") + + # Immediate. + immediate = pp.Group(integer_number | float_number | identifier).setResultsName( + self.immediate_id + ) + + # Expressions. 
+ # The ASM86 manual has weird expressions on page 130 (displacement outside of the register + # expression, multiple register expressions). Let's ignore those for now, but see + # https://stackoverflow.com/questions/71540754/why-sometimes-use-offset-flatlabel-and-sometimes-not. + address_expression = pp.Group( + self.register.setResultsName("segment") + pp.Literal(":") + immediate + ^ immediate + register_expression + ^ register_expression + ^ identifier + pp.Optional(operator + immediate) + ).setResultsName("address_expression") + + offset_expression = pp.Group( + pp.CaselessKeyword("OFFSET") + + pp.Group( + pp.CaselessKeyword("GROUP") + | pp.CaselessKeyword("SEGMENT") + | pp.CaselessKeyword("FLAT") + ) + # The MASM grammar has the ":" immediately after "OFFSET", but that's not what MSVC + # outputs. + + pp.Literal(":") + + identifier.setResultsName("identifier") + + pp.Optional(pp.Literal("+") + immediate.setResultsName("displacement")) + ).setResultsName("offset_expression") + ptr_expression = pp.Group( + data_type + pp.CaselessKeyword("PTR") + address_expression + ).setResultsName("ptr_expression") + short_expression = pp.Group(pp.CaselessKeyword("SHORT") + identifier).setResultsName( + "short_expression" + ) + + # Instructions. + mnemonic = pp.Word(pp.alphas, pp.alphanums).setResultsName("mnemonic") + operand = pp.Group( + self.register + | pp.Group( + offset_expression | ptr_expression | short_expression | address_expression + ).setResultsName(self.memory_id) + | immediate + ) + self.instruction_parser = ( + mnemonic + + pp.Optional(operand.setResultsName("operand1")) + + pp.Optional(pp.Suppress(pp.Literal(","))) + + pp.Optional(operand.setResultsName("operand2")) + + pp.Optional(pp.Suppress(pp.Literal(","))) + + pp.Optional(operand.setResultsName("operand3")) + + pp.Optional(pp.Suppress(pp.Literal(","))) + + pp.Optional(operand.setResultsName("operand4")) + + pp.Optional(self.comment) + ) + + # Label. 
+ self.label = pp.Group( + identifier.setResultsName("name") + + pp.Literal(":") + + pp.Optional(self.instruction_parser) + + pp.Optional(self.comment) + ).setResultsName(self.label_id) + + # Directives. + # The identifiers at the beginnig of a directive cannot start with a "." otherwise we end up + # with ambiguities. + directive_first = pp.Word(pp.alphas + NON_ASCII_PRINTABLE_CHARACTERS + "$?@_<>", exact=1) + directive_rest = pp.Word(pp.alphanums + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>") + directive_identifier = pp.Group( + pp.Combine(directive_first + pp.Optional(directive_rest)).setResultsName("name") + ).setResultsName("identifier") + + # Parameter can be any quoted string or sequence of characters besides ';' (for comments) + # or ',' (parameter delimiter). See ASM386 p. 38. + directive_parameter = ( + pp.quotedString + ^ ( + pp.Word(pp.printables + NON_ASCII_PRINTABLE_CHARACTERS, excludeChars=",;") + + pp.Optional(pp.Suppress(pp.Literal(","))) + ) + ^ pp.Suppress(pp.Literal(",")) + ) + # The directives that don't start with a "." are ambiguous with instructions, so we list + # them explicitly. + # TODO: The directives that are types introduce a nasty ambiguity with instructions. Skip + # them for now, apparently the MSVC output uses the short D? directives. 
+ directive_keywords = ( + pp.CaselessKeyword("ALIAS") + | pp.CaselessKeyword("ALIGN") + | pp.CaselessKeyword("ASSUME") + # | pp.CaselessKeyword("BYTE") + | pp.CaselessKeyword("CATSTR") + | pp.CaselessKeyword("COMM") + | pp.CaselessKeyword("COMMENT") + | pp.CaselessKeyword("DB") + | pp.CaselessKeyword("DD") + | pp.CaselessKeyword("DF") + | pp.CaselessKeyword("DQ") + | pp.CaselessKeyword("DT") + | pp.CaselessKeyword("DW") + # | pp.CaselessKeyword("DWORD") + | pp.CaselessKeyword("ECHO") + | pp.CaselessKeyword("END") + | pp.CaselessKeyword("ENDP") + | pp.CaselessKeyword("ENDS") + | pp.CaselessKeyword("EQU") + | pp.CaselessKeyword("EVEN") + | pp.CaselessKeyword("EXTRN") + | pp.CaselessKeyword("EXTERNDEF") + # | pp.CaselessKeyword("FWORD") + | pp.CaselessKeyword("GROUP") + | pp.CaselessKeyword("INCLUDE") + | pp.CaselessKeyword("INCLUDELIB") + | pp.CaselessKeyword("INSTR") + | pp.CaselessKeyword("INVOKE") + | pp.CaselessKeyword("LABEL") + # | pp.CaselessKeyword("MMWORD") + | pp.CaselessKeyword("OPTION") + | pp.CaselessKeyword("ORG") + | pp.CaselessKeyword("PAGE") + | pp.CaselessKeyword("POPCONTEXT") + | pp.CaselessKeyword("PROC") + | pp.CaselessKeyword("PROTO") + | pp.CaselessKeyword("PUBLIC") + | pp.CaselessKeyword("PUSHCONTEXT") + # | pp.CaselessKeyword("QWORD") + # | pp.CaselessKeyword("REAL10") + # | pp.CaselessKeyword("REAL4") + # | pp.CaselessKeyword("REAL8") + | pp.CaselessKeyword("RECORD") + # | pp.CaselessKeyword("SBYTE") + # | pp.CaselessKeyword("SDWORD") + | pp.CaselessKeyword("SEGMENT") + | pp.CaselessKeyword("SIZESTR") + | pp.CaselessKeyword("STRUCT") + | pp.CaselessKeyword("SUBSTR") + | pp.CaselessKeyword("SUBTITLE") + # | pp.CaselessKeyword("SWORD") + # | pp.CaselessKeyword("TBYTE") + | pp.CaselessKeyword("TEXTEQU") + | pp.CaselessKeyword("TITLE") + | pp.CaselessKeyword("TYPEDEF") + | pp.CaselessKeyword("UNION") + # | pp.CaselessKeyword("WORD") + # | pp.CaselessKeyword("XMMWORD") + # | pp.CaselessKeyword("YMMWORD") + ) + self.directive = pp.Group( + 
pp.Optional(~directive_keywords + directive_identifier) + + ( + pp.Combine(pp.Literal(".") + pp.Word(pp.alphanums + "_")) + | pp.Literal("=") + | directive_keywords + ).setResultsName("name") + + pp.ZeroOrMore(directive_parameter).setResultsName("parameters") + + pp.Optional(self.comment) + ).setResultsName(self.directive_id) + + def parse_line(self, line, line_number=None): + """ + Parse line and return instruction form. + + :param str line: line of assembly code + :param line_number: default None, identifier of instruction form + :type line_number: int, optional + :return: ``dict`` -- parsed asm line (comment, label, directive or instruction form) + """ + instruction_form = InstructionForm(line=line, line_number=line_number) + result = None + + # 1. Parse comment. + try: + result = self.process_operand(self.comment.parseString(line, parseAll=True)) + instruction_form.comment = " ".join(result[self.comment_id]) + except pp.ParseException: + pass + + # 2. Parse label. + if not result: + try: + # Returns tuple with label operand and comment, if any. + result = self.process_operand(self.label.parseString(line, parseAll=True)) + instruction_form.label = result[0].name + if result[1]: + instruction_form.comment = " ".join(result[1]) + except pp.ParseException: + pass + + # 3. Parse directive. + if not result: + try: + # Returns tuple with directive operand and comment, if any. + result = self.process_operand(self.directive.parseString(line, parseAll=True)) + instruction_form.directive = result[0] + if result[1]: + instruction_form.comment = " ".join(result[1]) + except pp.ParseException: + pass + + # 4. Parse instruction. 
+ if not result: + try: + result = self.parse_instruction(line) + except pp.ParseException as e: + raise ValueError( + "Could not parse instruction on line {}: {!r}".format(line_number, line) + ) from e + instruction_form.mnemonic = result.mnemonic + instruction_form.operands = result.operands + instruction_form.comment = result.comment + return instruction_form + + def make_instruction(self, parse_result): + """ + Parse instruction in asm line. + + :param parse_result: tuple resulting from calling `parseString` on the `instruction_parser`. + :returns: `dict` -- parsed instruction form + """ + operands = [] + # Add operands to list + # Check first operand + if "operand1" in parse_result: + operands.append(self.process_operand(parse_result.operand1)) + # Check second operand + if "operand2" in parse_result: + operands.append(self.process_operand(parse_result.operand2)) + # Check third operand + if "operand3" in parse_result: + operands.append(self.process_operand(parse_result.operand3)) + # Check fourth operand + if "operand4" in parse_result: + operands.append(self.process_operand(parse_result.operand4)) + return_dict = InstructionForm( + mnemonic=parse_result.mnemonic, + operands=operands, + label_id=None, + comment_id=( + " ".join(parse_result[self.comment_id]) + if self.comment_id in parse_result + else None + ), + ) + + return return_dict + + def parse_instruction(self, instruction): + """ + Parse instruction in asm line. + + :param str instruction: Assembly line string. 
+ :returns: `dict` -- parsed instruction form + """ + return self.make_instruction( + self.instruction_parser.parseString(instruction, parseAll=True) + ) + + def parse_register(self, register_string): + """Parse register string""" + try: + return self.process_operand(self.register.parseString(register_string, parseAll=True)) + except pp.ParseException: + return None + + def process_operand(self, operand): + """Post-process operand""" + if self.directive_id in operand: + return self.process_directive(operand[self.directive_id]) + if self.identifier in operand: + return self.process_identifier(operand[self.identifier]) + if self.immediate_id in operand: + return self.process_immediate(operand[self.immediate_id]) + if self.label_id in operand: + return self.process_label(operand[self.label_id]) + if self.memory_id in operand: + return self.process_memory_address(operand[self.memory_id]) + if self.register_id in operand: + return self.process_register(operand[self.register_id]) + return operand + + def process_directive(self, directive): + # TODO: This is putting the identifier in the parameters. No idea if it's right. + parameters = [directive.identifier.name] if "identifier" in directive else [] + parameters.extend(directive.parameters) + directive_new = DirectiveOperand(name=directive.name, parameters=parameters or None) + # Interpret the "=" directives because the generated assembly is full of symbols that are + # defined there. 
+ if directive.name == "=": + self._equ[parameters[0]] = parameters[1] + return directive_new, directive.get("comment") + + def process_register(self, operand): + return RegisterOperand(name=operand.name) + + def process_register_expression(self, register_expression): + base = register_expression.get("base") + displacement = register_expression.get("displacement") + indexed = register_expression.get("indexed") + index = None + scale = 1 + if indexed: + index = indexed.get("index") + scale = int(indexed.get("scale", "1"), 0) + if register_expression.get("operator_index") == "-": + scale *= -1 + displacement_op = self.process_immediate(displacement.immediate) if displacement else None + if displacement_op and register_expression.get("operator_disp") == "-": + displacement_op.value *= -1 + base_op = RegisterOperand(name=base.name) if base else None + index_op = RegisterOperand(name=index.name) if index else None + new_memory = MemoryOperand( + offset=displacement_op, base=base_op, index=index_op, scale=scale + ) + return new_memory + + def process_address_expression(self, address_expression, data_type=None): + # TODO: It seems that we could have a prefix immediate operand, a displacement in the + # brackets, and an offset. How all of this works together is somewhat mysterious. 
+ immediate_operand = ( + self.process_immediate(address_expression.immediate) + if "immediate" in address_expression + else None + ) + register_expression = ( + self.process_register_expression(address_expression.register_expression) + if "register_expression" in address_expression + else None + ) + segment = ( + self.process_register(address_expression.segment) + if "segment" in address_expression + else None + ) + identifier = ( + self.process_identifier(address_expression.identifier) + if "identifier" in address_expression + else None + ) + if register_expression: + if immediate_operand: + register_expression.offset = immediate_operand + if data_type: + register_expression.data_type = data_type + return register_expression + elif segment: + return MemoryOperand(base=segment, offset=immediate_operand, data_type=data_type) + elif identifier: + if immediate_operand: + identifier.offset = immediate_operand + elif not data_type: + # An address expression without a data type or an offset is just an identifier. + # This matters for jumps. + return identifier + return MemoryOperand(offset=identifier, data_type=data_type) + else: + return MemoryOperand(base=immediate_operand, data_type=data_type) + + def process_offset_expression(self, offset_expression): + # TODO: Record that this is an offset expression. + displacement = ( + self.process_immediate(offset_expression.displacement) + if "displacement" in offset_expression + else None + ) + if displacement and "operator_disp" == "-": + displacement.value *= -1 + identifier = self.process_identifier(offset_expression.identifier) + identifier.offset = displacement + return MemoryOperand(offset=identifier) + + def process_ptr_expression(self, ptr_expression): + # TODO: Do something with the data_type. + return self.process_address_expression( + ptr_expression.address_expression, ptr_expression.data_type + ) + + def process_short_expression(self, short_expression): + # TODO: Do something with the fact that it is short. 
+ return LabelOperand(name=short_expression.identifier.name) + + def process_memory_address(self, memory_address): + """Post-process memory address operand""" + if "address_expression" in memory_address: + return self.process_address_expression(memory_address.address_expression) + elif "offset_expression" in memory_address: + return self.process_offset_expression(memory_address.offset_expression) + elif "ptr_expression" in memory_address: + return self.process_ptr_expression(memory_address.ptr_expression) + elif "short_expression" in memory_address: + return self.process_short_expression(memory_address.short_expression) + return memory_address + + def process_label(self, label): + """Post-process label asm line""" + # Remove duplicated 'name' level due to identifier. Note that there is no place to put the + # comment, if any. + label["name"] = label["name"]["name"] + return ( + LabelOperand(name=label.name), + self.make_instruction(label) if "mnemonic" in label else None, + ) + + def process_immediate(self, immediate): + """Post-process immediate operand""" + if "identifier" in immediate: + # Actually an identifier, change declaration. + return self.process_identifier(immediate.identifier) + new_immediate = ImmediateOperand(value=immediate.get("sign", "") + immediate.value) + new_immediate.value = self.normalize_imd(new_immediate) + return new_immediate + + def process_identifier(self, identifier): + if identifier.name in self._equ: + # Actually an immediate, change declaration. + new_immediate = ImmediateOperand( + identifier=identifier.name, value=self._equ[identifier.name] + ) + new_immediate.value = self.normalize_imd(new_immediate) + return new_immediate + return IdentifierOperand(name=identifier.name) + + def normalize_imd(self, imd): + """Normalize immediate to decimal based representation""" + if isinstance(imd.value, str): + if "." in imd.value: + return float(imd.value) + # Now parse depending on the base. 
+ base = {"B": 2, "O": 8, "H": 16}.get(imd.value[-1], 10) + value = 0 + negative = imd.value[0] == "-" + positive = imd.value[0] == "+" + start = +(negative or positive) + stop = len(imd.value) if base == 10 else -1 + for c in imd.value[start:stop]: + value = value * base + int(c, base) + return -value if negative else value + else: + return imd.value diff --git a/osaca/semantics/arch_semantics.py b/osaca/semantics/arch_semantics.py index e87e5e7..b83a8dd 100644 --- a/osaca/semantics/arch_semantics.py +++ b/osaca/semantics/arch_semantics.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 """Semantics opbject responsible for architecture specific semantic operations""" - import sys import warnings from itertools import chain @@ -14,12 +13,22 @@ from osaca.parser.register import RegisterOperand class ArchSemantics(ISASemantics): - GAS_SUFFIXES = "bswlqt" - - def __init__(self, machine_model: MachineModel, path_to_yaml=None): - super().__init__(machine_model.get_ISA().lower(), path_to_yaml=path_to_yaml) + def __init__(self, parser, machine_model: MachineModel, path_to_yaml=None): + super().__init__(parser, path_to_yaml=path_to_yaml) self._machine_model = machine_model - self._isa = machine_model.get_ISA().lower() + + def normalize_instruction_form(self, instruction_form): + self.parser.normalize_instruction_form( + instruction_form, self.isa_model, self._machine_model + ) + + def normalize_instruction_forms(self, instruction_forms): + for instruction_form in instruction_forms: + self.normalize_instruction_form(instruction_form) + + def _check_normalized(self, instruction_forms): + for instruction_form in instruction_forms: + instruction_form.check_normalized() # SUMMARY FUNCTION def add_semantics(self, kernel): @@ -29,6 +38,7 @@ class ArchSemantics(ISASemantics): :param list kernel: kernel to apply semantics """ + self._check_normalized(kernel) for instruction_form in kernel: self.assign_src_dst(instruction_form) self.assign_tp_lt(instruction_form) @@ -41,6 +51,7 @@ class 
ArchSemantics(ISASemantics): :param list kernel: kernel to apply optimal port utilization """ + self._check_normalized(kernel) INC = 0.01 kernel.reverse() port_list = self._machine_model.get_ports() @@ -137,6 +148,7 @@ class ArchSemantics(ISASemantics): def set_hidden_loads(self, kernel): """Hide loads behind stores if architecture supports hidden loads (depricated)""" + self._check_normalized(kernel) loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr.flags] stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr.flags] # Filter instructions including load and store @@ -176,6 +188,7 @@ class ArchSemantics(ISASemantics): # mark instruction form with semantic flags def assign_tp_lt(self, instruction_form): """Assign throughput and latency to an instruction form.""" + instruction_form.check_normalized() flags = [] port_number = len(self._machine_model["ports"]) if instruction_form.mnemonic is None: @@ -189,25 +202,6 @@ class ArchSemantics(ISASemantics): instruction_data = self._machine_model.get_instruction( instruction_form.mnemonic, instruction_form.operands ) - if ( - not instruction_data - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # check for instruction without GAS suffix - instruction_data = self._machine_model.get_instruction( - instruction_form.mnemonic[:-1], instruction_form.operands - ) - if ( - instruction_data is None - and self._isa == "aarch64" - and "." 
in instruction_form.mnemonic - ): - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - instruction_data = self._machine_model.get_instruction( - instruction_form.mnemonic[:suffix_start], instruction_form.operands - ) if instruction_data: # instruction form in DB ( @@ -232,25 +226,6 @@ class ArchSemantics(ISASemantics): instruction_data_reg = self._machine_model.get_instruction( instruction_form.mnemonic, operands ) - if ( - not instruction_data_reg - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # check for instruction without GAS suffix - instruction_data_reg = self._machine_model.get_instruction( - instruction_form.mnemonic[:-1], operands - ) - if ( - instruction_data_reg is None - and self._isa == "aarch64" - and "." in instruction_form.mnemonic - ): - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - instruction_data_reg = self._machine_model.get_instruction( - instruction_form.mnemonic[:suffix_start], operands - ) if instruction_data_reg: assign_unknown = False reg_type = self._parser.get_reg_type( @@ -310,7 +285,7 @@ class ArchSemantics(ISASemantics): # - all mem operands in src_dst are pre-/post_indexed # since it is no mem store if ( - self._isa == "aarch64" + self._parser.isa() == "aarch64" and not isinstance( instruction_form.semantic_operands["destination"], MemoryOperand, @@ -406,6 +381,7 @@ class ArchSemantics(ISASemantics): def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags): """Apply performance data to instruction if it was found in the archDB""" + instruction_form.check_normalized() throughput = instruction_data.throughput port_pressure = self._machine_model.average_port_pressure(instruction_data.port_pressure) instruction_form.port_uops = instruction_data.port_pressure @@ -441,12 +417,12 @@ class ArchSemantics(ISASemantics): def convert_op_to_reg(self, reg_type, 
regtype="0"): """Create register operand for a memory addressing operand""" - if self._isa == "x86": + if self._parser.isa() == "x86": if reg_type == "gpr": register = RegisterOperand(name="r" + str(int(regtype) + 9)) else: register = RegisterOperand(name=reg_type + regtype) - elif self._isa == "aarch64": + elif self._parser.isa() == "aarch64": register = RegisterOperand(name=regtype, prefix=reg_type) return register diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index 45f9f2b..d298b32 100644 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -11,7 +11,6 @@ from pathlib import Path import ruamel.yaml from osaca import __version__, utils -from osaca.parser import ParserX86ATT from osaca.parser.instruction_form import InstructionForm from osaca.parser.operand import Operand from osaca.parser.memory import MemoryOperand @@ -79,7 +78,7 @@ class MachineModel(object): else: yaml = self._create_yaml_object() # otherwise load - with open(self._path, "r") as f: + with open(self._path, "r", encoding="utf8") as f: if not lazy: self._data = yaml.load(f) else: @@ -286,23 +285,35 @@ class MachineModel(object): ###################################################### def get_instruction(self, name, operands): - """Find and return instruction data from name and operands.""" + """Find and return instruction data from name and operands/arity.""" # For use with dict instead of list as DB if name is None: return None name_matched_iforms = self._data["instruction_forms_dict"].get(name.upper(), []) try: - return next( - instruction_form - for instruction_form in name_matched_iforms - if self._match_operands( - instruction_form.operands, - operands, + # If `operands` is an integer, it represents the arity of the instruction. This is + # useful to reorder the operands in the Intel syntax because in their original order + # they may not match the model. 
+ if isinstance(operands, int): + arity = operands + return next( + ( + instruction_form + for instruction_form in name_matched_iforms + if len(instruction_form.operands) == arity + ), + None, + ) + else: + return next( + ( + instruction_form + for instruction_form in name_matched_iforms + if self._match_operands(instruction_form.operands, operands) + ), + None, ) - ) - except StopIteration: - return None except TypeError as e: print("\nname: {}\noperands: {}".format(name, operands)) raise TypeError from e @@ -878,6 +889,8 @@ class MachineModel(object): return True def _is_x86_reg_type(self, i_reg, reg, consider_masking=False): + from osaca.parser import ParserX86 + """Check if register type match.""" if reg is None: if i_reg is None: @@ -895,7 +908,7 @@ class MachineModel(object): if i_reg_name == self.WILDCARD or reg.name == self.WILDCARD: return True # differentiate between vector registers (mm, xmm, ymm, zmm) and others (gpr) - parser_x86 = ParserX86ATT() + parser_x86 = ParserX86() if parser_x86.is_vector_register(reg): if reg.name.rstrip(string.digits).lower() == i_reg_name: # Consider masking and zeroing for AVX512 diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py index 6bf0e44..37a74cb 100644 --- a/osaca/semantics/isa_semantics.py +++ b/osaca/semantics/isa_semantics.py @@ -2,7 +2,6 @@ from itertools import chain from osaca import utils -from osaca.parser import ParserAArch64, ParserX86ATT from osaca.parser.memory import MemoryOperand from osaca.parser.operand import Operand from osaca.parser.register import RegisterOperand @@ -26,20 +25,23 @@ class INSTR_FLAGS: class ISASemantics(object): - GAS_SUFFIXES = "bswlqt" - - def __init__(self, isa, path_to_yaml=None): - self._isa = isa.lower() - path = path_to_yaml or utils.find_datafile("isa/" + self._isa + ".yml") + def __init__(self, parser, path_to_yaml=None): + path = path_to_yaml or utils.find_datafile("isa/" + parser.isa() + ".yml") self._isa_model = 
MachineModel(path_to_yaml=path) - if self._isa == "x86": - self._parser = ParserX86ATT() - elif self._isa == "aarch64": - self._parser = ParserAArch64() + self._parser = parser + + @property + def parser(self): + return self._parser + + @property + def isa_model(self): + return self._isa_model def process(self, instruction_forms): """Process a list of instruction forms.""" for i in instruction_forms: + i.check_normalized() self.assign_src_dst(i) # get ;parser result and assign operands to @@ -48,6 +50,7 @@ class ISASemantics(object): # - source/destination def assign_src_dst(self, instruction_form): """Update instruction form dictionary with source, destination and flag information.""" + instruction_form.check_normalized() # if the instruction form doesn't have operands or is None, there's nothing to do if instruction_form.operands is None or instruction_form.mnemonic is None: instruction_form.semantic_operands = {"source": [], "destination": [], "src_dst": []} @@ -57,21 +60,6 @@ class ISASemantics(object): isa_data = self._isa_model.get_instruction( instruction_form.mnemonic, instruction_form.operands ) - if ( - isa_data is None - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # Check for instruction without GAS suffix - isa_data = self._isa_model.get_instruction( - instruction_form.mnemonic[:-1], instruction_form.operands - ) - if isa_data is None and self._isa == "aarch64" and "." 
in instruction_form.mnemonic: - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - isa_data = self._isa_model.get_instruction( - instruction_form.mnemonic[:suffix_start], instruction_form.operands - ) operands = instruction_form.operands op_dict = {} @@ -88,33 +76,16 @@ class ISASemantics(object): isa_data_reg = self._isa_model.get_instruction( instruction_form.mnemonic, operands_reg ) - if ( - isa_data_reg is None - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # Check for instruction without GAS suffix - isa_data_reg = self._isa_model.get_instruction( - instruction_form.mnemonic[:-1], operands_reg - ) - if ( - isa_data_reg is None - and self._isa == "aarch64" - and "." in instruction_form.mnemonic - ): - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - isa_data_reg = self._isa_model.get_instruction( - instruction_form.mnemonic[:suffix_start], operands_reg - ) if isa_data_reg: assign_default = False op_dict = self._apply_found_ISA_data(isa_data_reg, operands) if assign_default: # no irregular operand structure, apply default - op_dict["source"] = self._get_regular_source_operands(instruction_form) - op_dict["destination"] = self._get_regular_destination_operands(instruction_form) + op_dict["source"] = self._parser.get_regular_source_operands(instruction_form) + op_dict["destination"] = self._parser.get_regular_destination_operands( + instruction_form + ) op_dict["src_dst"] = [] # handle Xd! 
registers in aarch64 if any( @@ -133,7 +104,7 @@ class ISASemantics(object): op_dict["source"].remove(reg) op_dict["src_dst"].append(reg) # post-process pre- and post-indexing for aarch64 memory operands - if self._isa == "aarch64": + if self._parser.isa() == "aarch64": for operand in [op for op in op_dict["source"] if isinstance(op, MemoryOperand)]: post_indexed = operand.post_indexed pre_indexed = operand.pre_indexed @@ -177,6 +148,7 @@ class ISASemantics(object): Empty dict if no changes of registers occured. None for registers with unknown changes. If only_postindexed is True, only considers changes due to post_indexed memory references. """ + instruction_form.check_normalized() if instruction_form.mnemonic is None: return {} dest_reg_names = [ @@ -190,21 +162,6 @@ class ISASemantics(object): isa_data = self._isa_model.get_instruction( instruction_form.mnemonic, instruction_form.operands ) - if ( - isa_data is None - and self._isa == "x86" - and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES - ): - # Check for instruction without GAS suffix - isa_data = self._isa_model.get_instruction( - instruction_form.mnemonic[:-1], instruction_form.operands - ) - if isa_data is None and self._isa == "aarch64" and "." 
in instruction_form.mnemonic: - # Check for instruction without shape/cc suffix - suffix_start = instruction_form.mnemonic.index(".") - isa_data = self._isa_model.get_instruction( - instruction_form.mnemonic[:suffix_start], instruction_form.operands - ) if only_postindexed: for o in instruction_form.operands: @@ -321,6 +278,7 @@ class ISASemantics(object): def _has_load(self, instruction_form): """Check if instruction form performs a LOAD""" + instruction_form.check_normalized() for operand in chain( instruction_form.semantic_operands["source"], instruction_form.semantic_operands["src_dst"], @@ -331,6 +289,7 @@ class ISASemantics(object): def _has_store(self, instruction_form): """Check if instruction form perfroms a STORE""" + instruction_form.check_normalized() for operand in chain( instruction_form.semantic_operands["destination"], instruction_form.semantic_operands["src_dst"], @@ -339,33 +298,6 @@ class ISASemantics(object): return True return False - def _get_regular_source_operands(self, instruction_form): - """Get source operand of given instruction form assuming regular src/dst behavior.""" - # if there is only one operand, assume it is a source operand - if len(instruction_form.operands) == 1: - return [instruction_form.operands[0]] - if self._isa == "x86": - # return all but last operand - return [op for op in instruction_form.operands[0:-1]] - elif self._isa == "aarch64": - return [op for op in instruction_form.operands[1:]] - else: - raise ValueError("Unsupported ISA {}.".format(self._isa)) - - def _get_regular_destination_operands(self, instruction_form): - """Get destination operand of given instruction form assuming regular src/dst behavior.""" - # if there is only one operand, assume no destination - if len(instruction_form.operands) == 1: - return [] - if self._isa == "x86": - # return last operand - return instruction_form.operands[-1:] - if self._isa == "aarch64": - # return first operand - return instruction_form.operands[:1] - else: - raise 
ValueError("Unsupported ISA {}.".format(self._isa)) - def substitute_mem_address(self, operands): """Create memory wildcard for all memory operands""" return [ diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py index c9d64a5..7d3eb46 100644 --- a/osaca/semantics/kernel_dg.py +++ b/osaca/semantics/kernel_dg.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 import copy -import os -import signal import time from itertools import chain from multiprocessing import Manager, Process, cpu_count @@ -38,7 +36,8 @@ class KernelDG(nx.DiGraph): self.kernel, timeout, flag_dependencies ) - def _extend_path(self, dst_list, kernel, dg, offset): + @classmethod + def _extend_path(cls, dst_list, kernel, dg, offset): for instr in kernel: generator_path = nx.algorithms.simple_paths.all_simple_paths( dg, instr.line_number, instr.line_number + offset @@ -138,7 +137,7 @@ class KernelDG(nx.DiGraph): all_paths = manager.list() processes = [ Process( - target=self._extend_path, + target=KernelDG._extend_path, args=(all_paths, instr_section, dg, offset), ) for instr_section in instrs @@ -164,9 +163,7 @@ class KernelDG(nx.DiGraph): # terminate running processes for p in processes: if p.is_alive(): - # Python 3.6 does not support Process.kill(). 
- # Can be changed to `p.kill()` after EoL (01/22) of Py3.6 - os.kill(p.pid, signal.SIGKILL) + p.kill() p.join() all_paths = list(all_paths) else: @@ -186,11 +183,11 @@ class KernelDG(nx.DiGraph): for s, d in nx.utils.pairwise(path): edge_lat = dg.edges[s, d]["latency"] # map source node back to original line numbers - if s >= offset: + if s > offset: s -= offset lat_path.append((s, edge_lat)) lat_sum += edge_lat - if d >= offset: + if d > offset: d -= offset lat_path.sort() @@ -413,7 +410,7 @@ class KernelDG(nx.DiGraph): addr_change = 0 if isinstance(src.offset, ImmediateOperand) and src.offset.value is not None: addr_change += src.offset.value - if mem.offset: + if isinstance(mem.offset, ImmediateOperand) and mem.offset.value is not None: addr_change -= mem.offset.value if mem.base and src.base: base_change = register_changes.get( diff --git a/osaca/semantics/marker_utils.py b/osaca/semantics/marker_utils.py index 5f2eb4a..6892ee1 100644 --- a/osaca/semantics/marker_utils.py +++ b/osaca/semantics/marker_utils.py @@ -1,29 +1,35 @@ #!/usr/bin/env python3 from collections import OrderedDict +from enum import Enum -from osaca.parser import ParserAArch64, ParserX86ATT, get_parser -from osaca.parser.register import RegisterOperand +from osaca.parser import get_parser from osaca.parser.identifier import IdentifierOperand from osaca.parser.immediate import ImmediateOperand +from osaca.parser.memory import MemoryOperand +from osaca.parser.register import RegisterOperand COMMENT_MARKER = {"start": "OSACA-BEGIN", "end": "OSACA-END"} -def reduce_to_section(kernel, isa): +# State of marker matching. +# No: we have determined that the code doesn't match the marker. +# Partial: so far the code matches the marker, but we have not reached the end of the marker yet. +# Full: the code matches all instructions in the marker. 
+class Matching(Enum): + No = 0 + Partial = 1 + Full = 2 + + +def reduce_to_section(kernel, parser): """ Finds OSACA markers in given kernel and returns marked section :param list kernel: kernel to check - :param str isa: ISA of given kernel + :param BaseParser parser: parser used to produce the kernel :returns: `list` -- marked section of kernel as list of instruction forms """ - isa = isa.lower() - if isa == "x86": - start, end = find_marked_kernel_x86ATT(kernel) - elif isa == "aarch64": - start, end = find_marked_kernel_AArch64(kernel) - else: - raise ValueError("ISA not supported.") + start, end = find_marked_section(kernel, parser, COMMENT_MARKER) if start == -1: start = 0 if end == -1: @@ -31,63 +37,121 @@ def reduce_to_section(kernel, isa): return kernel[start:end] -def find_marked_kernel_AArch64(lines): +def find_marked_section(lines, parser, comments=None): """ - Find marked section for AArch64 + Return indexes of marked section :param list lines: kernel + :param parser: parser to use for checking + :type parser: :class:`~parser.BaseParser` + :param comments: dictionary with start and end markers in comment format, defaults to None + :type comments: dict, optional :returns: `tuple of int` -- start and end line of marked section """ - nop_bytes = [213, 3, 32, 31] - return find_marked_section( - lines, - ParserAArch64(), - ["mov"], - "x1", - [111, 222], - nop_bytes, - reverse=True, - comments=COMMENT_MARKER, - ) + index_start = -1 + index_end = -1 + start_marker = parser.start_marker() + end_marker = parser.end_marker() + for i, line in enumerate(lines): + try: + if line.mnemonic is None and comments is not None and line.comment is not None: + if comments["start"] == line.comment: + index_start = i + 1 + elif comments["end"] == line.comment: + index_end = i + if index_start == -1: + matching_lines = match_lines(parser, lines[i:], start_marker) + if matching_lines > 0: + # Return the first line after the marker. 
+ index_start = i + matching_lines + if index_end == -1: + if match_lines(parser, lines[i:], end_marker): + index_end = i + except TypeError as e: + print(i, e, line) + if index_start != -1 and index_end != -1: + break + return index_start, index_end -def find_marked_kernel_x86ATT(lines): +# This function and the following ones traverse the syntactic tree produced by the parser and try to +# match it to the marker. This is necessary because the IACA markers are significantly different on +# MSVC x86 than on other ISA/compilers. Therefore, simple string matching is not sufficient. Also, +# the syntax of numeric literals depends on the parser and should not be known to this module. +# The matching only checks for a limited number of properties (and the marker doesn't specify the +# rest). +def match_lines(parser, lines, marker): """ - Find marked section for x86 + Returns the number of `lines` matched by the `marker`. - :param list lines: kernel - :returns: `tuple of int` -- start and end line of marked section + :param list of `InstructionForm` lines: parsed assembly code. + :param list of `InstructionForm` marker: pattern to match against the `lines`. + :return int: the length of the match in the parsed code, 0 if there is no match. """ - nop_bytes = [100, 103, 144] - return find_marked_section( - lines, - ParserX86ATT(), - ["mov", "movl"], - "ebx", - [111, 222], - nop_bytes, - comments=COMMENT_MARKER, - ) + marker_iter = iter(marker) + marker_line = next(marker_iter) + for matched_lines, line in enumerate(lines): + if isinstance(marker_line, list): + # No support for partial matching in lists. + for marker_alternative in marker_line: + matching = match_line(parser, line, marker_alternative) + if matching == Matching.Full: + break + else: + return 0 + marker_line = next(marker_iter, None) + else: + matching = match_line(parser, line, marker_line) + if matching == Matching.No: + return 0 + elif matching == Matching.Partial: + # Try the same marker line again. 
The call to `match_line` consumed some of the + # directive parameters. + pass + elif matching == Matching.Full: + # Move to the next marker line, the current one has been fully matched. + marker_line = next(marker_iter, None) + # If we have reached the last marker line, the parsed code matches the marker. + if not marker_line: + return matched_lines + 1 -def get_marker(isa, comment=""): +def get_marker(isa, syntax="ATT", comment=""): """Return tuple of start and end marker lines.""" isa = isa.lower() + syntax = syntax.lower() if isa == "x86": - start_marker_raw = ( - "movl $111, %ebx # OSACA START MARKER\n" - ".byte 100 # OSACA START MARKER\n" - ".byte 103 # OSACA START MARKER\n" - ".byte 144 # OSACA START MARKER\n" - ) - if comment: - start_marker_raw += "# {}\n".format(comment) - end_marker_raw = ( - "movl $222, %ebx # OSACA END MARKER\n" - ".byte 100 # OSACA END MARKER\n" - ".byte 103 # OSACA END MARKER\n" - ".byte 144 # OSACA END MARKER\n" - ) + if syntax == "att": + start_marker_raw = ( + "movl $111, %ebx # OSACA START MARKER\n" + ".byte 100 # OSACA START MARKER\n" + ".byte 103 # OSACA START MARKER\n" + ".byte 144 # OSACA START MARKER\n" + ) + if comment: + start_marker_raw += "# {}\n".format(comment) + end_marker_raw = ( + "movl $222, %ebx # OSACA END MARKER\n" + ".byte 100 # OSACA END MARKER\n" + ".byte 103 # OSACA END MARKER\n" + ".byte 144 # OSACA END MARKER\n" + ) + else: + # Intel syntax + start_marker_raw = ( + "movl ebx, 111 # OSACA START MARKER\n" + ".byte 100 # OSACA START MARKER\n" + ".byte 103 # OSACA START MARKER\n" + ".byte 144 # OSACA START MARKER\n" + ) + if comment: + start_marker_raw += "# {}\n".format(comment) + end_marker_raw = ( + "movl ebx, 222 # OSACA END MARKER\n" + ".byte 100 # OSACA END MARKER\n" + ".byte 103 # OSACA END MARKER\n" + ".byte 144 # OSACA END MARKER\n" + ) elif isa == "aarch64": start_marker_raw = ( "mov x1, #111 // OSACA START MARKER\n" @@ -108,92 +172,97 @@ def get_marker(isa, comment=""): return start_marker, 
end_marker -def find_marked_section( - lines, parser, mov_instr, mov_reg, mov_vals, nop_bytes, reverse=False, comments=None -): +def match_line(parser, line, marker_line): """ - Return indexes of marked section + Returns whether `line` matches `marker_line`. - :param list lines: kernel - :param parser: parser to use for checking - :type parser: :class:`~parser.BaseParser` - :param mov_instr: all MOV instruction possible for the marker - :type mov_instr: `list of str` - :param mov_reg: register used for the marker - :type mov_reg: `str` - :param mov_vals: values needed to be moved to ``mov_reg`` for valid marker - :type mov_vals: `list of int` - :param nop_bytes: bytes representing opcode of NOP - :type nop_bytes: `list of int` - :param reverse: indicating if ISA syntax requires reverse operand order, defaults to `False` - :type reverse: boolean, optional - :param comments: dictionary with start and end markers in comment format, defaults to None - :type comments: dict, optional - :returns: `tuple of int` -- start and end line of marked section + :param `InstructionForm` line: parsed assembly code. + :param `InstructionForm` marker_line: pattern to match against `line`. + :return: Matching. In case of partial match, `marker_line` is modified and should be reused for + matching the next line in the parsed assembly code. 
""" - # TODO match to instructions returned by get_marker - index_start = -1 - index_end = -1 - for i, line in enumerate(lines): - try: - if line.mnemonic is None and comments is not None and line.comment is not None: - if comments["start"] == line.comment: - index_start = i + 1 - elif comments["end"] == line.comment: - index_end = i - elif ( - line.mnemonic in mov_instr - and len(lines) > i + 1 - and lines[i + 1].directive is not None - ): - source = line.operands[0 if not reverse else 1] - destination = line.operands[1 if not reverse else 0] - # instruction pair matches, check for operands - if ( - isinstance(source, ImmediateOperand) - and parser.normalize_imd(source) == mov_vals[0] - and isinstance(destination, RegisterOperand) - and parser.get_full_reg_name(destination) == mov_reg - ): - # operands of first instruction match start, check for second one - match, line_count = match_bytes(lines, i + 1, nop_bytes) - if match: - # return first line after the marker - index_start = i + 1 + line_count - elif ( - isinstance(source, ImmediateOperand) - and parser.normalize_imd(source) == mov_vals[1] - and isinstance(destination, RegisterOperand) - and parser.get_full_reg_name(destination) == mov_reg - ): - # operand of first instruction match end, check for second one - match, line_count = match_bytes(lines, i + 1, nop_bytes) - if match: - # return line of the marker - index_end = i - except TypeError: - print(i, line) - if index_start != -1 and index_end != -1: - break - return index_start, index_end - - -def match_bytes(lines, index, byte_list): - """Match bytes directives of markers""" - # either all bytes are in one line or in separate ones - extracted_bytes = [] - line_count = 0 - while ( - index < len(lines) - and lines[index].directive is not None - and lines[index].directive.name == "byte" + if ( + line.mnemonic + and marker_line.mnemonic + and line.mnemonic == marker_line.mnemonic + and match_operands(line.operands, marker_line.operands) ): - line_count += 1 - 
extracted_bytes += [int(x, 0) for x in lines[index].directive.parameters] - index += 1 - if extracted_bytes[0 : len(byte_list)] == byte_list: - return True, line_count - return False, -1 + return Matching.Full + if ( + line.directive + and marker_line.directive + and line.directive.name == marker_line.directive.name + ): + return match_parameters( + parser, line.directive.parameters, marker_line.directive.parameters + ) + else: + return Matching.No + + +def match_operands(line_operands, marker_line_operands): + if len(line_operands) != len(marker_line_operands): + return False + return all( + match_operand(line_operand, marker_line_operand) + for line_operand, marker_line_operand in zip(line_operands, marker_line_operands) + ) + + +def match_operand(line_operand, marker_line_operand): + if ( + isinstance(line_operand, ImmediateOperand) + and isinstance(marker_line_operand, ImmediateOperand) + and line_operand.value == marker_line_operand.value + ): + return True + if ( + isinstance(line_operand, RegisterOperand) + and isinstance(marker_line_operand, RegisterOperand) + and line_operand.name.lower() == marker_line_operand.name.lower() + ): + return True + if ( + isinstance(line_operand, MemoryOperand) + and isinstance(marker_line_operand, MemoryOperand) + and match_operand(line_operand.base, marker_line_operand.base) + and match_operand(line_operand.offset, marker_line_operand.offset) + ): + return True + return False + + +def match_parameters(parser, line_parameters, marker_line_parameters): + """ + Returns whether `line_parameters` matches `marker_line_parameters`. + + :param list of strings line_parameters: parameters of a directive in the parsed assembly code. + :param list of strings marker_line_parameters: parameters of a directive in the marker. + :return: Matching. In case of partial match, `marker_line_parameters` is modified and should be + reused for matching the next line in the parsed assembly code. 
+ """ + # The elements of `marker_line_parameters` are consumed as they are matched. + for line_parameter in line_parameters: + if not marker_line_parameters: + break + marker_line_parameter = marker_line_parameters[0] + if not match_parameter(parser, line_parameter, marker_line_parameter): + return Matching.No + marker_line_parameters.pop(0) + if marker_line_parameters: + return Matching.Partial + else: + return Matching.Full + + +def match_parameter(parser, line_parameter, marker_line_parameter): + if line_parameter.lower() == marker_line_parameter.lower(): + return True + else: + # If the parameters don't match verbatim, check if they represent the same immediate value. + line_immediate = ImmediateOperand(value=line_parameter) + marker_line_immediate = ImmediateOperand(value=marker_line_parameter) + return parser.normalize_imd(line_immediate) == parser.normalize_imd(marker_line_immediate) def find_jump_labels(lines): diff --git a/tests/test_base_parser.py b/tests/test_base_parser.py index 9794ce9..5159799 100755 --- a/tests/test_base_parser.py +++ b/tests/test_base_parser.py @@ -20,6 +20,8 @@ class TestBaseParser(unittest.TestCase): pass with open(self._find_file("triad_x86_iaca.s")) as f: self.triad_code = f.read() + with open(self._find_file("triad_x86_intel.s")) as f: + self.triad_code_intel = f.read() with open(self._find_file("triad_arm_iaca.s")) as f: self.triad_code_arm = f.read() with open(self._find_file("kernel_x86.s")) as f: @@ -68,10 +70,11 @@ class TestBaseParser(unittest.TestCase): self.parser.normalize_imd(imd_hex_1) def test_detect_ISA(self): - self.assertEqual(BaseParser.detect_ISA(self.triad_code), "x86") - self.assertEqual(BaseParser.detect_ISA(self.triad_code_arm), "aarch64") - self.assertEqual(BaseParser.detect_ISA(self.x86_code), "x86") - self.assertEqual(BaseParser.detect_ISA(self.aarch64_code), "aarch64") + self.assertEqual(BaseParser.detect_ISA(self.triad_code), ("x86", "ATT")) + 
self.assertEqual(BaseParser.detect_ISA(self.triad_code_intel), ("x86", "INTEL")) + self.assertEqual(BaseParser.detect_ISA(self.triad_code_arm), ("aarch64", None)) + self.assertEqual(BaseParser.detect_ISA(self.x86_code), ("x86", "ATT")) + self.assertEqual(BaseParser.detect_ISA(self.aarch64_code), ("aarch64", None)) ################## # Helper functions diff --git a/tests/test_cli.py b/tests/test_cli.py index 47d1623..d8dca82 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -12,7 +12,7 @@ from unittest.mock import patch import osaca.osaca as osaca from osaca.db_interface import sanity_check -from osaca.parser import ParserAArch64, ParserX86ATT +from osaca.parser import ParserAArch64, ParserX86ATT, ParserX86Intel from osaca.semantics import MachineModel @@ -83,6 +83,7 @@ class TestCLI(unittest.TestCase): def test_get_parser(self): self.assertTrue(isinstance(osaca.get_asm_parser("csx"), ParserX86ATT)) + self.assertTrue(isinstance(osaca.get_asm_parser("csx", "intel"), ParserX86Intel)) self.assertTrue(isinstance(osaca.get_asm_parser("tx2"), ParserAArch64)) with self.assertRaises(ValueError): osaca.get_asm_parser("UNKNOWN") diff --git a/tests/test_files/gs_x86_gcc.s b/tests/test_files/gs_x86_gcc.s new file mode 100644 index 0000000..307e9e6 --- /dev/null +++ b/tests/test_files/gs_x86_gcc.s @@ -0,0 +1,102 @@ +# Produced with gcc 14.2 with -O3 -march=sapphirerapids -fopenmp-simd -mprefer-vector-width=512, https://godbolt.org/z/drE47x1b4. 
+.LC3: + .string "%f\n" +main: + push r14 + xor edi, edi + push r13 + push r12 + push rbp + push rbx + call time + mov edi, eax + call srand + mov edi, 1600 + call malloc + mov r12, rax + mov rbp, rax + lea r13, [rax+1600] + mov rbx, rax +.L2: + mov edi, 1600 + add rbx, 8 + call malloc + mov QWORD PTR [rbx-8], rax + cmp r13, rbx + jne .L2 + lea rbx, [r12+8] + lea r13, [r12+1592] +.L5: + mov r14d, 8 +.L4: + call rand + vxorpd xmm2, xmm2, xmm2 + mov rcx, QWORD PTR [rbx] + movsx rdx, eax + mov esi, eax + imul rdx, rdx, 351843721 + sar esi, 31 + sar rdx, 45 + sub edx, esi + imul edx, edx, 100000 + sub eax, edx + vcvtsi2sd xmm0, xmm2, eax + vdivsd xmm0, xmm0, QWORD PTR .LC0[rip] + vmovsd QWORD PTR [rcx+r14], xmm0 + add r14, 8 + cmp r14, 1592 + jne .L4 + add rbx, 8 + cmp r13, rbx + jne .L5 + vmovsd xmm1, QWORD PTR .LC1[rip] + lea rdi, [r12+1584] +.L6: + mov rdx, QWORD PTR [rbp+8] + mov rcx, QWORD PTR [rbp+16] + mov eax, 1 + mov rsi, QWORD PTR [rbp+0] + vmovsd xmm0, QWORD PTR [rdx] +.L7: + vaddsd xmm0, xmm0, QWORD PTR [rcx+rax*8] + vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8] + vaddsd xmm0, xmm0, QWORD PTR [rsi+rax*8] + vmulsd xmm0, xmm0, xmm1 + vmovsd QWORD PTR [rdx+rax*8], xmm0 + inc rax + cmp rax, 199 + jne .L7 + vmovsd xmm0, QWORD PTR [rdx+1592] + add rbp, 8 + vmovsd QWORD PTR [rcx+8], xmm0 + cmp rdi, rbp + jne .L6 + mov rax, QWORD PTR [r12+1584] + vmovsd xmm0, QWORD PTR .LC2[rip] + vucomisd xmm0, QWORD PTR [rax+1584] + jp .L9 + je .L19 +.L9: + pop rbx + xor eax, eax + pop rbp + pop r12 + pop r13 + pop r14 + ret +.L19: + mov rax, QWORD PTR [r12] + mov edi, OFFSET FLAT:.LC3 + vmovsd xmm0, QWORD PTR [rax] + mov eax, 1 + call printf + jmp .L9 +.LC0: + .long 0 + .long 1083129856 +.LC1: + .long 2061584302 + .long 1072934420 +.LC2: + .long -57724360 + .long 1072939201 diff --git a/tests/test_files/gs_x86_icc.s b/tests/test_files/gs_x86_icc.s new file mode 100644 index 0000000..3da0cec --- /dev/null +++ b/tests/test_files/gs_x86_icc.s @@ -0,0 +1,227 @@ +# Produced with ICC 
2021.10.0 with -O3 -xcore-avx512, https://godbolt.org/z/87bYseh8r +..B1.1: # Preds ..B1.0 + push rbp #5.32 + mov rbp, rsp #5.32 + and rsp, -128 #5.32 + push r15 #5.32 + push rbx #5.32 + sub rsp, 112 #5.32 + mov edi, 3 #5.32 + mov rsi, 0x64199d9ffe #5.32 + call __intel_new_feature_proc_init #5.32 +..B1.34: # Preds ..B1.1 + vstmxcsr DWORD PTR [rsp] #5.32 + xor edi, edi #11.7 + or DWORD PTR [rsp], 32832 #5.32 + vldmxcsr DWORD PTR [rsp] #5.32 + call time #11.7 +..B1.2: # Preds ..B1.34 + mov edi, eax #11.1 + call srand #11.1 +..B1.3: # Preds ..B1.2 + mov edi, 1600 #13.16 + call malloc #13.16 +..B1.35: # Preds ..B1.3 + mov rsi, rax #13.16 +..B1.4: # Preds ..B1.35 + xor eax, eax #14.1 + mov rbx, rsi #14.1 + mov r15, rax #14.1 +..B1.5: # Preds ..B1.6 ..B1.4 + mov edi, 1600 #15.22 + call malloc #15.22 +..B1.6: # Preds ..B1.5 + mov QWORD PTR [rbx+r15*8], rax #15.5 + inc r15 #14.1 + cmp r15, 200 #14.1 + jb ..B1.5 # Prob 82% #14.1 +..B1.7: # Preds ..B1.6 + xor eax, eax #17.1 + mov rsi, rbx # + mov r15, rax #19.44 + mov QWORD PTR [rsp], r13 #19.44[spill] + mov QWORD PTR [8+rsp], r14 #19.44[spill] +..B1.8: # Preds ..B1.11 ..B1.7 + mov r13, QWORD PTR [8+rbx+r15*8] #19.5 + xor r14d, r14d #18.3 +..B1.9: # Preds ..B1.10 ..B1.8 + call rand #19.26 +..B1.37: # Preds ..B1.9 + mov r8d, eax #19.26 +..B1.10: # Preds ..B1.37 + mov eax, 351843721 #19.33 + mov ecx, r8d #19.33 + imul r8d #19.33 + sar ecx, 31 #19.33 + vxorpd xmm0, xmm0, xmm0 #19.33 + sar edx, 13 #19.33 + sub edx, ecx #19.33 + imul edi, edx, -100000 #19.33 + add r8d, edi #19.33 + vcvtsi2sd xmm0, xmm0, r8d #19.33 + vdivsd xmm1, xmm0, QWORD PTR .L_2il0floatpacket.0[rip] #19.44 + vmovsd QWORD PTR [8+r13+r14*8], xmm1 #19.5 + inc r14 #18.3 + cmp r14, 198 #18.3 + jb ..B1.9 # Prob 82% #18.3 +..B1.11: # Preds ..B1.10 + inc r15 #17.1 + cmp r15, 198 #17.1 + jb ..B1.8 # Prob 91% #17.1 +..B1.12: # Preds ..B1.11 + mov r13, QWORD PTR [rsp] #[spill] + mov rsi, rbx # + mov r14, QWORD PTR [8+rsp] #[spill] + xor ecx, ecx #23.1 + vmovsd xmm0, 
QWORD PTR .L_2il0floatpacket.1[rip] #10.14 + xor dil, dil #10.14 + mov edx, 196 #10.14 +..B1.13: # Preds ..B1.27 ..B1.12 + mov rax, QWORD PTR [8+rsi+rcx*8] #25.5 + mov r8, rax #25.5 + lea r9, QWORD PTR [8+rax] #25.5 + sub r8, r9 #25.5 + cmp r8, 1584 #24.3 + jge ..B1.15 # Prob 50% #24.3 +..B1.14: # Preds ..B1.13 + neg r8 #26.7 + cmp r8, 1584 #24.3 + jl ..B1.22 # Prob 50% #24.3 +..B1.15: # Preds ..B1.13 ..B1.14 + lea r8, QWORD PTR [16+rax] #27.9 + sub r9, r8 #27.9 + cmp r9, 1584 #24.3 + jge ..B1.17 # Prob 50% #24.3 +..B1.16: # Preds ..B1.15 + neg r9 #25.5 + cmp r9, 1584 #24.3 + jl ..B1.22 # Prob 50% #24.3 +..B1.17: # Preds ..B1.15 ..B1.16 + vmovsd xmm1, QWORD PTR [rax] #27.9 + mov bl, dil #24.3 + mov r9, QWORD PTR [rsi+rcx*8] #27.21 + xor r11d, r11d #25.5 + mov r10, QWORD PTR [16+rsi+rcx*8] #26.19 + mov r8, QWORD PTR [8+rsi+rcx*8] #27.9 +..B1.18: # Preds ..B1.18 ..B1.17 + vmovsd xmm2, QWORD PTR [8+r11+r10] #26.19 + inc bl #24.3 + vaddsd xmm3, xmm2, QWORD PTR [16+r11+r8] #25.5 + vaddsd xmm4, xmm3, QWORD PTR [8+r11+r9] #25.5 + vaddsd xmm1, xmm4, xmm1 #25.5 + vmulsd xmm8, xmm0, xmm1 #27.21 + vmovsd QWORD PTR [8+r11+r8], xmm8 #25.5 + vmovsd xmm5, QWORD PTR [16+r11+r10] #26.19 + vaddsd xmm6, xmm5, QWORD PTR [24+r11+r8] #26.19 + vaddsd xmm7, xmm6, QWORD PTR [16+r11+r9] #27.9 + vaddsd xmm9, xmm7, xmm8 #27.21 + vmulsd xmm13, xmm0, xmm9 #27.21 + vmovsd QWORD PTR [16+r11+r8], xmm13 #25.5 + vmovsd xmm10, QWORD PTR [24+r11+r10] #26.19 + vaddsd xmm11, xmm10, QWORD PTR [32+r11+r8] #26.19 + vaddsd xmm12, xmm11, QWORD PTR [24+r11+r9] #27.9 + vaddsd xmm14, xmm12, xmm13 #27.21 + vmulsd xmm18, xmm0, xmm14 #27.21 + vmovsd QWORD PTR [24+r11+r8], xmm18 #25.5 + vmovsd xmm15, QWORD PTR [32+r11+r10] #26.19 + vaddsd xmm16, xmm15, QWORD PTR [40+r11+r8] #26.19 + vaddsd xmm17, xmm16, QWORD PTR [32+r11+r9] #27.9 + vaddsd xmm19, xmm17, xmm18 #27.21 + vmulsd xmm1, xmm0, xmm19 #27.21 + vmovsd QWORD PTR [32+r11+r8], xmm1 #25.5 + add r11, 32 #24.3 + cmp bl, 49 #24.3 + jb ..B1.18 # Prob 27% #24.3 
+..B1.19: # Preds ..B1.18 + mov r11, rdx #24.3 +..B1.20: # Preds ..B1.20 ..B1.19 + vmovsd xmm1, QWORD PTR [r8+r11*8] #26.7 + vaddsd xmm2, xmm1, QWORD PTR [8+r10+r11*8] #26.19 + vaddsd xmm3, xmm2, QWORD PTR [16+r8+r11*8] #27.9 + vaddsd xmm4, xmm3, QWORD PTR [8+r9+r11*8] #27.21 + vmulsd xmm5, xmm0, xmm4 #27.21 + vmovsd QWORD PTR [8+r8+r11*8], xmm5 #25.5 + inc r11 #24.3 + cmp r11, 198 #24.3 + jb ..B1.20 # Prob 66% #24.3 + jmp ..B1.27 # Prob 100% #24.3 +..B1.22: # Preds ..B1.14 ..B1.16 + mov r9, QWORD PTR [rsi+rcx*8] #27.21 + mov bl, dil #24.3 + mov r10, QWORD PTR [16+rsi+rcx*8] #26.19 + xor r11d, r11d #25.5 + mov r8, QWORD PTR [8+rsi+rcx*8] #26.7 +..B1.23: # Preds ..B1.23 ..B1.22 + inc bl #24.3 + vmovsd xmm1, QWORD PTR [r11+r8] #26.7 + vaddsd xmm2, xmm1, QWORD PTR [8+r11+r10] #26.19 + vaddsd xmm3, xmm2, QWORD PTR [16+r11+r8] #27.9 + vaddsd xmm4, xmm3, QWORD PTR [8+r11+r9] #27.21 + vmulsd xmm5, xmm0, xmm4 #27.21 + vmovsd QWORD PTR [8+r11+r8], xmm5 #25.5 + vaddsd xmm6, xmm5, QWORD PTR [16+r11+r10] #26.19 + vaddsd xmm7, xmm6, QWORD PTR [24+r11+r8] #27.9 + vaddsd xmm8, xmm7, QWORD PTR [16+r11+r9] #27.21 + vmulsd xmm9, xmm0, xmm8 #27.21 + vmovsd QWORD PTR [16+r11+r8], xmm9 #25.5 + vaddsd xmm10, xmm9, QWORD PTR [24+r11+r10] #26.19 + vaddsd xmm11, xmm10, QWORD PTR [32+r11+r8] #27.9 + vaddsd xmm12, xmm11, QWORD PTR [24+r11+r9] #27.21 + vmulsd xmm13, xmm0, xmm12 #27.21 + vmovsd QWORD PTR [24+r11+r8], xmm13 #25.5 + vaddsd xmm14, xmm13, QWORD PTR [32+r11+r10] #26.19 + vaddsd xmm15, xmm14, QWORD PTR [40+r11+r8] #27.9 + vaddsd xmm16, xmm15, QWORD PTR [32+r11+r9] #27.21 + vmulsd xmm17, xmm0, xmm16 #27.21 + vmovsd QWORD PTR [32+r11+r8], xmm17 #25.5 + add r11, 32 #24.3 + cmp bl, 49 #24.3 + jb ..B1.23 # Prob 27% #24.3 +..B1.24: # Preds ..B1.23 + mov r11, rdx #24.3 +..B1.25: # Preds ..B1.25 ..B1.24 + vmovsd xmm1, QWORD PTR [r8+r11*8] #26.7 + vaddsd xmm2, xmm1, QWORD PTR [8+r10+r11*8] #26.19 + vaddsd xmm3, xmm2, QWORD PTR [16+r8+r11*8] #27.9 + vaddsd xmm4, xmm3, QWORD PTR [8+r9+r11*8] 
#27.21 + vmulsd xmm5, xmm0, xmm4 #27.21 + vmovsd QWORD PTR [8+r8+r11*8], xmm5 #25.5 + inc r11 #24.3 + cmp r11, 198 #24.3 + jb ..B1.25 # Prob 66% #24.3 +..B1.27: # Preds ..B1.25 ..B1.20 + mov r8, QWORD PTR [16+rsi+rcx*8] #30.3 + inc rcx #23.1 + mov rax, QWORD PTR [1592+rax] #30.15 + mov QWORD PTR [8+r8], rax #30.3 + cmp rcx, 198 #23.1 + jb ..B1.13 # Prob 91% #23.1 +..B1.28: # Preds ..B1.27 + mov rax, QWORD PTR [1584+rsi] #33.4 + vmovsd xmm0, QWORD PTR [1584+rax] #33.4 + vucomisd xmm0, QWORD PTR .L_2il0floatpacket.2[rip] #33.29 + jp ..B1.29 # Prob 0% #33.29 + je ..B1.30 # Prob 5% #33.29 +..B1.29: # Preds ..B1.28 ..B1.30 + xor eax, eax #34.1 + add rsp, 112 #34.1 + pop rbx #34.1 + pop r15 #34.1 + mov rsp, rbp #34.1 + pop rbp #34.1 + ret #34.1 +..B1.30: # Preds ..B1.28 + mov rax, QWORD PTR [rsi] #33.39 + mov edi, offset flat: .L_2__STRING.0 #33.39 + vmovsd xmm0, QWORD PTR [rax] #33.39 + mov eax, 1 #33.39 + call printf #33.39 + jmp ..B1.29 # Prob 100% #33.39 +.L_2il0floatpacket.0: + .long 0x00000000,0x408f4000 +.L_2il0floatpacket.1: + .long 0x7ae147ae,0x3ff3ae14 +.L_2il0floatpacket.2: + .long 0xfc8f3238,0x3ff3c0c1 +.L_2__STRING.0: + .long 681509 diff --git a/tests/test_files/kernel_x86_intel.s b/tests/test_files/kernel_x86_intel.s new file mode 100644 index 0000000..eb76ea1 --- /dev/null +++ b/tests/test_files/kernel_x86_intel.s @@ -0,0 +1,9 @@ +; https://godbolt.org/z/o49jjojnx /std:c++latest /O1 /fp:contract /arch:AVX2 +$LL13@foo: + vmovsd xmm1, QWORD PTR [rax] + vmovsd xmm0, QWORD PTR [rcx+rax] + vfmadd213sd xmm1, xmm0, QWORD PTR [rdx+rax] + vmovsd QWORD PTR [r8+rax], xmm1 + lea rax, QWORD PTR [rax+8] + sub rbx, 1 + jne SHORT $LL13@foo diff --git a/tests/test_files/kernel_x86_intel_memdep.s b/tests/test_files/kernel_x86_intel_memdep.s new file mode 100644 index 0000000..98ef9fc --- /dev/null +++ b/tests/test_files/kernel_x86_intel_memdep.s @@ -0,0 +1,19 @@ +; Translated from kernel_x86_memdep.s +L4: + vmovsd [rax+8], xmm0 # line 3 <---------------------------------+ + 
add rax, 8 # rax=rax_orig+8 | + vmovsd [rax+rcx*8+8], xmm0 # line 5 <------------------------------------------+ + vaddsd xmm0, xmm0, [rax] # depends on line 3, rax+8;[rax] == [rax+8] --------+ | + sub rax, -8 # rax=rax_orig+16 | | + vaddsd xmm0, xmm0, [rax-8] # depends on line 3, rax+16;[rax-8] == [rax+8] -----+ | + dec rcx # rcx=rcx_orig-1 | + vaddsd xmm0, xmm0, [rax+rcx*8+8] # depends on line 5, [(rax+8)+(rcx-1)*8+8] == [rax+rcx*+8] --+ + mov rdx, rcx # | + vaddsd xmm0, xmm0, [rax+rdx*8+8] # depends on line 5, rcx == rdx -----------------------------+ + vmulsd xmm0, xmm0, xmm1 + add rax, 8 + cmp rsi, rax + jne L4 +; Added to test LOAD dependencies + shl rax, 5 + subsd xmm10, QWORD PTR [rax+r8] diff --git a/tests/test_files/triad_x86_intel.s b/tests/test_files/triad_x86_intel.s new file mode 100644 index 0000000..3de2eae --- /dev/null +++ b/tests/test_files/triad_x86_intel.s @@ -0,0 +1,124 @@ +; Listing generated by Microsoft (R) Optimizing Compiler Version 19.41.34123.0 + +include listing.inc + +INCLUDELIB MSVCRTD +INCLUDELIB OLDNAMES + +msvcjmc SEGMENT +__FAC6D534_triad@c DB 01H +msvcjmc ENDS +PUBLIC kernel +PUBLIC __JustMyCode_Default +EXTRN dummy:PROC +EXTRN _RTC_InitBase:PROC +EXTRN _RTC_Shutdown:PROC +EXTRN __CheckForDebuggerJustMyCode:PROC +EXTRN _fltused:DWORD +; COMDAT pdata +pdata SEGMENT +$pdata$kernel DD imagerel $LN9 + DD imagerel $LN9+194 + DD imagerel $unwind$kernel +pdata ENDS +; COMDAT rtc$TMZ +rtc$TMZ SEGMENT +_RTC_Shutdown.rtc$TMZ DQ FLAT:_RTC_Shutdown +rtc$TMZ ENDS +; COMDAT rtc$IMZ +rtc$IMZ SEGMENT +_RTC_InitBase.rtc$IMZ DQ FLAT:_RTC_InitBase +rtc$IMZ ENDS +; COMDAT xdata +xdata SEGMENT +$unwind$kernel DD 025052301H + DD 011e2323H + DD 070170025H + DD 05016H +xdata ENDS +; Function compile flags: /Odt +; COMDAT __JustMyCode_Default +_TEXT SEGMENT +__JustMyCode_Default PROC ; COMDAT + ret 0 +__JustMyCode_Default ENDP +_TEXT ENDS +; Function compile flags: /Odtp /RTCsu /ZI +; COMDAT kernel +_TEXT SEGMENT +r$1 = 4 +i$2 = 36 +a$ = 288 +b$ = 296 
+c$ = 304 +s$ = 312 +repeat$ = 320 +cur_elements$ = 328 +kernel PROC ; COMDAT +; File C:\Users\phl.bastiani\Projects\OSACA\validation\kernels\triad.c +; Line 16 +$LN9: + movsd QWORD PTR [rsp+32], xmm3 + mov QWORD PTR [rsp+24], r8 + mov QWORD PTR [rsp+16], rdx + mov QWORD PTR [rsp+8], rcx + push rbp + push rdi + sub rsp, 296 ; 00000128H + lea rbp, QWORD PTR [rsp+32] + lea rcx, OFFSET FLAT:__FAC6D534_triad@c + call __CheckForDebuggerJustMyCode + npad 1 +; Line 17 + mov DWORD PTR r$1[rbp], 0 + jmp SHORT $LN4@kernel +$LN2@kernel: + mov eax, DWORD PTR r$1[rbp] + inc eax + mov DWORD PTR r$1[rbp], eax +$LN4@kernel: + mov eax, DWORD PTR repeat$[rbp] + cmp DWORD PTR r$1[rbp], eax + jge SHORT $LN3@kernel +; Line 18 + mov DWORD PTR i$2[rbp], 0 + jmp SHORT $LN7@kernel +$LN5@kernel: + mov eax, DWORD PTR i$2[rbp] + inc eax + mov DWORD PTR i$2[rbp], eax +$LN7@kernel: + mov eax, DWORD PTR cur_elements$[rbp] + cmp DWORD PTR i$2[rbp], eax + jge SHORT $LN6@kernel +; Line 19 + movsxd rax, DWORD PTR i$2[rbp] + movsxd rcx, DWORD PTR i$2[rbp] + mov rdx, QWORD PTR c$[rbp] + movsd xmm0, QWORD PTR s$[rbp] + mulsd xmm0, QWORD PTR [rdx+rcx*8] + mov rcx, QWORD PTR b$[rbp] + movsd xmm1, QWORD PTR [rcx+rax*8] + addsd xmm1, xmm0 + movaps xmm0, xmm1 + movsxd rax, DWORD PTR i$2[rbp] + mov rcx, QWORD PTR a$[rbp] + movsd QWORD PTR [rcx+rax*8], xmm0 +; Line 20 + jmp SHORT $LN5@kernel +$LN6@kernel: +; Line 21 + mov rcx, QWORD PTR a$[rbp] + call dummy + npad 1 +; Line 22 + jmp SHORT $LN2@kernel +$LN3@kernel: +; Line 23 + lea rsp, QWORD PTR [rbp+264] + pop rdi + pop rbp + ret 0 +kernel ENDP +_TEXT ENDS +END diff --git a/tests/test_files/triad_x86_intel_iaca.s b/tests/test_files/triad_x86_intel_iaca.s new file mode 100644 index 0000000..447406c --- /dev/null +++ b/tests/test_files/triad_x86_intel_iaca.s @@ -0,0 +1,139 @@ +; Listing generated by Microsoft (R) Optimizing Compiler Version 19.41.34123.0 + +include listing.inc + +INCLUDELIB MSVCRTD +INCLUDELIB OLDNAMES + +msvcjmc SEGMENT 
+__68D132EB_concurrencysal@h DB 01H +__4DC47379_sal@h DB 01H +__B6ADDB23_vadefs@h DB 01H +__A2A1025A_vcruntime@h DB 01H +__0EF3BC42_intrin0@inl@h DB 01H +__5EC35D46_setjmp@h DB 01H +__368E74E0_mmintrin@h DB 01H +__735960E1_corecrt@h DB 01H +__211DB995_corecrt_malloc@h DB 01H +__7CD62D9E_malloc@h DB 01H +__22746E0E_xmmintrin@h DB 01H +__4716E7C2_emmintrin@h DB 01H +__98B78F4B_pmmintrin@h DB 01H +__286EFCC9_tmmintrin@h DB 01H +__0155E94A_smmintrin@h DB 01H +__64376086_nmmintrin@h DB 01H +__B18C9AC8_wmmintrin@h DB 01H +__7A18D7CF_zmmintrin@h DB 01H +__4D0C7505_immintrin@h DB 01H +__F7CF9440_ammintrin@h DB 01H +__78F5E131_intrin@h DB 01H +__6A584D4A_iacaMarks@h DB 01H +__FAC6D534_triad@c DB 01H +msvcjmc ENDS +PUBLIC kernel +PUBLIC __JustMyCode_Default +EXTRN dummy:PROC +EXTRN __CheckForDebuggerJustMyCode:PROC +EXTRN _fltused:DWORD +; COMDAT pdata +pdata SEGMENT +$pdata$kernel DD imagerel $LN18 + DD imagerel $LN18+182 + DD imagerel $unwind$kernel +pdata ENDS +; COMDAT voltbl +voltbl SEGMENT +_volmd DB 05bH + DB 079H +voltbl ENDS +; COMDAT xdata +xdata SEGMENT +$unwind$kernel DD 0c2001H + DD 026820H + DD 0b7419H + DD 0a6419H + DD 095419H + DD 083419H + DD 0e0155219H +xdata ENDS +; Function compile flags: /Odt +; COMDAT __JustMyCode_Default +_TEXT SEGMENT +__JustMyCode_Default PROC ; COMDAT + ret 0 +__JustMyCode_Default ENDP +_TEXT ENDS +; Function compile flags: /Ogspy +; COMDAT kernel +_TEXT SEGMENT +a$ = 64 +b$ = 72 +c$ = 80 +s$ = 88 +repeat$ = 96 +cur_elements$ = 104 +kernel PROC ; COMDAT +; File C:\Users\phl.bastiani\Projects\OSACA\validation\kernels\triad.c +; Line 22 +$LN18: + mov rax, rsp + mov QWORD PTR [rax+8], rbx + mov QWORD PTR [rax+16], rbp + mov QWORD PTR [rax+24], rsi + mov QWORD PTR [rax+32], rdi + push r14 + sub rsp, 48 ; 00000030H + mov rbp, rcx + movaps XMMWORD PTR [rax-24], xmm6 + lea rcx, OFFSET FLAT:__FAC6D534_triad@c + movaps xmm6, xmm3 + mov r14, r8 + mov rdi, rdx + call __CheckForDebuggerJustMyCode + mov eax, DWORD PTR repeat$[rsp] + movsxd rsi, 
DWORD PTR cur_elements$[rsp] + test eax, eax + jle SHORT $LN3@kernel + mov ebx, eax +$LL4@kernel: +; Line 24 + test rsi, rsi + jle SHORT $LN6@kernel + mov rcx, r14 + mov rdx, rbp + sub rcx, rdi + mov rax, rdi + sub rdx, rdi + mov r8, rsi +$LL7@kernel: +; Line 26 + mov BYTE PTR gs:111, 111 ; 0000006fH +; Line 28 + movaps xmm0, xmm6 + mulsd xmm0, QWORD PTR [rax+rcx] + addsd xmm0, QWORD PTR [rax] + movsd QWORD PTR [rdx+rax], xmm0 + add rax, 8 +; Line 30 + mov BYTE PTR gs:222, 222 ; 000000deH + sub r8, 1 + jne SHORT $LL7@kernel +$LN6@kernel: +; Line 33 + mov rcx, rbp + call dummy + sub rbx, 1 + jne SHORT $LL4@kernel +$LN3@kernel: +; Line 35 + mov rbx, QWORD PTR [rsp+64] + mov rbp, QWORD PTR [rsp+72] + mov rsi, QWORD PTR [rsp+80] + mov rdi, QWORD PTR [rsp+88] + movaps xmm6, XMMWORD PTR [rsp+32] + add rsp, 48 ; 00000030H + pop r14 + ret 0 +kernel ENDP +_TEXT ENDS +END diff --git a/tests/test_frontend.py b/tests/test_frontend.py index 1436bd0..225867b 100755 --- a/tests/test_frontend.py +++ b/tests/test_frontend.py @@ -34,14 +34,19 @@ class TestFrontend(unittest.TestCase): ) self.machine_model_tx2 = MachineModel(arch="tx2") self.semantics_csx = ArchSemantics( + self.parser_x86, self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml"), ) self.semantics_tx2 = ArchSemantics( + self.parser_AArch64, self.machine_model_tx2, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/aarch64.yml"), ) + self.semantics_csx.normalize_instruction_forms(self.kernel_x86) + self.semantics_tx2.normalize_instruction_forms(self.kernel_AArch64) + for i in range(len(self.kernel_x86)): self.semantics_csx.assign_src_dst(self.kernel_x86[i]) self.semantics_csx.assign_tp_lt(self.kernel_x86[i]) @@ -114,7 +119,7 @@ class TestFrontend(unittest.TestCase): self.assertEqual(line.line_number, analysis_dict["Kernel"][i]["LineNumber"]) def test_dict_output_AArch64(self): - reduced_kernel = reduce_to_section(self.kernel_AArch64, self.semantics_tx2._isa) + reduced_kernel = 
reduce_to_section(self.kernel_AArch64, self.parser_AArch64) dg = KernelDG( reduced_kernel, self.parser_AArch64, diff --git a/tests/test_marker_utils.py b/tests/test_marker_utils.py index 49da8e8..160bfe8 100755 --- a/tests/test_marker_utils.py +++ b/tests/test_marker_utils.py @@ -12,37 +12,47 @@ from osaca.semantics import ( find_jump_labels, find_basic_loop_bodies, ) -from osaca.parser import ParserAArch64, ParserX86ATT +from osaca.parser import ParserAArch64, ParserX86ATT, ParserX86Intel class TestMarkerUtils(unittest.TestCase): @classmethod def setUpClass(self): self.parser_AArch = ParserAArch64() - self.parser_x86 = ParserX86ATT() + self.parser_x86_att = ParserX86ATT() + self.parser_x86_intel = ParserX86Intel() with open(self._find_file("triad_arm_iaca.s")) as f: triad_code_arm = f.read() with open(self._find_file("triad_x86_iaca.s")) as f: - triad_code_x86 = f.read() + triad_code_x86_att = f.read() + with open(self._find_file("triad_x86_intel_iaca.s")) as f: + triad_code_x86_intel = f.read() self.parsed_AArch = self.parser_AArch.parse_file(triad_code_arm) - self.parsed_x86 = self.parser_x86.parse_file(triad_code_x86) + self.parsed_x86_att = self.parser_x86_att.parse_file(triad_code_x86_att) + self.parsed_x86_intel = self.parser_x86_intel.parse_file(triad_code_x86_intel) ################# # Test ################# def test_marker_detection_AArch64(self): - kernel = reduce_to_section(self.parsed_AArch, "AArch64") + kernel = reduce_to_section(self.parsed_AArch, ParserAArch64()) self.assertEqual(len(kernel), 138) self.assertEqual(kernel[0].line_number, 307) self.assertEqual(kernel[-1].line_number, 444) - def test_marker_detection_x86(self): - kernel = reduce_to_section(self.parsed_x86, "x86") + def test_marker_detection_x86_att(self): + kernel = reduce_to_section(self.parsed_x86_att, ParserX86ATT()) self.assertEqual(len(kernel), 9) self.assertEqual(kernel[0].line_number, 146) self.assertEqual(kernel[-1].line_number, 154) + def test_marker_detection_x86_intel(self): 
+ kernel = reduce_to_section(self.parsed_x86_intel, ParserX86Intel()) + self.assertEqual(len(kernel), 7) + self.assertEqual(kernel[0].line_number, 111) + self.assertEqual(kernel[-1].line_number, 117) + def test_marker_matching_AArch64(self): # preparation bytes_1_line = ".byte 213,3,32,31\n" @@ -108,7 +118,7 @@ class TestMarkerUtils(unittest.TestCase): bytes_end=bytes_var_2, ): sample_parsed = self.parser_AArch.parse_file(sample_code) - sample_kernel = reduce_to_section(sample_parsed, "AArch64") + sample_kernel = reduce_to_section(sample_parsed, ParserAArch64()) self.assertEqual(len(sample_kernel), kernel_length) kernel_start = len( list( @@ -179,8 +189,8 @@ class TestMarkerUtils(unittest.TestCase): mov_end=mov_end_var, bytes_end=bytes_var_2, ): - sample_parsed = self.parser_x86.parse_file(sample_code) - sample_kernel = reduce_to_section(sample_parsed, "x86") + sample_parsed = self.parser_x86_att.parse_file(sample_code) + sample_kernel = reduce_to_section(sample_parsed, ParserX86ATT()) self.assertEqual(len(sample_kernel), kernel_length) kernel_start = len( list( @@ -190,7 +200,7 @@ class TestMarkerUtils(unittest.TestCase): ) ) ) - parsed_kernel = self.parser_x86.parse_file( + parsed_kernel = self.parser_x86_att.parse_file( kernel, start_line=kernel_start ) self.assertEqual(sample_kernel, parsed_kernel) @@ -222,7 +232,7 @@ class TestMarkerUtils(unittest.TestCase): for test_name, pro, kernel, epi in samples: code = pro + kernel + epi parsed = self.parser_AArch.parse_file(code) - test_kernel = reduce_to_section(parsed, "AArch64") + test_kernel = reduce_to_section(parsed, ParserAArch64()) if kernel: kernel_length = len(kernel.strip().split("\n")) else: @@ -230,7 +240,7 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( len(test_kernel), kernel_length, - msg="Invalid exctracted kernel length on {!r} sample".format(test_name), + msg="Invalid extracted kernel length on {!r} sample".format(test_name), ) if pro: kernel_start = len((pro).strip().split("\n")) @@ 
-240,7 +250,7 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( test_kernel, parsed_kernel, - msg="Invalid exctracted kernel on {!r}".format(test_name), + msg="Invalid extracted kernel on {!r}".format(test_name), ) def test_marker_special_cases_x86(self): @@ -269,8 +279,8 @@ class TestMarkerUtils(unittest.TestCase): for test_name, pro, kernel, epi in samples: code = pro + kernel + epi - parsed = self.parser_x86.parse_file(code) - test_kernel = reduce_to_section(parsed, "x86") + parsed = self.parser_x86_att.parse_file(code) + test_kernel = reduce_to_section(parsed, ParserX86ATT()) if kernel: kernel_length = len(kernel.strip().split("\n")) else: @@ -278,23 +288,23 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( len(test_kernel), kernel_length, - msg="Invalid exctracted kernel length on {!r} sample".format(test_name), + msg="Invalid extracted kernel length on {!r} sample".format(test_name), ) if pro: kernel_start = len((pro).strip().split("\n")) else: kernel_start = 0 - parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start) + parsed_kernel = self.parser_x86_att.parse_file(kernel, start_line=kernel_start) self.assertEqual( test_kernel, parsed_kernel, - msg="Invalid exctracted kernel on {!r}".format(test_name), + msg="Invalid extracted kernel on {!r}".format(test_name), ) def test_find_jump_labels(self): self.assertEqual( - find_jump_labels(self.parsed_x86), + find_jump_labels(self.parsed_x86_att), OrderedDict( [ (".LFB24", 10), @@ -358,7 +368,7 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( [ (k, v[0].line_number, v[-1].line_number) - for k, v in find_basic_blocks(self.parsed_x86).items() + for k, v in find_basic_blocks(self.parsed_x86_att).items() ], [ (".LFB24", 11, 56), @@ -422,7 +432,7 @@ class TestMarkerUtils(unittest.TestCase): self.assertEqual( [ (k, v[0].line_number, v[-1].line_number) - for k, v in find_basic_loop_bodies(self.parsed_x86).items() + for k, v in 
find_basic_loop_bodies(self.parsed_x86_att).items() ], [(".L4", 66, 74), (".L10", 146, 154), (".L28", 290, 300)], ) diff --git a/tests/test_parser_x86intel.py b/tests/test_parser_x86intel.py new file mode 100755 index 0000000..1918810 --- /dev/null +++ b/tests/test_parser_x86intel.py @@ -0,0 +1,441 @@ +#!/usr/bin/env python3 +""" +Unit tests for x86 Intel assembly parser +""" + +import os +import unittest + +from osaca.parser import ParserX86Intel, InstructionForm +from osaca.parser.directive import DirectiveOperand +from osaca.parser.identifier import IdentifierOperand +from osaca.parser.immediate import ImmediateOperand +from osaca.parser.label import LabelOperand +from osaca.parser.memory import MemoryOperand +from osaca.parser.register import RegisterOperand + + +class TestParserX86Intel(unittest.TestCase): + @classmethod + def setUpClass(self): + self.parser = ParserX86Intel() + with open(self._find_file("triad_x86_intel.s")) as f: + self.triad_code = f.read() + with open(self._find_file("triad_x86_intel_iaca.s")) as f: + self.triad_iaca_code = f.read() + with open(self._find_file("gs_x86_icc.s")) as f: + self.gs_icc_code = f.read() + with open(self._find_file("gs_x86_gcc.s")) as f: + self.gs_gcc_code = f.read() + + ################## + # Test + ################## + + def test_comment_parser(self): + self.assertEqual(self._get_comment(self.parser, "; some comments"), "some comments") + self.assertEqual(self._get_comment(self.parser, "\t\t;AA BB CC \t end \t"), "AA BB CC end") + self.assertEqual( + self._get_comment(self.parser, "\t;; comment ;; comment"), + "; comment ;; comment", + ) + + def test_label_parser(self): + self.assertEqual(self._get_label(self.parser, "main:")[0].name, "main") + self.assertEqual(self._get_label(self.parser, "$$B1?10:")[0].name, "$$B1?10") + self.assertEqual( + self._get_label(self.parser, "$LN9:\tcall\t__CheckForDebuggerJustMyCode")[0].name, + "$LN9", + ) + self.assertEqual( + self._get_label(self.parser, 
"$LN9:\tcall\t__CheckForDebuggerJustMyCode")[1], + InstructionForm( + mnemonic="call", + operands=[ + {"identifier": {"name": "__CheckForDebuggerJustMyCode"}}, + ], + directive_id=None, + comment_id=None, + label_id=None, + line=None, + line_number=None, + ), + ) + + def test_directive_parser(self): + self.assertEqual( + self._get_directive(self.parser, "\t.allocstack 16")[0], + DirectiveOperand(name=".allocstack", parameters=["16"]), + ) + self.assertEqual( + self._get_directive(self.parser, "INCLUDELIB MSVCRTD")[0], + DirectiveOperand(name="INCLUDELIB", parameters=["MSVCRTD"]), + ) + self.assertEqual( + self._get_directive(self.parser, "msvcjmc\tSEGMENT")[0], + DirectiveOperand(name="SEGMENT", parameters=["msvcjmc"]), + ) + self.assertEqual( + self._get_directive(self.parser, "EXTRN\t_RTC_InitBase:PROC")[0], + DirectiveOperand(name="EXTRN", parameters=["_RTC_InitBase:PROC"]), + ) + self.assertEqual( + self._get_directive(self.parser, "$pdata$kernel DD imagerel $LN9")[0], + DirectiveOperand(name="DD", parameters=["$pdata$kernel", "imagerel", "$LN9"]), + ) + self.assertEqual( + self._get_directive(self.parser, "repeat$ = 320")[0], + DirectiveOperand(name="=", parameters=["repeat$", "320"]), + ) + + def test_parse_instruction(self): + instr1 = "\tsub\trsp, 296\t\t\t\t; 00000128H" + instr2 = " fst ST(3)\t; Good ol' x87." 
+ instr3 = "\tmulsd\txmm0, QWORD PTR [rdx+rcx*8]" + instr4 = "\tmov\teax, DWORD PTR cur_elements$[rbp]" + instr5 = "\tmov\tQWORD PTR [rsp+24], r8" + instr6 = "\tjmp\tSHORT $LN2@kernel" + instr7 = "\tlea\trcx, OFFSET FLAT:__FAC6D534_triad@c" + instr8 = "\tmov\tBYTE PTR gs:111, al" + instr9 = "\tlea\tr8, QWORD PTR [r8*4]" + instr10 = "\tmovsd\txmm1, QWORD PTR boost@@XZ@4V456@A+16" + instr11 = "\tlea\trcx, OFFSET FLAT:??_R0N@8+8" + instr12 = "\tvfmadd213sd xmm0, xmm1, QWORD PTR __real@bfc5555555555555" + instr13 = "\tjmp\t$LN18@operator" + instr14 = "vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]" + + parsed_1 = self.parser.parse_instruction(instr1) + parsed_2 = self.parser.parse_instruction(instr2) + parsed_3 = self.parser.parse_instruction(instr3) + parsed_4 = self.parser.parse_instruction(instr4) + parsed_5 = self.parser.parse_instruction(instr5) + parsed_6 = self.parser.parse_instruction(instr6) + parsed_7 = self.parser.parse_instruction(instr7) + parsed_8 = self.parser.parse_instruction(instr8) + parsed_9 = self.parser.parse_instruction(instr9) + parsed_10 = self.parser.parse_instruction(instr10) + parsed_11 = self.parser.parse_instruction(instr11) + parsed_12 = self.parser.parse_instruction(instr12) + parsed_13 = self.parser.parse_instruction(instr13) + parsed_14 = self.parser.parse_instruction(instr14) + + self.assertEqual(parsed_1.mnemonic, "sub") + self.assertEqual(parsed_1.operands[0], RegisterOperand(name="RSP")) + self.assertEqual(parsed_1.operands[1], ImmediateOperand(value=296)) + self.assertEqual(parsed_1.comment, "00000128H") + + self.assertEqual(parsed_2.mnemonic, "fst") + self.assertEqual(parsed_2.operands[0], RegisterOperand(name="ST(3)")) + self.assertEqual(parsed_2.comment, "Good ol' x87.") + + self.assertEqual(parsed_3.mnemonic, "mulsd") + self.assertEqual(parsed_3.operands[0], RegisterOperand(name="XMM0")) + self.assertEqual( + parsed_3.operands[1], + MemoryOperand( + base=RegisterOperand(name="RDX"), index=RegisterOperand(name="RCX"), scale=8 + ), 
+ ) + + self.assertEqual(parsed_4.mnemonic, "mov") + self.assertEqual(parsed_4.operands[0], RegisterOperand(name="EAX")) + self.assertEqual( + parsed_4.operands[1], + MemoryOperand( + offset=ImmediateOperand(identifier="cur_elements$", value=104), + base=RegisterOperand(name="RBP"), + ), + ) + self.assertEqual(parsed_5.mnemonic, "mov") + self.assertEqual( + parsed_5.operands[0], + MemoryOperand(offset=ImmediateOperand(value=24), base=RegisterOperand(name="RSP")), + ) + self.assertEqual(parsed_5.operands[1], RegisterOperand(name="R8")) + + self.assertEqual(parsed_6.mnemonic, "jmp") + self.assertEqual(parsed_6.operands[0], LabelOperand(name="$LN2@kernel")) + + self.assertEqual(parsed_7.mnemonic, "lea") + self.assertEqual(parsed_7.operands[0], RegisterOperand(name="RCX")) + self.assertEqual( + parsed_7.operands[1], + MemoryOperand(offset=IdentifierOperand(name="__FAC6D534_triad@c")), + ) + + self.assertEqual(parsed_8.mnemonic, "mov") + self.assertEqual( + parsed_8.operands[0], + MemoryOperand(base=RegisterOperand(name="GS"), offset=ImmediateOperand(value=111)), + ) + self.assertEqual(parsed_8.operands[1], RegisterOperand(name="AL")) + + self.assertEqual(parsed_9.mnemonic, "lea") + self.assertEqual(parsed_9.operands[0], RegisterOperand(name="R8")) + self.assertEqual( + parsed_9.operands[1], + MemoryOperand(base=None, index=RegisterOperand(name="R8"), scale=4), + ) + + self.assertEqual(parsed_10.mnemonic, "movsd") + self.assertEqual(parsed_10.operands[0], RegisterOperand(name="XMM1")) + self.assertEqual( + parsed_10.operands[1], + MemoryOperand( + offset=IdentifierOperand( + name="boost@@XZ@4V456@A", offset=ImmediateOperand(value=16) + ) + ), + ) + + self.assertEqual(parsed_11.mnemonic, "lea") + self.assertEqual(parsed_11.operands[0], RegisterOperand(name="RCX")) + self.assertEqual( + parsed_11.operands[1], + MemoryOperand( + offset=IdentifierOperand(name="??_R0N@8", offset=ImmediateOperand(value=8)) + ), + ) + + self.assertEqual(parsed_12.mnemonic, "vfmadd213sd") + 
self.assertEqual(parsed_12.operands[0], RegisterOperand(name="XMM0")) + self.assertEqual(parsed_12.operands[1], RegisterOperand(name="XMM1")) + self.assertEqual( + parsed_12.operands[2], + MemoryOperand(offset=IdentifierOperand(name="__real@bfc5555555555555")), + ) + + self.assertEqual(parsed_13.mnemonic, "jmp") + self.assertEqual(parsed_13.operands[0], IdentifierOperand(name="$LN18@operator")) + + self.assertEqual(parsed_14.mnemonic, "vaddsd") + self.assertEqual(parsed_14.operands[0], RegisterOperand(name="XMM0")) + self.assertEqual(parsed_14.operands[1], RegisterOperand(name="XMM0")) + self.assertEqual( + parsed_14.operands[2], + MemoryOperand( + base=RegisterOperand(name="RDX"), + offset=ImmediateOperand(value=8), + index=RegisterOperand(name="RAX"), + scale=8, + ), + ) + + def test_parse_line(self): + line_comment = "; -- Begin main" + line_instruction = "\tret\t0" + + instruction_form_1 = InstructionForm( + mnemonic=None, + operands=[], + directive_id=None, + comment_id="-- Begin main", + label_id=None, + line="; -- Begin main", + line_number=1, + ) + instruction_form_2 = InstructionForm( + mnemonic="ret", + operands=[ + {"immediate": {"value": 0}}, + ], + directive_id=None, + comment_id=None, + label_id=None, + line="\tret\t0", + line_number=2, + ) + + parsed_1 = self.parser.parse_line(line_comment, 1) + parsed_2 = self.parser.parse_line(line_instruction, 2) + + self.assertEqual(parsed_1, instruction_form_1) + self.assertEqual(parsed_2, instruction_form_2) + + def test_parse_register(self): + register_str_1 = "rax" + register_str_2 = "r9" + register_str_3 = "xmm1" + register_str_4 = "ST(4)" + + parsed_reg_1 = RegisterOperand(name="RAX") + parsed_reg_2 = RegisterOperand(name="R9") + parsed_reg_3 = RegisterOperand(name="XMM1") + parsed_reg_4 = RegisterOperand(name="ST(4)") + + self.assertEqual(self.parser.parse_register(register_str_1), parsed_reg_1) + self.assertEqual(self.parser.parse_register(register_str_2), parsed_reg_2) + 
self.assertEqual(self.parser.parse_register(register_str_3), parsed_reg_3) + self.assertEqual(self.parser.parse_register(register_str_4), parsed_reg_4) + + def test_parse_file1(self): + parsed = self.parser.parse_file(self.triad_code) + self.assertEqual(parsed[0].line_number, 1) + # Check specifically that the values of the symbols defined by "=" were correctly + # propagated. + self.assertEqual( + parsed[69], + InstructionForm( + mnemonic="mov", + operands=[ + MemoryOperand( + base=RegisterOperand("RBP"), + offset=ImmediateOperand(value=4, identifier="r$1"), + ), + ImmediateOperand(value=0), + ], + line="\tmov\tDWORD PTR r$1[rbp], 0", + line_number=73, + ), + ) + # Check a few lines to make sure that we produced something reasonable. + self.assertEqual( + parsed[60], + InstructionForm( + mnemonic="mov", + operands=[ + MemoryOperand(base=RegisterOperand("RSP"), offset=ImmediateOperand(value=8)), + RegisterOperand(name="RCX"), + ], + line="\tmov\tQWORD PTR [rsp+8], rcx", + line_number=64, + ), + ) + self.assertEqual( + parsed[120], + InstructionForm( + directive_id=DirectiveOperand(name="END"), line="END", line_number=124 + ), + ) + self.assertEqual(len(parsed), 121) + + def test_parse_file2(self): + parsed = self.parser.parse_file(self.triad_iaca_code) + self.assertEqual(parsed[0].line_number, 1) + # Check a few lines to make sure that we produced something reasonable. + self.assertEqual( + parsed[68], + InstructionForm( + directive_id=DirectiveOperand(name="=", parameters=["s$", "88"]), + line="s$ = 88", + line_number=72, + ), + ) + self.assertEqual( + parsed[135], + InstructionForm( + directive_id=DirectiveOperand(name="END"), line="END", line_number=139 + ), + ) + self.assertEqual(len(parsed), 136) + + def test_parse_file3(self): + parsed = self.parser.parse_file(self.gs_icc_code) + self.assertEqual(parsed[0].line_number, 1) + # Check a few lines to make sure that we produced something reasonable. 
+ self.assertEqual( + parsed[113], + InstructionForm( + mnemonic="vmovsd", + operands=[ + RegisterOperand("XMM5"), + MemoryOperand( + base=RegisterOperand("R11"), + index=RegisterOperand("R10"), + scale=1, + offset=ImmediateOperand(value=16), + ), + ], + comment_id="26.19", + line=" vmovsd xmm5, QWORD PTR [16+r11+r10]" + " #26.19", + line_number=114, + ), + ) + self.assertEqual( + parsed[226], + InstructionForm( + directive_id=DirectiveOperand(name=".long", parameters=["681509"]), + line=" .long 681509", + line_number=227, + ), + ) + self.assertEqual(len(parsed), 227) + + def test_parse_file4(self): + parsed = self.parser.parse_file(self.gs_gcc_code) + self.assertEqual(parsed[0].line_number, 1) + # Check a few lines to make sure that we produced something reasonable. + self.assertEqual( + parsed[61], + InstructionForm( + mnemonic="vaddsd", + operands=[ + RegisterOperand("XMM0"), + RegisterOperand("XMM0"), + MemoryOperand( + base=RegisterOperand("RDX"), + index=RegisterOperand("RAX"), + scale=8, + offset=ImmediateOperand(value=8), + ), + ], + line=" vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]", + line_number=62, + ), + ) + self.assertEqual( + parsed[101], + InstructionForm( + directive_id=DirectiveOperand(name=".long", parameters=["1072939201"]), + line=" .long 1072939201", + line_number=102, + ), + ) + self.assertEqual(len(parsed), 102) + + def test_normalize_imd(self): + imd_binary = ImmediateOperand(value="1001111B") + imd_octal = ImmediateOperand(value="117O") + imd_decimal = ImmediateOperand(value="79") + imd_hex = ImmediateOperand(value="4fH") + imd_float = ImmediateOperand(value="-79.34") + self.assertEqual( + self.parser.normalize_imd(imd_binary), + self.parser.normalize_imd(imd_octal), + ) + self.assertEqual( + self.parser.normalize_imd(imd_octal), + self.parser.normalize_imd(imd_decimal), + ) + self.assertEqual( + self.parser.normalize_imd(imd_decimal), + self.parser.normalize_imd(imd_hex), + ) + 
self.assertEqual(self.parser.normalize_imd(ImmediateOperand(value="-79")), -79) + self.assertEqual(self.parser.normalize_imd(imd_float), -79.34) + + ################## + # Helper functions + ################## + def _get_comment(self, parser, comment): + return " ".join( + parser.process_operand(parser.comment.parseString(comment, parseAll=True))["comment"] + ) + + def _get_label(self, parser, label): + return parser.process_operand(parser.label.parseString(label, parseAll=True)) + + def _get_directive(self, parser, directive): + return parser.process_operand(parser.directive.parseString(directive, parseAll=True)) + + @staticmethod + def _find_file(name): + testdir = os.path.dirname(__file__) + name = os.path.join(testdir, "test_files", name) + assert os.path.exists(name) + return name + + +if __name__ == "__main__": + suite = unittest.TestLoader().loadTestsFromTestCase(TestParserX86Intel) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/tests/test_semantics.py b/tests/test_semantics.py index f9073eb..0748bfe 100755 --- a/tests/test_semantics.py +++ b/tests/test_semantics.py @@ -10,7 +10,7 @@ from copy import deepcopy import networkx as nx from osaca.osaca import get_unmatched_instruction_ratio -from osaca.parser import ParserAArch64, ParserX86ATT +from osaca.parser import ParserAArch64, ParserX86ATT, ParserX86Intel from osaca.semantics import ( INSTR_FLAGS, ArchSemantics, @@ -32,7 +32,8 @@ class TestSemanticTools(unittest.TestCase): @classmethod def setUpClass(cls): # set up parser and kernels - cls.parser_x86 = ParserX86ATT() + cls.parser_x86_att = ParserX86ATT() + cls.parser_x86_intel = ParserX86Intel() cls.parser_AArch64 = ParserAArch64() with open(cls._find_file("kernel_x86.s")) as f: cls.code_x86 = f.read() @@ -40,6 +41,10 @@ class TestSemanticTools(unittest.TestCase): cls.code_x86_memdep = f.read() with open(cls._find_file("kernel_x86_long_LCD.s")) as f: cls.code_x86_long_LCD = f.read() + with open(cls._find_file("kernel_x86_intel.s")) as f: + 
cls.code_x86_intel = f.read() + with open(cls._find_file("kernel_x86_intel_memdep.s")) as f: + cls.code_x86_intel_memdep = f.read() with open(cls._find_file("kernel_aarch64_memdep.s")) as f: cls.code_aarch64_memdep = f.read() with open(cls._find_file("kernel_aarch64.s")) as f: @@ -52,24 +57,32 @@ class TestSemanticTools(unittest.TestCase): cls.mops_1_code = f.read() cls.mops_2_code = cls.mops_1_code.replace("//ALT1 ", "") - cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86") + cls.kernel_x86 = reduce_to_section( + cls.parser_x86_att.parse_file(cls.code_x86), cls.parser_x86_att + ) cls.kernel_x86_memdep = reduce_to_section( - cls.parser_x86.parse_file(cls.code_x86_memdep), "x86" + cls.parser_x86_att.parse_file(cls.code_x86_memdep), cls.parser_x86_att ) cls.kernel_x86_long_LCD = reduce_to_section( - cls.parser_x86.parse_file(cls.code_x86_long_LCD), "x86" + cls.parser_x86_att.parse_file(cls.code_x86_long_LCD), cls.parser_x86_att + ) + cls.kernel_x86_intel = reduce_to_section( + cls.parser_x86_intel.parse_file(cls.code_x86_intel), cls.parser_x86_intel + ) + cls.kernel_x86_intel_memdep = reduce_to_section( + cls.parser_x86_intel.parse_file(cls.code_x86_intel_memdep), cls.parser_x86_intel ) cls.kernel_AArch64 = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64" + cls.parser_AArch64.parse_file(cls.code_AArch64), cls.parser_AArch64 ) cls.kernel_aarch64_memdep = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64" + cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), cls.parser_AArch64 ) cls.kernel_aarch64_SVE = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64" + cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), cls.parser_AArch64 ) cls.kernel_aarch64_deps = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_AArch64_deps), "aarch64" + cls.parser_AArch64.parse_file(cls.code_AArch64_deps), cls.parser_AArch64 ) # set up machine models 
@@ -82,40 +95,64 @@ class TestSemanticTools(unittest.TestCase): cls.machine_model_a64fx = MachineModel( path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml") ) - cls.semantics_x86 = ISASemantics("x86") + cls.semantics_x86 = ISASemantics(cls.parser_x86_att) cls.semantics_csx = ArchSemantics( + cls.parser_x86_att, cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"), ) - cls.semantics_aarch64 = ISASemantics("aarch64") + cls.semantics_x86_intel = ISASemantics(cls.parser_x86_intel) + cls.semantics_csx_intel = ArchSemantics( + cls.parser_x86_intel, + cls.machine_model_csx, + path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"), + ) + cls.semantics_aarch64 = ISASemantics(cls.parser_AArch64) cls.semantics_tx2 = ArchSemantics( + cls.parser_AArch64, cls.machine_model_tx2, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"), ) cls.semantics_a64fx = ArchSemantics( + cls.parser_AArch64, cls.machine_model_a64fx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"), ) cls.machine_model_zen = MachineModel(arch="zen1") + cls.semantics_csx.normalize_instruction_forms(cls.kernel_x86) for i in range(len(cls.kernel_x86)): cls.semantics_csx.assign_src_dst(cls.kernel_x86[i]) cls.semantics_csx.assign_tp_lt(cls.kernel_x86[i]) + cls.semantics_csx.normalize_instruction_forms(cls.kernel_x86_memdep) for i in range(len(cls.kernel_x86_memdep)): cls.semantics_csx.assign_src_dst(cls.kernel_x86_memdep[i]) cls.semantics_csx.assign_tp_lt(cls.kernel_x86_memdep[i]) + cls.semantics_csx.normalize_instruction_forms(cls.kernel_x86_long_LCD) for i in range(len(cls.kernel_x86_long_LCD)): cls.semantics_csx.assign_src_dst(cls.kernel_x86_long_LCD[i]) cls.semantics_csx.assign_tp_lt(cls.kernel_x86_long_LCD[i]) + cls.semantics_csx_intel.normalize_instruction_forms(cls.kernel_x86_intel) + for i in range(len(cls.kernel_x86_intel)): + cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel[i]) + 
cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel[i]) + cls.semantics_csx_intel.normalize_instruction_forms(cls.kernel_x86_intel_memdep) + for i in range(len(cls.kernel_x86_intel_memdep)): + cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel_memdep[i]) + cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel_memdep[i]) + cls.semantics_tx2.normalize_instruction_forms(cls.kernel_AArch64) for i in range(len(cls.kernel_AArch64)): cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i]) cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i]) + cls.semantics_tx2.normalize_instruction_forms(cls.kernel_aarch64_memdep) for i in range(len(cls.kernel_aarch64_memdep)): cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i]) cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i]) + cls.semantics_a64fx.normalize_instruction_forms(cls.kernel_aarch64_SVE) for i in range(len(cls.kernel_aarch64_SVE)): cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i]) cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i]) + cls.semantics_a64fx.normalize_instruction_forms(cls.kernel_aarch64_deps) for i in range(len(cls.kernel_aarch64_deps)): cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_deps[i]) cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_deps[i]) @@ -127,7 +164,7 @@ class TestSemanticTools(unittest.TestCase): def test_creation_by_name(self): try: tmp_mm = MachineModel(arch="CSX") - ArchSemantics(tmp_mm) + ArchSemantics(self.parser_x86_att, tmp_mm) except ValueError: self.fail() @@ -258,7 +295,7 @@ class TestSemanticTools(unittest.TestCase): test_mm_arm.add_port("dummyPort") # test dump of DB - with open("/dev/null", "w") as dev_null: + with open(os.devnull, "w") as dev_null: test_mm_x86.dump(stream=dev_null) test_mm_arm.dump(stream=dev_null) @@ -270,6 +307,14 @@ class TestSemanticTools(unittest.TestCase): self.assertTrue("destination" in instruction_form.semantic_operands) self.assertTrue("src_dst" in instruction_form.semantic_operands) + 
def test_src_dst_assignment_x86_intel(self): + for instruction_form in self.kernel_x86_intel: + with self.subTest(instruction_form=instruction_form): + if instruction_form.semantic_operands is not None: + self.assertTrue("source" in instruction_form.semantic_operands) + self.assertTrue("destination" in instruction_form.semantic_operands) + self.assertTrue("src_dst" in instruction_form.semantic_operands) + def test_src_dst_assignment_AArch64(self): for instruction_form in self.kernel_AArch64: with self.subTest(instruction_form=instruction_form): @@ -288,6 +333,16 @@ class TestSemanticTools(unittest.TestCase): self.assertIsInstance(instruction_form.port_pressure, list) self.assertEqual(len(instruction_form.port_pressure), port_num) + def test_tp_lt_assignment_x86_intel(self): + self.assertTrue("ports" in self.machine_model_csx) + port_num = len(self.machine_model_csx["ports"]) + for instruction_form in self.kernel_x86_intel: + with self.subTest(instruction_form=instruction_form): + self.assertTrue(instruction_form.throughput is not None) + self.assertTrue(instruction_form.latency is not None) + self.assertIsInstance(instruction_form.port_pressure, list) + self.assertEqual(len(instruction_form.port_pressure), port_num) + def test_tp_lt_assignment_AArch64(self): self.assertTrue("ports" in self.machine_model_tx2) port_num = len(self.machine_model_tx2["ports"]) @@ -298,8 +353,7 @@ class TestSemanticTools(unittest.TestCase): self.assertIsInstance(instruction_form.port_pressure, list) self.assertEqual(len(instruction_form.port_pressure), port_num) - def test_optimal_throughput_assignment(self): - # x86 + def test_optimal_throughput_assignment_x86(self): kernel_fixed = deepcopy(self.kernel_x86) self.semantics_csx.add_semantics(kernel_fixed) self.assertEqual(get_unmatched_instruction_ratio(kernel_fixed), 0) @@ -312,11 +366,13 @@ class TestSemanticTools(unittest.TestCase): self.assertTrue(max(tp_optimal) <= max(tp_fixed)) # test multiple port assignment options test_mm_x86 = 
MachineModel(path_to_yaml=self._find_file("test_db_x86.yml")) - tmp_semantics = ArchSemantics(test_mm_x86) + tmp_semantics = ArchSemantics(self.parser_x86_att, test_mm_x86) tmp_code_1 = "fantasyinstr1 %rax, %rax\n" tmp_code_2 = "fantasyinstr1 %rax, %rax\nfantasyinstr2 %rbx, %rbx\n" - tmp_kernel_1 = self.parser_x86.parse_file(tmp_code_1) - tmp_kernel_2 = self.parser_x86.parse_file(tmp_code_2) + tmp_kernel_1 = self.parser_x86_att.parse_file(tmp_code_1) + tmp_kernel_2 = self.parser_x86_att.parse_file(tmp_code_2) + tmp_semantics.normalize_instruction_forms(tmp_kernel_1) + tmp_semantics.normalize_instruction_forms(tmp_kernel_2) tmp_semantics.add_semantics(tmp_kernel_1) tmp_semantics.add_semantics(tmp_kernel_2) tmp_semantics.assign_optimal_throughput(tmp_kernel_1) @@ -326,7 +382,36 @@ class TestSemanticTools(unittest.TestCase): self.assertEqual(k1i1_pp, [0.33, 0.0, 0.33, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33, 0.0, 0.0]) self.assertEqual(k2i1_pp, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]) - # arm + def test_optimal_throughput_assignment_x86_intel(self): + kernel_fixed = deepcopy(self.kernel_x86_intel) + self.semantics_csx_intel.add_semantics(kernel_fixed) + self.assertEqual(get_unmatched_instruction_ratio(kernel_fixed), 0) + + kernel_optimal = deepcopy(kernel_fixed) + self.semantics_csx_intel.assign_optimal_throughput(kernel_optimal) + tp_fixed = self.semantics_csx_intel.get_throughput_sum(kernel_fixed) + tp_optimal = self.semantics_csx_intel.get_throughput_sum(kernel_optimal) + self.assertNotEqual(tp_fixed, tp_optimal) + self.assertTrue(max(tp_optimal) <= max(tp_fixed)) + # test multiple port assignment options + test_mm_x86 = MachineModel(path_to_yaml=self._find_file("test_db_x86.yml")) + tmp_semantics = ArchSemantics(self.parser_x86_intel, test_mm_x86) + tmp_code_1 = "fantasyinstr1 rax, rax\n" + tmp_code_2 = "fantasyinstr1 rax, rax\nfantasyinstr2 rbx, rbx\n" + tmp_kernel_1 = self.parser_x86_intel.parse_file(tmp_code_1) + tmp_kernel_2 = 
self.parser_x86_intel.parse_file(tmp_code_2) + tmp_semantics.normalize_instruction_forms(tmp_kernel_1) + tmp_semantics.normalize_instruction_forms(tmp_kernel_2) + tmp_semantics.add_semantics(tmp_kernel_1) + tmp_semantics.add_semantics(tmp_kernel_2) + tmp_semantics.assign_optimal_throughput(tmp_kernel_1) + tmp_semantics.assign_optimal_throughput(tmp_kernel_2) + k1i1_pp = [round(x, 2) for x in tmp_kernel_1[0].port_pressure] + k2i1_pp = [round(x, 2) for x in tmp_kernel_2[0].port_pressure] + self.assertEqual(k1i1_pp, [0.33, 0.0, 0.33, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33, 0.0, 0.0]) + self.assertEqual(k2i1_pp, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]) + + def test_optimal_throughput_assignment_AArch64(self): kernel_fixed = deepcopy(self.kernel_AArch64) self.semantics_tx2.add_semantics(kernel_fixed) @@ -347,7 +432,9 @@ class TestSemanticTools(unittest.TestCase): # 3 # 5_______>9 # - dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx) + dg = KernelDG( + self.kernel_x86, self.parser_x86_att, self.machine_model_csx, self.semantics_csx + ) self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=3))), 1) self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=3)), 6) @@ -362,12 +449,44 @@ class TestSemanticTools(unittest.TestCase): with self.assertRaises(ValueError): dg.get_dependent_instruction_forms() # test dot creation - dg.export_graph(filepath="/dev/null") + dg.export_graph(filepath=os.devnull) + + def test_kernelDG_x86_intel(self): + # + # 3 + # \___>5__>6 + # / / + # 4 / + # / + # 5.1 + # + dg = KernelDG( + self.kernel_x86_intel, + self.parser_x86_intel, + self.machine_model_csx, + self.semantics_csx_intel, + ) + self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) + self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=3))), 1) + 
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=3)), 5) + self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=4))), 1) + self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=4)), 5) + self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=5))), 1) + self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=5)), 6) + self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=5.1))), 1) + self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=5.1)), 5) + self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=6)), []) + self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=7)), []) + self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=8)), []) + with self.assertRaises(ValueError): + dg.get_dependent_instruction_forms() + # test dot creation + dg.export_graph(filepath=os.devnull) def test_memdependency_x86(self): dg = KernelDG( self.kernel_x86_memdep, - self.parser_x86, + self.parser_x86_att, self.machine_model_csx, self.semantics_csx, ) @@ -377,7 +496,22 @@ class TestSemanticTools(unittest.TestCase): with self.assertRaises(ValueError): dg.get_dependent_instruction_forms() # test dot creation - dg.export_graph(filepath="/dev/null") + dg.export_graph(filepath=os.devnull) + + def test_memdependency_x86_intel(self): + dg = KernelDG( + self.kernel_x86_intel_memdep, + self.parser_x86_intel, + self.machine_model_csx, + self.semantics_csx_intel, + ) + self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) + self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8}) + self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {10, 12}) + with self.assertRaises(ValueError): + dg.get_dependent_instruction_forms() + # test dot creation + dg.export_graph(filepath=os.devnull) def test_kernelDG_AArch64(self): dg = KernelDG( @@ -408,7 +542,7 @@ class 
TestSemanticTools(unittest.TestCase): with self.assertRaises(ValueError): dg.get_dependent_instruction_forms() # test dot creation - dg.export_graph(filepath="/dev/null") + dg.export_graph(filepath=os.devnull) def test_kernelDG_SVE(self): KernelDG( @@ -421,11 +555,13 @@ class TestSemanticTools(unittest.TestCase): def test_mops_deps_AArch64(self): self.kernel_mops_1 = reduce_to_section( - self.parser_AArch64.parse_file(self.mops_1_code), "aarch64" + self.parser_AArch64.parse_file(self.mops_1_code), self.parser_AArch64 ) self.kernel_mops_2 = reduce_to_section( - self.parser_AArch64.parse_file(self.mops_2_code), "aarch64" + self.parser_AArch64.parse_file(self.mops_2_code), self.parser_AArch64 ) + self.semantics_a64fx.normalize_instruction_forms(self.kernel_mops_1) + self.semantics_a64fx.normalize_instruction_forms(self.kernel_mops_2) for i in range(len(self.kernel_mops_1)): self.semantics_a64fx.assign_src_dst(self.kernel_mops_1[i]) for i in range(len(self.kernel_mops_2)): @@ -472,11 +608,15 @@ class TestSemanticTools(unittest.TestCase): path_to_yaml=self._find_file("hidden_load_machine_model.yml") ) self.assertTrue(machine_model_hld.has_hidden_loads()) - semantics_hld = ArchSemantics(machine_model_hld) - kernel_hld = self.parser_x86.parse_file(self.code_x86) - kernel_hld_2 = self.parser_x86.parse_file(self.code_x86) - kernel_hld_2 = self.parser_x86.parse_file(self.code_x86)[-3:] - kernel_hld_3 = self.parser_x86.parse_file(self.code_x86)[5:8] + semantics_hld = ArchSemantics(self.parser_x86_att, machine_model_hld) + kernel_hld = self.parser_x86_att.parse_file(self.code_x86) + kernel_hld_2 = self.parser_x86_att.parse_file(self.code_x86) + kernel_hld_2 = self.parser_x86_att.parse_file(self.code_x86)[-3:] + kernel_hld_3 = self.parser_x86_att.parse_file(self.code_x86)[5:8] + + semantics_hld.normalize_instruction_forms(kernel_hld) + semantics_hld.normalize_instruction_forms(kernel_hld_2) + semantics_hld.normalize_instruction_forms(kernel_hld_3) 
semantics_hld.add_semantics(kernel_hld) semantics_hld.add_semantics(kernel_hld_2) @@ -490,7 +630,9 @@ class TestSemanticTools(unittest.TestCase): self.assertEqual(num_hidden_loads_3, 1) def test_cyclic_dag(self): - dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx) + dg = KernelDG( + self.kernel_x86, self.parser_x86_att, self.machine_model_csx, self.semantics_csx + ) dg.dg.add_edge(100, 101, latency=1.0) dg.dg.add_edge(101, 102, latency=2.0) dg.dg.add_edge(102, 100, latency=3.0) @@ -555,7 +697,42 @@ class TestSemanticTools(unittest.TestCase): def test_loop_carried_dependency_x86(self): lcd_id = "8" lcd_id2 = "5" - dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx) + dg = KernelDG( + self.kernel_x86, self.parser_x86_att, self.machine_model_csx, self.semantics_csx + ) + lc_deps = dg.get_loopcarried_dependencies() + # self.assertEqual(len(lc_deps), 2) + # ID 8 + self.assertEqual( + lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"] + ) + self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1) + self.assertEqual( + lc_deps[lcd_id]["dependencies"][0][0], + dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"], + ) + # w/ flag dependencies: ID 9 w/ len=2 + # w/o flag dependencies: ID 5 w/ len=1 + # TODO discuss + self.assertEqual( + lc_deps[lcd_id2]["root"], + dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"], + ) + self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1) + self.assertEqual( + lc_deps[lcd_id2]["dependencies"][0][0], + dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"], + ) + + def test_loop_carried_dependency_x86_intel(self): + lcd_id = "8" + lcd_id2 = "7" + dg = KernelDG( + self.kernel_x86_intel, + self.parser_x86_intel, + self.machine_model_csx, + self.semantics_csx_intel, + ) lc_deps = dg.get_loopcarried_dependencies() # self.assertEqual(len(lc_deps), 2) # ID 8 @@ -584,7 +761,7 @@ class 
TestSemanticTools(unittest.TestCase): start_time = time.perf_counter() KernelDG( self.kernel_x86_long_LCD, - self.parser_x86, + self.parser_x86_att, self.machine_model_csx, self.semantics_x86, timeout=10, @@ -594,7 +771,7 @@ class TestSemanticTools(unittest.TestCase): start_time = time.perf_counter() KernelDG( self.kernel_x86_long_LCD, - self.parser_x86, + self.parser_x86_att, self.machine_model_csx, self.semantics_x86, timeout=2, @@ -608,22 +785,32 @@ class TestSemanticTools(unittest.TestCase): def test_is_read_is_written_x86(self): # independent form HW model - dag = KernelDG(self.kernel_x86, self.parser_x86, None, None) + dag = KernelDG(self.kernel_x86, self.parser_x86_att, None, None) reg_rcx = RegisterOperand(name="rcx") reg_ymm1 = RegisterOperand(name="ymm1") - instr_form_r_c = self.parser_x86.parse_line("vmovsd %xmm0, (%r15,%rcx,8)") + instr_form_r_c = self.parser_x86_att.parse_line("vmovsd %xmm0, (%r15,%rcx,8)") + self.semantics_csx.normalize_instruction_form(instr_form_r_c) self.semantics_csx.assign_src_dst(instr_form_r_c) - instr_form_non_r_c = self.parser_x86.parse_line("movl %xmm0, (%r15,%rax,8)") + instr_form_non_r_c = self.parser_x86_att.parse_line("movl %xmm0, (%r15,%rax,8)") + self.semantics_csx.normalize_instruction_form(instr_form_non_r_c) self.semantics_csx.assign_src_dst(instr_form_non_r_c) - instr_form_w_c = self.parser_x86.parse_line("movi $0x05ACA, %rcx") + instr_form_w_c = self.parser_x86_att.parse_line("movi $0x05ACA, %rcx") + self.semantics_csx.normalize_instruction_form(instr_form_w_c) self.semantics_csx.assign_src_dst(instr_form_w_c) - instr_form_rw_ymm_1 = self.parser_x86.parse_line("vinsertf128 $0x1, %xmm1, %ymm0, %ymm1") + instr_form_rw_ymm_1 = self.parser_x86_att.parse_line( + "vinsertf128 $0x1, %xmm1, %ymm0, %ymm1" + ) + self.semantics_csx.normalize_instruction_form(instr_form_rw_ymm_1) self.semantics_csx.assign_src_dst(instr_form_rw_ymm_1) - instr_form_rw_ymm_2 = self.parser_x86.parse_line("vinsertf128 $0x1, %xmm0, %ymm1, %ymm1") + 
instr_form_rw_ymm_2 = self.parser_x86_att.parse_line( + "vinsertf128 $0x1, %xmm0, %ymm1, %ymm1" + ) + self.semantics_csx.normalize_instruction_form(instr_form_rw_ymm_2) self.semantics_csx.assign_src_dst(instr_form_rw_ymm_2) - instr_form_r_ymm = self.parser_x86.parse_line("vmovapd %ymm1, %ymm0") + instr_form_r_ymm = self.parser_x86_att.parse_line("vmovapd %ymm1, %ymm0") + self.semantics_csx.normalize_instruction_form(instr_form_r_ymm) self.semantics_csx.assign_src_dst(instr_form_r_ymm) self.assertTrue(dag.is_read(reg_rcx, instr_form_r_c)) self.assertFalse(dag.is_read(reg_rcx, instr_form_non_r_c)) @@ -637,6 +824,43 @@ class TestSemanticTools(unittest.TestCase): self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2)) self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm)) + def test_is_read_is_written_x86_intel(self): + # independent form HW model + dag = KernelDG(self.kernel_x86_intel, self.parser_x86_intel, None, None) + reg_rcx = RegisterOperand(name="rcx") + reg_ymm1 = RegisterOperand(name="ymm1") + + instr_form_r_c = self.parser_x86_intel.parse_line("vmovsd QWORD PTR [r15+rcx*8], xmm0") + self.semantics_csx_intel.normalize_instruction_form(instr_form_r_c) + self.semantics_csx_intel.assign_src_dst(instr_form_r_c) + instr_form_non_r_c = self.parser_x86_intel.parse_line("mov QWORD PTR [r15+rax*8], xmm0") + self.semantics_csx_intel.normalize_instruction_form(instr_form_non_r_c) + self.semantics_csx_intel.assign_src_dst(instr_form_non_r_c) + instr_form_w_c = self.parser_x86_intel.parse_line("mov rcx, H05ACA") + self.semantics_csx_intel.normalize_instruction_form(instr_form_w_c) + self.semantics_csx_intel.assign_src_dst(instr_form_w_c) + + instr_form_rw_ymm_1 = self.parser_x86_intel.parse_line("vinsertf128 ymm1, ymm0, xmm1, 1") + self.semantics_csx_intel.normalize_instruction_form(instr_form_rw_ymm_1) + self.semantics_csx_intel.assign_src_dst(instr_form_rw_ymm_1) + instr_form_rw_ymm_2 = self.parser_x86_intel.parse_line("vinsertf128 ymm1, ymm1, xmm0, 1") + 
self.semantics_csx_intel.normalize_instruction_form(instr_form_rw_ymm_2) + self.semantics_csx_intel.assign_src_dst(instr_form_rw_ymm_2) + instr_form_r_ymm = self.parser_x86_intel.parse_line("vmovapd ymm0, ymm1") + self.semantics_csx_intel.normalize_instruction_form(instr_form_r_ymm) + self.semantics_csx_intel.assign_src_dst(instr_form_r_ymm) + self.assertTrue(dag.is_read(reg_rcx, instr_form_r_c)) + self.assertFalse(dag.is_read(reg_rcx, instr_form_non_r_c)) + self.assertFalse(dag.is_read(reg_rcx, instr_form_w_c)) + self.assertTrue(dag.is_written(reg_rcx, instr_form_w_c)) + self.assertFalse(dag.is_written(reg_rcx, instr_form_r_c)) + self.assertTrue(dag.is_read(reg_ymm1, instr_form_rw_ymm_1)) + self.assertTrue(dag.is_read(reg_ymm1, instr_form_rw_ymm_2)) + self.assertTrue(dag.is_read(reg_ymm1, instr_form_r_ymm)) + self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1)) + self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2)) + self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm)) + def test_is_read_is_written_AArch64(self): # independent form HW model dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None, None) @@ -649,20 +873,28 @@ class TestSemanticTools(unittest.TestCase): regs_gp = [reg_w1, reg_x1] instr_form_r_1 = self.parser_AArch64.parse_line("stp q1, q3, [x12, #192]") + self.semantics_tx2.normalize_instruction_form(instr_form_r_1) self.semantics_tx2.assign_src_dst(instr_form_r_1) instr_form_r_2 = self.parser_AArch64.parse_line("fadd v2.2d, v1.2d, v0.2d") + self.semantics_tx2.normalize_instruction_form(instr_form_r_2) self.semantics_tx2.assign_src_dst(instr_form_r_2) instr_form_w_1 = self.parser_AArch64.parse_line("ldr d1, [x1, #:got_lo12:q2c]") + self.semantics_tx2.normalize_instruction_form(instr_form_w_1) self.semantics_tx2.assign_src_dst(instr_form_w_1) instr_form_non_w_1 = self.parser_AArch64.parse_line("ldr x1, [x1, #:got_lo12:q2c]") + self.semantics_tx2.normalize_instruction_form(instr_form_non_w_1) 
self.semantics_tx2.assign_src_dst(instr_form_non_w_1) instr_form_rw_1 = self.parser_AArch64.parse_line("fmul v1.2d, v1.2d, v0.2d") + self.semantics_tx2.normalize_instruction_form(instr_form_rw_1) self.semantics_tx2.assign_src_dst(instr_form_rw_1) instr_form_rw_2 = self.parser_AArch64.parse_line("ldp q2, q4, [x1, #64]!") + self.semantics_tx2.normalize_instruction_form(instr_form_rw_2) self.semantics_tx2.assign_src_dst(instr_form_rw_2) instr_form_rw_3 = self.parser_AArch64.parse_line("str x4, [x1], #64") + self.semantics_tx2.normalize_instruction_form(instr_form_rw_3) self.semantics_tx2.assign_src_dst(instr_form_rw_3) instr_form_non_rw_1 = self.parser_AArch64.parse_line("adds x1, x11") + self.semantics_tx2.normalize_instruction_form(instr_form_non_rw_1) self.semantics_tx2.assign_src_dst(instr_form_non_rw_1) for reg in regs: diff --git a/validation/kernels/striad.c b/validation/kernels/striad.c index 49b1feb..3c7ab43 100644 --- a/validation/kernels/striad.c +++ b/validation/kernels/striad.c @@ -7,6 +7,12 @@ #endif #endif +#define USE_IACA 0 + +#if USE_IACA +#include "intel\iacaMarks.h" +#endif + #define DTYPE double void dummy(void *); @@ -15,9 +21,15 @@ void kernel(DTYPE* a, DTYPE* b, DTYPE* c, DTYPE* d, const int repeat, const int #ifndef MAIN { for(int r=0; r < repeat; r++) { +#if USE_IACA + IACA_VC64_START +#endif for(int i=0; i