Merge pull request #112 from pleroy/Intel

Add support for the Intel syntax produced by MSVC and ICC
This commit is contained in:
Jan
2025-03-17 10:20:40 +01:00
committed by GitHub
34 changed files with 2915 additions and 497 deletions

View File

@@ -14,7 +14,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.x"
- name: Install Python dependencies
run: python -m pip install black flake8

View File

@@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9, "3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
@@ -18,11 +18,13 @@ jobs:
run: |
python -m pip install wheel
python -m pip install --upgrade pip
python -m pip install setuptools
python -m pip install codecov requests
python -m pip install bs4
sudo apt-get -y install graphviz libgraphviz-dev pkg-config
python -m pip install pygraphviz
python -m pip install "kerncraft>=0.8.16"
#python -m pip install "kerncraft>=0.8.16"
python -m pip install git+https://github.com/RRZE-HPC/kerncraft.git@7caff4e2ecdbef595013041ba0131e37ed33c72c
python -m pip install -e .
- name: Test
run: |

4
.gitignore vendored
View File

@@ -109,3 +109,7 @@ venv.bak/
# mypy
.mypy_cache/
# Visual Studio
.vs
x64/

View File

@@ -111,7 +111,8 @@ def extract_model(tree, arch, skip_mem=True):
print("Skipping...", file=sys.stderr)
return None
mm = MachineModel(isa=isa)
parser = get_parser(isa)
# The model uses the AT&T syntax.
parser = get_parser(isa, "ATT")
for instruction_tag in tree.findall(".//instruction"):
ignore = False

View File

@@ -11,7 +11,7 @@ from ruamel.yaml import YAML
from osaca.db_interface import import_benchmark_output, sanity_check
from osaca.frontend import Frontend
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT, ParserX86Intel
from osaca.semantics import (
INSTR_FLAGS,
ArchSemantics,
@@ -47,6 +47,10 @@ DEFAULT_ARCHS = {
"aarch64": "V2",
"x86": "SPR",
}
SUPPORTED_SYNTAXES = [
"ATT",
"INTEL",
]
# Stolen from pip
@@ -108,6 +112,12 @@ def create_parser(parser=None):
"ZEN4, TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a "
"default uarch for x86/AArch64.",
)
parser.add_argument(
"--syntax",
type=str,
help="Define the assembly syntax (ATT, Intel) for x86. If no syntax is given, OSACA "
"tries to determine automatically the syntax to use.",
)
parser.add_argument(
"--fixed",
action="store_true",
@@ -232,6 +242,14 @@ def check_arguments(args, parser):
parser.error(
"Microarchitecture not supported. Please see --help for all valid architecture codes."
)
if args.syntax and args.arch and MachineModel.get_isa_for_arch(args.arch) != "x86":
parser.error("Syntax can only be explicitly specified for an x86 microarchitecture")
if args.syntax:
args.syntax = args.syntax.upper()
if args.syntax not in SUPPORTED_SYNTAXES:
parser.error(
"Assembly syntax not supported. Please see --help for all valid assembly syntaxes."
)
if "import_data" in args and args.import_data not in supported_import_files:
parser.error(
"Microbenchmark not supported for data import. Please see --help for all valid "
@@ -310,30 +328,56 @@ def inspect(args, output_file=sys.stdout):
code = args.file.read()
# Detect ISA if necessary
arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
print_arch_warning = False if args.arch else True
isa = MachineModel.get_isa_for_arch(arch)
detected_isa, detected_syntax = BaseParser.detect_ISA(code)
detected_arch = DEFAULT_ARCHS[detected_isa]
print_arch_warning = not args.arch
verbose = args.verbose
ignore_unknown = args.ignore_unknown
# Parse file
parser = get_asm_parser(arch)
try:
parsed_code = parser.parse_file(code)
except Exception as e:
# probably the wrong parser based on heuristic
if args.arch is None:
# change ISA and try again
arch = (
DEFAULT_ARCHS["x86"]
if BaseParser.detect_ISA(code) == "aarch64"
else DEFAULT_ARCHS["aarch64"]
)
isa = MachineModel.get_isa_for_arch(arch)
parser = get_asm_parser(arch)
# If the arch/syntax is explicitly specified, that's the only thing we'll try. Otherwise, we'll
# look at all the possible archs/syntaxes, but with our detected arch/syntax last in the list,
# thus tried first.
if args.arch:
archs_to_try = [args.arch]
else:
archs_to_try = list(DEFAULT_ARCHS.values())
archs_to_try.remove(detected_arch)
archs_to_try.append(detected_arch)
if args.syntax:
syntaxes_to_try = [args.syntax]
else:
syntaxes_to_try = SUPPORTED_SYNTAXES + [None]
syntaxes_to_try.remove(detected_syntax)
syntaxes_to_try.append(detected_syntax)
# Filter the cross-product of archs and syntaxes to eliminate the combinations that don't make
# sense.
combinations_to_try = [
(arch, syntax)
for arch in archs_to_try
for syntax in syntaxes_to_try
if (syntax is not None) == (MachineModel.get_isa_for_arch(arch) == "x86")
]
# Parse file.
message = ""
single_combination = len(combinations_to_try) == 1
while True:
arch, syntax = combinations_to_try.pop()
parser = get_asm_parser(arch, syntax)
try:
parsed_code = parser.parse_file(code)
else:
raise e
break
except Exception as e:
message += f"\nWith arch {arch} and syntax {syntax} got error: {e}."
# Either the wrong parser based on heuristic, or a bona fide syntax error (or
# unsupported syntax). For ease of debugging, we emit the entire exception trace if
# we tried a single arch/syntax combination. If we tried multiple combinations, we
# don't emit the traceback as it would apply to the latest combination tried, which is
# probably the less interesting.
if not combinations_to_try:
raise SyntaxError(message) from e if single_combination else None
# Reduce to marked kernel or chosen section and add semantics
if args.lines:
@@ -341,13 +385,14 @@ def inspect(args, output_file=sys.stdout):
kernel = [line for line in parsed_code if line.line_number in line_range]
print_length_warning = False
else:
kernel = reduce_to_section(parsed_code, isa)
kernel = reduce_to_section(parsed_code, parser)
# Print warning if kernel has no markers and is larger than threshold (100)
print_length_warning = (
True if len(kernel) == len(parsed_code) and len(kernel) > 100 else False
)
machine_model = MachineModel(arch=arch)
semantics = ArchSemantics(machine_model)
semantics = ArchSemantics(parser, machine_model)
semantics.normalize_instruction_forms(kernel)
semantics.add_semantics(kernel)
# Do optimal schedule for kernel throughput if wished
if not args.fixed:
@@ -417,7 +462,7 @@ def run(args, output_file=sys.stdout):
@lru_cache()
def get_asm_parser(arch) -> BaseParser:
def get_asm_parser(arch, syntax="ATT") -> BaseParser:
"""
Helper function to create the right parser for a specific architecture.
@@ -427,7 +472,7 @@ def get_asm_parser(arch) -> BaseParser:
"""
isa = MachineModel.get_isa_for_arch(arch)
if isa == "x86":
return ParserX86ATT()
return ParserX86ATT() if syntax == "ATT" else ParserX86Intel()
elif isa == "aarch64":
return ParserAArch64()

View File

@@ -1,11 +1,13 @@
"""
Collection of parsers supported by OSACA.
Only the parser below will be exported, so please add new parsers to __all__.
Only the parsers below will be exported, so please add new parsers to __all__.
"""
from .base_parser import BaseParser
from .parser_x86 import ParserX86
from .parser_x86att import ParserX86ATT
from .parser_x86intel import ParserX86Intel
from .parser_AArch64 import ParserAArch64
from .instruction_form import InstructionForm
from .operand import Operand
@@ -14,15 +16,17 @@ __all__ = [
"Operand",
"InstructionForm",
"BaseParser",
"ParserX86",
"ParserX86ATT",
"ParserX86Intel",
"ParserAArch64",
"get_parser",
]
def get_parser(isa):
def get_parser(isa, syntax="ATT"):
if isa.lower() == "x86":
return ParserX86ATT()
return ParserX86ATT() if syntax.upper() == "ATT" else ParserX86Intel()
elif isa.lower() == "aarch64":
return ParserAArch64()
else:

View File

@@ -25,20 +25,57 @@ class BaseParser(object):
self.construct_parser()
self._parser_constructed = True
def isa(self):
"""Return the ISA handled by this parser; must be overridden by derived classes."""
# Done in derived classes
raise NotImplementedError
# The marker functions return lists of `InstructionForm` that are used to find the IACA markers
# in the parsed code. In addition to just a list, the marker may have a structure like
# [I1, [I2, I3], I4, ...] where the nested list indicates that at least one of I2 and I3 must
# match the second instruction in the fragment of parsed code.
# If an instruction form is a `DirectiveOperand`, the match may happen over several directive
# operands in the parsed code, provided that the directives have the same name and the
# parameters are in sequence with respect to the pattern. This provides an easy way to describe
# a sequence of bytes irrespective of the way it was grouped in the assembly source.
# Note that markers must be matched *before* normalization.
def start_marker(self):
"""
Return the list of instruction forms (possibly with nested lists of alternatives) that
identifies the start marker in the parsed code; must be overridden by derived classes.
"""
# Done in derived classes
raise NotImplementedError
def end_marker(self):
"""
Return the list of instruction forms (possibly with nested lists of alternatives) that
identifies the end marker in the parsed code; must be overridden by derived classes.
"""
# Done in derived classes
raise NotImplementedError
# Performs all the normalization needed to match the instruction to the ISA/arch model. This
# method must set the `normalized` property of the instruction and must be idempotent.
def normalize_instruction_form(self, instruction_form, isa_model, arch_model):
"""
Normalize ``instruction_form`` in place so that it can be matched against the models.

Abstract: every derived parser provides its own implementation.

:param instruction_form: the parsed instruction to normalize.
:param isa_model: model of the instruction set.
:param arch_model: model of the micro-architecture.
:raises NotImplementedError: always, in this base class.
"""
raise NotImplementedError
@staticmethod
def detect_ISA(file_content):
"""Detect the ISA of the assembly based on the used registers and return the ISA code."""
"""
Detect the ISA of the assembly based on the used registers and return the ISA code.
:param str file_content: assembly code.
:return: a tuple isa, syntax describing the architecture and the assembly syntax,
if appropriate. If there is no notion of syntax, the second element is None.
"""
# Check for the amount of registers in the code to determine the ISA
# 1) Check for xmm, ymm, zmm, rax, rbx, rcx, and rdx registers in x86
# AT&T syntax. There is a % before each register name.
heuristics_x86ATT = [r"%[xyz]mm[0-9]", r"%[er][abcd]x[0-9]"]
# 2) check for v and z vector registers and x/w general-purpose registers
# 2) Same as above, but for the Intel syntax. There is no % before the register names.
heuristics_x86Intel = [r"[^%][xyz]mm[0-9]", r"[^%][er][abcd]x[0-9]"]
# 3) check for v and z vector registers and x/w general-purpose registers
heuristics_aarch64 = [r"[vz][0-9][0-9]?\.[0-9][0-9]?[bhsd]", r"[wx][0-9]"]
matches = {"x86": 0, "aarch64": 0}
matches = {("x86", "ATT"): 0, ("x86", "INTEL"): 0, ("aarch64", None): 0}
for h in heuristics_x86ATT:
matches["x86"] += len(re.findall(h, file_content))
matches[("x86", "ATT")] += len(re.findall(h, file_content))
for h in heuristics_x86Intel:
matches[("x86", "INTEL")] += len(re.findall(h, file_content))
for h in heuristics_aarch64:
matches["aarch64"] += len(re.findall(h, file_content))
matches[("aarch64", None)] += len(re.findall(h, file_content))
return max(matches.items(), key=operator.itemgetter(1))[0]
@@ -94,6 +131,14 @@ class BaseParser(object):
def get_full_reg_name(self, register):
raise NotImplementedError
# Must be called on a *normalized* instruction.
def get_regular_source_operands(self, instruction_form):
"""
Return the source operands of ``instruction_form`` assuming regular src/dst behavior.

Abstract: the operand order is syntax-specific, so derived parsers implement this.
"""
raise NotImplementedError
# Must be called on a *normalized* instruction.
def get_regular_destination_operands(self, instruction_form):
"""
Return the destination operands of ``instruction_form`` assuming regular src/dst behavior.

Abstract: the operand order is syntax-specific, so derived parsers implement this.
"""
raise NotImplementedError
def normalize_imd(self, imd):
raise NotImplementedError

View File

@@ -41,3 +41,12 @@ class IdentifierOperand(Operand):
def __repr__(self):
return self.__str__()
def __eq__(self, other):
    """
    Compare two identifier operands field by field.

    Two identifiers are equal when their name, offset and relocation are all equal. Anything
    that is not an ``IdentifierOperand`` compares unequal.
    """
    # NOTE(review): a class that defines __eq__ without __hash__ is unhashable in Python;
    # confirm that no caller relies on hashing identifier operands.
    if not isinstance(other, IdentifierOperand):
        return False
    same_name = self._name == other._name
    return (
        same_name
        and self._offset == other._offset
        and self._relocation == other._relocation
    )

View File

@@ -19,6 +19,7 @@ class InstructionForm:
port_pressure=None,
operation=None,
breaks_dependency_on_equal_operands=False,
normalized=False,
):
self._mnemonic = mnemonic
self._operands = operands
@@ -33,6 +34,7 @@ class InstructionForm:
self._operation = operation
self._uops = uops
self._breaks_dependency_on_equal_operands = breaks_dependency_on_equal_operands
self._normalized = normalized
self._latency = latency
self._throughput = throughput
self._latency_cp = []
@@ -42,6 +44,10 @@ class InstructionForm:
self._port_uops = []
self._flags = []
def check_normalized(self):
    """
    Verify that this instruction form has been normalized.

    :raises AssertionError: if the ``_normalized`` flag is not set.
    """
    if self._normalized:
        return
    raise AssertionError("Unnormalized instruction")
@property
def semantic_operands(self):
return self._semantic_operands
@@ -114,6 +120,10 @@ class InstructionForm:
def breaks_dependency_on_equal_operands(self):
return self._breaks_dependency_on_equal_operands
@property
def normalized(self):
"""Return the flag telling whether this instruction form was marked as normalized."""
return self._normalized
@semantic_operands.setter
def semantic_operands(self, semantic_operands):
self._semantic_operands = semantic_operands
@@ -142,6 +152,10 @@ class InstructionForm:
def breaks_dependency_on_equal_operands(self, boolean):
self._breaks_dependency_on_equal_operands = boolean
@normalized.setter
def normalized(self, normalized):
# Stores the flag as given; no validation is performed here.
self._normalized = normalized
@mnemonic.setter
def mnemonic(self, mnemonic):
self._mnemonic = mnemonic

View File

@@ -20,3 +20,8 @@ class LabelOperand(Operand):
def __repr__(self):
return self.__str__()
def __eq__(self, other):
    """
    Compare two label operands by name.

    Anything that is not a ``LabelOperand`` compares unequal.
    """
    # NOTE(review): a class that defines __eq__ without __hash__ is unhashable in Python;
    # confirm that no caller relies on hashing label operands.
    if not isinstance(other, LabelOperand):
        return False
    return self._name == other._name

View File

@@ -15,6 +15,7 @@ class MemoryOperand(Operand):
pre_indexed=False,
post_indexed=False,
indexed_val=None,
data_type=None,
src=None,
dst=None,
source=False,
@@ -30,6 +31,7 @@ class MemoryOperand(Operand):
self._pre_indexed = pre_indexed
self._post_indexed = post_indexed
self._indexed_val = indexed_val
self._data_type = data_type
# type of register we store from (`src`) or load to (`dst`)
self._src = src
self._dst = dst
@@ -74,6 +76,14 @@ class MemoryOperand(Operand):
def indexed_val(self):
return self._indexed_val
@property
def data_type(self):
"""Return the data type recorded for this memory operand, or ``None`` if there is none."""
return self._data_type
@data_type.setter
def data_type(self, data_type):
# Stores the value as given; no validation is performed here.
self._data_type = data_type
@property
def src(self):
return self._src

View File

@@ -26,7 +26,53 @@ class ParserAArch64(BaseParser):
def __init__(self):
super().__init__()
self.isa = "aarch64"
def isa(self):
"""Return the name of the ISA handled by this parser, ``"aarch64"``."""
return "aarch64"
def start_marker(self):
    """
    Return the instruction pattern that marks the start of the kernel.

    The pattern is a ``mov`` of the immediate 111 into register ``x1`` followed by a ``byte``
    directive with the parameters 213, 3, 32, 31.
    """
    load_magic_value = InstructionForm(
        mnemonic="mov",
        operands=[RegisterOperand(name="1", prefix="x"), ImmediateOperand(value=111)],
    )
    marker_bytes = InstructionForm(
        directive_id=DirectiveOperand(name="byte", parameters=["213", "3", "32", "31"])
    )
    return [load_magic_value, marker_bytes]
def end_marker(self):
    """
    Return the instruction pattern that marks the end of the kernel.

    The pattern is a ``mov`` of the immediate 222 into register ``x1`` followed by a ``byte``
    directive with the parameters 213, 3, 32, 31.
    """
    load_magic_value = InstructionForm(
        mnemonic="mov",
        operands=[RegisterOperand(name="1", prefix="x"), ImmediateOperand(value=222)],
    )
    marker_bytes = InstructionForm(
        directive_id=DirectiveOperand(name="byte", parameters=["213", "3", "32", "31"])
    )
    return [load_magic_value, marker_bytes]
def normalize_instruction_form(self, instruction_form, isa_model, arch_model):
    """
    Normalize an AArch64 instruction form in place.

    When the mnemonic is unknown to the machine model but its stem (everything before the
    first ".") is known, the mnemonic is replaced by that stem. The form is flagged as
    normalized on the first call, and later calls return immediately.

    :param instruction_form: the instruction to normalize; modified in place.
    :param isa_model: unused by this parser.
    :param arch_model: machine model queried through ``get_instruction``.
    """
    if instruction_form.normalized:
        return
    instruction_form.normalized = True

    full_name = instruction_form.mnemonic
    if not full_name:
        return
    if arch_model.get_instruction(full_name, instruction_form.operands):
        # The model knows the instruction as written; nothing to do.
        return

    # Check for instruction without shape/cc suffix.
    stem, dot, _ = full_name.partition(".")
    if not dot:
        return
    if arch_model.get_instruction(stem, instruction_form.operands):
        instruction_form.mnemonic = stem
def construct_parser(self):
"""Create parser for ARM AArch64 ISA."""
@@ -592,6 +638,21 @@ class ParserAArch64(BaseParser):
name += "[" + str(register.index) + "]"
return name
def get_regular_source_operands(self, instruction_form):
    """
    Return the source operands under the regular src/dst convention.

    A lone operand is treated as a source. Otherwise every operand but the first one is a
    source.
    """
    operands = instruction_form.operands
    if len(operands) == 1:
        return [operands[0]]
    return list(operands[1:])
def get_regular_destination_operands(self, instruction_form):
    """
    Return the destination operands under the regular src/dst convention.

    A lone operand is treated as a source, so there is no destination. Otherwise the first
    operand is the destination.
    """
    operands = instruction_form.operands
    return [] if len(operands) == 1 else operands[:1]
def normalize_imd(self, imd):
"""Normalize immediate to decimal based representation"""
if isinstance(imd, IdentifierOperand):

123
osaca/parser/parser_x86.py Normal file
View File

@@ -0,0 +1,123 @@
import re
import string
from osaca.parser import BaseParser
class ParserX86(BaseParser):
    """
    Functionality shared by the x86 parsers, independently of the assembly syntax.

    It provides register classification, register/flag dependency checks and the regular
    source/destination operand split in which the destination is the last operand.
    """

    _instance = None

    # Names of the basic general-purpose registers, grouped by the register they alias. Two
    # names of the same group designate overlapping storage and thus depend on each other.
    _GPR_GROUPS = (
        frozenset({"RAX", "EAX", "AX", "AH", "AL"}),
        frozenset({"RBX", "EBX", "BX", "BH", "BL"}),
        frozenset({"RCX", "ECX", "CX", "CH", "CL"}),
        frozenset({"RDX", "EDX", "DX", "DH", "DL"}),
        frozenset({"RSP", "ESP", "SP", "SPL"}),
        frozenset({"RBP", "EBP", "BP", "BPL"}),
        frozenset({"RSI", "ESI", "SI", "SIL"}),
        frozenset({"RDI", "EDI", "DI", "DIL"}),
    )
    # Numbered general-purpose registers, optionally followed by a width suffix.
    _NUMBERED_GPR = r"R([0-9]+)[DWB]?"
    _VECTOR_PREFIXES = ("mm", "xmm", "ymm", "zmm")

    # Singleton pattern, as this is created very many times.
    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(ParserX86, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        super().__init__()

    def isa(self):
        """Return the name of the ISA handled by this parser, ``"x86"``."""
        return "x86"

    def is_reg_dependend_of(self, reg_a, reg_b):
        """
        Check if ``reg_a`` is dependent on ``reg_b``.

        Registers depend on each other if they have the same name (ignoring case), if they are
        vector registers whose names are equal after the first character, if they are basic
        general-purpose registers of the same alias family (e.g., ``rbp`` and ``ebp``), or if
        they are numbered general-purpose registers with the same number.

        :param reg_a: first register; only its ``name`` is read.
        :param reg_b: second register; only its ``name`` is read.
        :return: ``True`` if the registers depend on each other, ``False`` otherwise.
        """
        reg_a_name = reg_a.name.upper()
        reg_b_name = reg_b.name.upper()

        # Check if they are the same registers
        if reg_a_name == reg_b_name:
            return True
        # Check vector registers first
        if self.is_vector_register(reg_a):
            # Registers in the same vector space only differ by their first letter.
            return self.is_vector_register(reg_b) and reg_a_name[1:] == reg_b_name[1:]
        # Check basic GPRs
        if self.is_basic_gpr(reg_a):
            if not self.is_basic_gpr(reg_b):
                return False
            return any(
                reg_a_name in group and reg_b_name in group for group in self._GPR_GROUPS
            )
        # Check other GPRs
        ma = re.match(self._NUMBERED_GPR, reg_a_name)
        mb = re.match(self._NUMBERED_GPR, reg_b_name)
        # No dependencies unless both are numbered GPRs with the same number
        return bool(ma and mb and ma.group(1) == mb.group(1))

    def is_basic_gpr(self, register):
        """
        Check if register is a basic general purpose register (ebx, rax, ...).

        A register is considered basic if its name contains no digit and does not start with
        a vector register prefix.
        """
        name = register.name
        if any(char.isdigit() for char in name):
            return False
        return not name.lower().startswith(self._VECTOR_PREFIXES)

    def is_gpr(self, register):
        """
        Check if register is a general purpose register.

        :return: ``True`` for basic and numbered general-purpose registers, ``False``
                 otherwise, including when ``register`` is ``None``.
        """
        if register is None:
            return False
        if self.is_basic_gpr(register):
            return True
        return re.match(self._NUMBERED_GPR, register.name, re.IGNORECASE) is not None

    def is_vector_register(self, register):
        """Check if register is a vector register (mm, xmm, ymm or zmm followed by digits)."""
        if register is None or register.name is None:
            return False
        return register.name.rstrip(string.digits).lower() in self._VECTOR_PREFIXES

    def get_reg_type(self, register):
        """
        Get register type.

        :return: ``"gpr"`` for general-purpose registers, the lower-case name without its
                 number (e.g., ``"ymm"``) for vector registers, ``False`` if ``register`` is
                 ``None``.
        :raises ValueError: if the register is neither general-purpose nor vector.
        """
        if register is None:
            return False
        if self.is_gpr(register):
            return "gpr"
        if self.is_vector_register(register):
            return register.name.rstrip(string.digits).lower()
        raise ValueError

    def is_flag_dependend_of(self, flag_a, flag_b):
        """Check if ``flag_a`` is dependent on ``flag_b``"""
        # we assume flags are independent of each other, e.g., CF can be read while ZF gets written
        # TODO validate this assumption
        return flag_a.name == flag_b.name

    def get_regular_source_operands(self, instruction_form):
        """Get source operand of given instruction form assuming regular src/dst behavior."""
        operands = instruction_form.operands
        # if there is only one operand, assume it is a source operand
        if len(operands) == 1:
            return [operands[0]]
        # return all but last operand
        return list(operands[:-1])

    def get_regular_destination_operands(self, instruction_form):
        """Get destination operand of given instruction form assuming regular src/dst behavior."""
        operands = instruction_form.operands
        # if there is only one operand, assume no destination
        if len(operands) == 1:
            return []
        # return last operand
        return operands[-1:]

View File

@@ -1,11 +1,8 @@
#!/usr/bin/env python3
import string
import re
import pyparsing as pp
from osaca.parser import BaseParser
from osaca.parser import ParserX86
from osaca.parser.instruction_form import InstructionForm
from osaca.parser.directive import DirectiveOperand
from osaca.parser.memory import MemoryOperand
@@ -15,8 +12,9 @@ from osaca.parser.identifier import IdentifierOperand
from osaca.parser.immediate import ImmediateOperand
class ParserX86ATT(BaseParser):
class ParserX86ATT(ParserX86):
_instance = None
GAS_SUFFIXES = "bswlqt"
# Singleton pattern, as this is created very many times
def __new__(cls):
@@ -26,7 +24,61 @@ class ParserX86ATT(BaseParser):
def __init__(self):
super().__init__()
self.isa = "x86"
def start_marker(self):
    """
    Return the instruction pattern that marks the start of the kernel.

    The first element is a list of alternatives: a move of the immediate 111 into ``ebx``
    spelled either ``mov`` or ``movl``. It is followed by a ``byte`` directive with the
    parameters 100, 103, 144.
    """
    load_alternatives = [
        InstructionForm(
            mnemonic=spelling,
            operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")],
        )
        for spelling in ("mov", "movl")
    ]
    marker_bytes = InstructionForm(
        directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"])
    )
    return [load_alternatives, marker_bytes]
def end_marker(self):
    """
    Return the instruction pattern that marks the end of the kernel.

    The first element is a list of alternatives: a move of the immediate 222 into ``ebx``
    spelled either ``mov`` or ``movl``. It is followed by a ``byte`` directive with the
    parameters 100, 103, 144.
    """
    load_alternatives = [
        InstructionForm(
            mnemonic=spelling,
            operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")],
        )
        for spelling in ("mov", "movl")
    ]
    marker_bytes = InstructionForm(
        directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"])
    )
    return [load_alternatives, marker_bytes]
def normalize_instruction_form(self, instruction_form, isa_model, arch_model):
    """
    Normalize an AT&T-syntax instruction form in place.

    When the mnemonic is unknown to the machine model, ends with one of the GAS suffix
    letters, and is known once that last letter is removed, the shortened mnemonic replaces
    the original one. The form is flagged as normalized on the first call, and later calls
    return immediately.

    :param instruction_form: the instruction to normalize; modified in place.
    :param isa_model: unused by this parser.
    :param arch_model: machine model queried through ``get_instruction``.
    """
    if instruction_form.normalized:
        return
    instruction_form.normalized = True

    full_name = instruction_form.mnemonic
    if not full_name:
        return
    if arch_model.get_instruction(full_name, instruction_form.operands):
        # The model knows the instruction as written; nothing to do.
        return

    # Check for instruction without GAS suffix.
    if full_name[-1] not in self.GAS_SUFFIXES:
        return
    stem = full_name[:-1]
    if arch_model.get_instruction(stem, instruction_form.operands):
        instruction_form.mnemonic = stem
def construct_parser(self):
"""Create parser for x86 AT&T ISA."""
@@ -253,10 +305,10 @@ class ParserX86ATT(BaseParser):
if result is None:
try:
result = self.parse_instruction(line)
except pp.ParseException:
except pp.ParseException as e:
raise ValueError(
"Could not parse instruction on line {}: {!r}".format(line_number, line)
)
) from e
instruction_form.mnemonic = result.mnemonic
instruction_form.operands = result.operands
instruction_form.comment = result.comment
@@ -393,90 +445,3 @@ class ParserX86ATT(BaseParser):
return imd.value
# identifier
return imd
def is_flag_dependend_of(self, flag_a, flag_b):
"""Check if ``flag_a`` is dependent on ``flag_b``"""
# we assume flags are independent of each other, e.g., CF can be read while ZF gets written
# TODO validate this assumption
return flag_a.name == flag_b.name
def is_reg_dependend_of(self, reg_a, reg_b):
"""Check if ``reg_a`` is dependent on ``reg_b``"""
reg_a_name = reg_a.name.upper()
reg_b_name = reg_b.name.upper()
# Check if they are the same registers
if reg_a_name == reg_b_name:
return True
# Check vector registers first
if self.is_vector_register(reg_a):
if self.is_vector_register(reg_b):
if reg_a_name[1:] == reg_b_name[1:]:
# Registers in the same vector space
return True
return False
# Check basic GPRs
gpr_groups = {
"A": ["RAX", "EAX", "AX", "AH", "AL"],
"B": ["RBX", "EBX", "BX", "BH", "BL"],
"C": ["RCX", "ECX", "CX", "CH", "CL"],
"D": ["RDX", "EDX", "DX", "DH", "DL"],
"SP": ["RSP", "ESP", "SP", "SPL"],
"SRC": ["RSI", "ESI", "SI", "SIL"],
"DST": ["RDI", "EDI", "DI", "DIL"],
}
if self.is_basic_gpr(reg_a):
if self.is_basic_gpr(reg_b):
for dep_group in gpr_groups.values():
if reg_a_name in dep_group:
if reg_b_name in dep_group:
return True
return False
# Check other GPRs
ma = re.match(r"R([0-9]+)[DWB]?", reg_a_name)
mb = re.match(r"R([0-9]+)[DWB]?", reg_b_name)
if ma and mb and ma.group(1) == mb.group(1):
return True
# No dependencies
return False
def is_basic_gpr(self, register):
"""Check if register is a basic general purpose register (ebi, rax, ...)"""
if any(char.isdigit() for char in register.name) or any(
register.name.lower().startswith(x) for x in ["mm", "xmm", "ymm", "zmm"]
):
return False
return True
def is_gpr(self, register):
"""Check if register is a general purpose register"""
if register is None:
return False
if self.is_basic_gpr(register):
return True
return re.match(r"R([0-9]+)[DWB]?", register.name, re.IGNORECASE)
def is_vector_register(self, register):
"""Check if register is a vector register"""
if register is None or register.name is None:
return False
if register.name.rstrip(string.digits).lower() in [
"mm",
"xmm",
"ymm",
"zmm",
]:
return True
return False
def get_reg_type(self, register):
"""Get register type"""
if register is None:
return False
if self.is_gpr(register):
return "gpr"
elif self.is_vector_register(register):
return register.name.rstrip(string.digits).lower()
raise ValueError

View File

@@ -0,0 +1,807 @@
#!/usr/bin/env python3
import pyparsing as pp
import unicodedata
from osaca.parser import ParserX86
from osaca.parser.directive import DirectiveOperand
from osaca.parser.identifier import IdentifierOperand
from osaca.parser.immediate import ImmediateOperand
from osaca.parser.instruction_form import InstructionForm
from osaca.parser.label import LabelOperand
from osaca.parser.memory import MemoryOperand
from osaca.parser.register import RegisterOperand
# We assume any non-ASCII characters except control characters and line terminators can be part of
# identifiers; this is based on the assumption that no assembler uses non-ASCII white space and
# syntax characters.
# This approach is described at the end of https://www.unicode.org/reports/tr55/#Whitespace-Syntax.
# It is appropriate for tools, such as this one, which process source code but do not fully validate
# it (in this case, that's the job of the assembler).
NON_ASCII_PRINTABLE_CHARACTERS = "".join(
chr(cp)
for cp in range(0x80, 0x10FFFF + 1)
if unicodedata.category(chr(cp)) not in ("Cc", "Zl", "Zp", "Cs", "Cn")
)
# References:
# ASM386 Assembly Language Reference, document number 469165-003, https://mirror.math.princeton.edu/pub/oldlinux/Linux.old/Ref-docs/asm-ref.pdf.
# Microsoft Macro Assembler BNF Grammar, https://learn.microsoft.com/en-us/cpp/assembler/masm/masm-bnf-grammar?view=msvc-170.
# Intel Architecture Code Analyzer User's Guide, https://www.intel.com/content/dam/develop/external/us/en/documents/intel-architecture-code-analyzer-3-0-users-guide-157552.pdf.
class ParserX86Intel(ParserX86):
_instance = None
# Singleton pattern, as this is created very many times.
def __new__(cls):
    """Return the instance cached in ``cls._instance``, creating it on first use."""
    instance = cls._instance
    if instance is None:
        instance = super(ParserX86Intel, cls).__new__(cls)
        cls._instance = instance
    return instance
def __init__(self):
"""Run the base-class initialization and start with an empty ``_equ`` dictionary."""
super().__init__()
# NOTE(review): presumably maps names defined by EQU-style directives to their values;
# confirm against the code that fills it.
self._equ = {}
# The IACA manual says: "For Microsoft* Visual C++ compiler, 64-bit version, use
# IACA_VC64_START and IACA_VC64_END, instead" (of IACA_START and IACA_END).
# TODO: Inconveniently, the code generated with optimization disabled (/Od) has two
# instructions. We should support both patterns, but then who runs OSACA with /Od?
def start_marker(self):
    """
    Return the instruction pattern that marks the start of the kernel.

    The pattern is a single ``mov`` of the immediate 111 to the memory operand with base
    register ``GS`` and offset 111.
    """
    marker_location = MemoryOperand(
        base=RegisterOperand(name="GS"), offset=ImmediateOperand(value=111)
    )
    store_magic_value = InstructionForm(
        mnemonic="mov",
        operands=[marker_location, ImmediateOperand(value=111)],
    )
    return [store_magic_value]
def end_marker(self):
    """
    Return the instruction pattern that marks the end of the kernel.

    The pattern is a single ``mov`` of the immediate 222 to the memory operand with base
    register ``GS`` and offset 222.
    """
    marker_location = MemoryOperand(
        base=RegisterOperand(name="GS"), offset=ImmediateOperand(value=222)
    )
    store_magic_value = InstructionForm(
        mnemonic="mov",
        operands=[marker_location, ImmediateOperand(value=222)],
    )
    return [store_magic_value]
def normalize_instruction_form(self, instruction_form, isa_model, arch_model):
"""
Normalize an Intel-syntax instruction form in place so that it matches the models.

If the model indicates that this instruction has a single destination that is the last
operand, reverse the order of the operands. This effectively converts the Intel
syntax to the AT&T one.

In addition, the mnemonic may be replaced by its VEX-encoded or non-VEX-encoded
counterpart, or be given a "d"/"q" suffix derived from the data type of a memory operand,
when only that variant is found in the models.

The form is flagged as normalized on the first call; later calls return immediately.

:param instruction_form: the instruction to normalize; modified in place.
:param isa_model: ISA model, queried through ``get_instruction``; its entries provide
                  the source/destination flags of the operands.
:param arch_model: architecture model, queried through ``get_instruction``.
"""
# Idempotence: a form that was already normalized is left untouched.
if instruction_form.normalized:
return
instruction_form.normalized = True
mnemonic = instruction_form.mnemonic
# Nothing to normalize when there is no mnemonic.
if not mnemonic:
return
# The model may only contain the VEX-encoded instruction and we may have the non-VEX-encoded
# one, or vice-versa. Note that this doesn't work when the arguments differ between VEX-
# encoded and non-VEX-encoded, e.g., for psubq.
if not arch_model.get_instruction(mnemonic, len(instruction_form.operands)):
if mnemonic[0] == "v":
unvexed_mnemonic = mnemonic[1:]
if arch_model.get_instruction(unvexed_mnemonic, len(instruction_form.operands)):
mnemonic = unvexed_mnemonic
else:
vexed_mnemonic = "v" + mnemonic
if arch_model.get_instruction(vexed_mnemonic, len(instruction_form.operands)):
mnemonic = vexed_mnemonic
instruction_form.mnemonic = mnemonic
# We cannot pass the operands because they may not match before the reordering. We just
# pass the arity instead. Also, this must use the ISA model, because that's where the
# source/destination information is found.
model = isa_model.get_instruction(mnemonic, len(instruction_form.operands))
has_single_destination_at_end = False
has_destination = False
if model:
# Scan the operands of the model entry to find out whether the instruction has a
# destination, and whether it has a single destination located at the end.
for o in model.operands:
if o.source:
if has_destination:
has_single_destination_at_end = False
if o.destination:
if has_destination:
has_single_destination_at_end = False
else:
has_destination = True
has_single_destination_at_end = True
else:
# Without a model entry, fall back on the operand count.
# if there is only one operand, assume it is a source operand
has_single_destination_at_end = len(instruction_form.operands) > 1
if has_single_destination_at_end:
# It is important to reverse the operands, we cannot just move the first one last. This
# makes a difference for instructions with 3 operands or more, such as roundsd: the
# model files expect the rounding mode (an immediate) first but the Intel syntax has it
# last.
instruction_form.operands.reverse()
# A hack to help with comparison instruction: if the instruction is in the model, and has
# exactly two sources, swap its operands.
if (
model
and not has_destination
and len(instruction_form.operands) == 2
and not isa_model.get_instruction(mnemonic, instruction_form.operands)
and not arch_model.get_instruction(mnemonic, instruction_form.operands)
):
instruction_form.operands.reverse()
# If the instruction has a well-known data type, append a suffix.
data_type_to_suffix = {"DWORD": "d", "QWORD": "q"}
for o in instruction_form.operands:
if isinstance(o, MemoryOperand) and o.data_type:
suffix = data_type_to_suffix.get(o.data_type, None)
if suffix:
suffixed_mnemonic = mnemonic + suffix
# Only adopt the suffixed mnemonic if one of the models knows it for this arity.
if isa_model.get_instruction(
suffixed_mnemonic, len(instruction_form.operands)
) or arch_model.get_instruction(
suffixed_mnemonic, len(instruction_form.operands)
):
instruction_form.mnemonic = suffixed_mnemonic
break
def construct_parser(self):
    """
    Create parser for x86 Intel ISA.

    Builds the pyparsing grammars for Intel-syntax (MASM-style) assembly and stores them on
    the instance as ``self.comment``, ``self.register``, ``self.instruction_parser``,
    ``self.label`` and ``self.directive``.
    """
    # Numeric literal.
    binary_number = pp.Combine(pp.Word("01") + pp.CaselessLiteral("B"))
    octal_number = pp.Combine(pp.Word("01234567") + pp.CaselessLiteral("O"))
    decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums))
    hex_number = pp.Combine(pp.Word(pp.hexnums) + pp.CaselessLiteral("H"))
    float_number = pp.Combine(
        pp.Optional(pp.Literal("-")) + pp.Word(pp.nums) + pp.Word(".", pp.nums)
    ).setResultsName("value")
    integer_number = (
        binary_number ^ octal_number ^ decimal_number ^ hex_number
    ).setResultsName("value")

    # Comment.
    self.comment = pp.Word(";#", exact=1) + pp.Group(
        pp.ZeroOrMore(pp.Word(pp.printables + NON_ASCII_PRINTABLE_CHARACTERS))
    ).setResultsName(self.comment_id)

    # Types.
    data_type = (
        pp.CaselessKeyword("BYTE")
        | pp.CaselessKeyword("DWORD")
        | pp.CaselessKeyword("FWORD")
        | pp.CaselessKeyword("MMWORD")
        | pp.CaselessKeyword("OWORD")
        | pp.CaselessKeyword("QWORD")
        | pp.CaselessKeyword("REAL10")
        | pp.CaselessKeyword("REAL4")
        | pp.CaselessKeyword("REAL8")
        | pp.CaselessKeyword("SBYTE")
        | pp.CaselessKeyword("SDWORD")
        | pp.CaselessKeyword("SQWORD")
        | pp.CaselessKeyword("SWORD")
        | pp.CaselessKeyword("TBYTE")
        | pp.CaselessKeyword("WORD")
        | pp.CaselessKeyword("XMMWORD")
        | pp.CaselessKeyword("YMMWORD")
    ).setResultsName("data_type")

    # Identifier. Note that $ is not mentioned in the ASM386 Assembly Language Reference,
    # but it is mentioned in the MASM syntax. < and > apparently show up in C++ mangled names.
    # ICC allows ".", at least in labels.
    first = pp.Word(pp.alphas + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>", exact=1)
    rest = pp.Word(pp.alphanums + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>")
    identifier = pp.Group(
        pp.Combine(first + pp.Optional(rest)).setResultsName("name")
    ).setResultsName("identifier")

    # Register.
    # This follows the MASM grammar. The word, dword and byte views of R8-R15 are listed in
    # full: any name missing here would be matched as an identifier instead of a register.
    special_register = (
        pp.CaselessKeyword("CR0")
        | pp.CaselessKeyword("CR2")
        | pp.CaselessKeyword("CR3")
        | pp.CaselessKeyword("DR0")
        | pp.CaselessKeyword("DR1")
        | pp.CaselessKeyword("DR2")
        | pp.CaselessKeyword("DR3")
        | pp.CaselessKeyword("DR6")
        | pp.CaselessKeyword("DR7")
        | pp.CaselessKeyword("TR3")
        | pp.CaselessKeyword("TR4")
        | pp.CaselessKeyword("TR5")
        | pp.CaselessKeyword("TR6")
        | pp.CaselessKeyword("TR7")
    ).setResultsName("name")
    gp_register = (
        pp.CaselessKeyword("AX")
        | pp.CaselessKeyword("EAX")
        | pp.CaselessKeyword("CX")
        | pp.CaselessKeyword("ECX")
        | pp.CaselessKeyword("DX")
        | pp.CaselessKeyword("EDX")
        | pp.CaselessKeyword("BX")
        | pp.CaselessKeyword("EBX")
        | pp.CaselessKeyword("DI")
        | pp.CaselessKeyword("EDI")
        | pp.CaselessKeyword("SI")
        | pp.CaselessKeyword("ESI")
        | pp.CaselessKeyword("BP")
        | pp.CaselessKeyword("EBP")
        | pp.CaselessKeyword("SP")
        | pp.CaselessKeyword("ESP")
        | pp.CaselessKeyword("R8W")
        | pp.CaselessKeyword("R8D")
        | pp.CaselessKeyword("R9W")
        | pp.CaselessKeyword("R9D")
        | pp.CaselessKeyword("R10W")
        | pp.CaselessKeyword("R10D")
        | pp.CaselessKeyword("R11W")
        | pp.CaselessKeyword("R11D")
        | pp.CaselessKeyword("R12W")
        | pp.CaselessKeyword("R12D")
        | pp.CaselessKeyword("R13W")
        | pp.CaselessKeyword("R13D")
        | pp.CaselessKeyword("R14W")
        | pp.CaselessKeyword("R14D")
        | pp.CaselessKeyword("R15W")
        | pp.CaselessKeyword("R15D")
    ).setResultsName("name")
    byte_register = (
        pp.CaselessKeyword("AL")
        | pp.CaselessKeyword("AH")
        | pp.CaselessKeyword("CL")
        | pp.CaselessKeyword("CH")
        | pp.CaselessKeyword("DL")
        | pp.CaselessKeyword("DH")
        | pp.CaselessKeyword("BL")
        | pp.CaselessKeyword("BH")
        | pp.CaselessKeyword("R8B")
        | pp.CaselessKeyword("R9B")
        | pp.CaselessKeyword("R10B")
        | pp.CaselessKeyword("R11B")
        | pp.CaselessKeyword("R12B")
        | pp.CaselessKeyword("R13B")
        | pp.CaselessKeyword("R14B")
        | pp.CaselessKeyword("R15B")
    ).setResultsName("name")
    qword_register = (
        pp.CaselessKeyword("RAX")
        | pp.CaselessKeyword("RCX")
        | pp.CaselessKeyword("RDX")
        | pp.CaselessKeyword("RBX")
        | pp.CaselessKeyword("RSP")
        | pp.CaselessKeyword("RBP")
        | pp.CaselessKeyword("RSI")
        | pp.CaselessKeyword("RDI")
        | pp.CaselessKeyword("R8")
        | pp.CaselessKeyword("R9")
        | pp.CaselessKeyword("R10")
        | pp.CaselessKeyword("R11")
        | pp.CaselessKeyword("R12")
        | pp.CaselessKeyword("R13")
        | pp.CaselessKeyword("R14")
        | pp.CaselessKeyword("R15")
    ).setResultsName("name")
    fpu_register = pp.Combine(
        pp.CaselessKeyword("ST")
        + pp.Optional(pp.Literal("(") + pp.Word("01234567") + pp.Literal(")"))
    ).setResultsName("name")
    xmm_register = pp.Combine(pp.CaselessLiteral("XMM") + pp.Word(pp.nums)) | pp.Combine(
        pp.CaselessLiteral("XMM1") + pp.Word("012345")
    )
    simd_register = (
        pp.Combine(pp.CaselessLiteral("MM") + pp.Word("01234567"))
        | xmm_register
        | pp.Combine(pp.CaselessLiteral("YMM") + pp.Word(pp.nums))
        | pp.Combine(pp.CaselessLiteral("YMM1") + pp.Word("012345"))
    ).setResultsName("name")
    segment_register = (
        pp.CaselessKeyword("CS")
        | pp.CaselessKeyword("DS")
        | pp.CaselessKeyword("ES")
        | pp.CaselessKeyword("FS")
        | pp.CaselessKeyword("GS")
        | pp.CaselessKeyword("SS")
    ).setResultsName("name")
    self.register = pp.Group(
        special_register
        | gp_register
        | byte_register
        | qword_register
        | fpu_register
        | simd_register
        | segment_register
        | pp.CaselessKeyword("RIP")
    ).setResultsName(self.register_id)

    # Register expressions.
    base_register = self.register
    index_register = self.register
    scale = pp.Word("1248", exact=1)
    base = base_register.setResultsName("base")
    displacement = pp.Group(
        pp.Group(integer_number ^ identifier).setResultsName(self.immediate_id)
    ).setResultsName("displacement")
    short_indexed = index_register.setResultsName("index")
    long_indexed = (
        index_register.setResultsName("index")
        + pp.Literal("*")
        + scale.setResultsName("scale")
    )
    indexed = pp.Group(short_indexed ^ long_indexed).setResultsName("indexed")
    operator = pp.Word("+-", exact=1)
    operator_index = pp.Word("+-", exact=1).setResultsName("operator_idx")
    operator_displacement = pp.Word("+-", exact=1).setResultsName("operator_disp")
    # Syntax:
    # `base` always precedes `indexed`.
    # `short_indexed` is only allowed if it follows `base`, not alone.
    # `displacement` can go anywhere.
    # It's easier to list all the alternatives than to represent these rules using complicated
    # `Optional` and what not.
    register_expression = pp.Group(
        pp.Literal("[")
        + (
            base
            ^ (base + operator_displacement + displacement)
            ^ (base + operator_displacement + displacement + operator_index + indexed)
            ^ (base + operator_index + indexed)
            ^ (base + operator_index + indexed + operator_displacement + displacement)
            ^ (displacement + operator + base)
            ^ (displacement + operator + base + operator_index + indexed)
            ^ (
                displacement
                + operator_index
                + pp.Group(long_indexed).setResultsName("indexed")
            )
            ^ pp.Group(long_indexed).setResultsName("indexed")
            ^ (
                pp.Group(long_indexed).setResultsName("indexed")
                + operator_displacement
                + displacement
            )
        )
        + pp.Literal("]")
    ).setResultsName("register_expression")

    # Immediate.
    immediate = pp.Group(integer_number | float_number | identifier).setResultsName(
        self.immediate_id
    )

    # Expressions.
    # The ASM86 manual has weird expressions on page 130 (displacement outside of the register
    # expression, multiple register expressions). Let's ignore those for now, but see
    # https://stackoverflow.com/questions/71540754/why-sometimes-use-offset-flatlabel-and-sometimes-not.
    address_expression = pp.Group(
        self.register.setResultsName("segment") + pp.Literal(":") + immediate
        ^ immediate + register_expression
        ^ register_expression
        ^ identifier + pp.Optional(operator + immediate)
    ).setResultsName("address_expression")
    offset_expression = pp.Group(
        pp.CaselessKeyword("OFFSET")
        + pp.Group(
            pp.CaselessKeyword("GROUP")
            | pp.CaselessKeyword("SEGMENT")
            | pp.CaselessKeyword("FLAT")
        )
        # The MASM grammar has the ":" immediately after "OFFSET", but that's not what MSVC
        # outputs.
        + pp.Literal(":")
        + identifier.setResultsName("identifier")
        + pp.Optional(pp.Literal("+") + immediate.setResultsName("displacement"))
    ).setResultsName("offset_expression")
    ptr_expression = pp.Group(
        data_type + pp.CaselessKeyword("PTR") + address_expression
    ).setResultsName("ptr_expression")
    short_expression = pp.Group(pp.CaselessKeyword("SHORT") + identifier).setResultsName(
        "short_expression"
    )

    # Instructions.
    mnemonic = pp.Word(pp.alphas, pp.alphanums).setResultsName("mnemonic")
    operand = pp.Group(
        self.register
        | pp.Group(
            offset_expression | ptr_expression | short_expression | address_expression
        ).setResultsName(self.memory_id)
        | immediate
    )
    self.instruction_parser = (
        mnemonic
        + pp.Optional(operand.setResultsName("operand1"))
        + pp.Optional(pp.Suppress(pp.Literal(",")))
        + pp.Optional(operand.setResultsName("operand2"))
        + pp.Optional(pp.Suppress(pp.Literal(",")))
        + pp.Optional(operand.setResultsName("operand3"))
        + pp.Optional(pp.Suppress(pp.Literal(",")))
        + pp.Optional(operand.setResultsName("operand4"))
        + pp.Optional(self.comment)
    )

    # Label.
    self.label = pp.Group(
        identifier.setResultsName("name")
        + pp.Literal(":")
        + pp.Optional(self.instruction_parser)
        + pp.Optional(self.comment)
    ).setResultsName(self.label_id)

    # Directives.
    # The identifiers at the beginning of a directive cannot start with a "." otherwise we end
    # up with ambiguities.
    directive_first = pp.Word(pp.alphas + NON_ASCII_PRINTABLE_CHARACTERS + "$?@_<>", exact=1)
    directive_rest = pp.Word(pp.alphanums + NON_ASCII_PRINTABLE_CHARACTERS + ".$?@_<>")
    directive_identifier = pp.Group(
        pp.Combine(directive_first + pp.Optional(directive_rest)).setResultsName("name")
    ).setResultsName("identifier")
    # Parameter can be any quoted string or sequence of characters besides ';' (for comments)
    # or ',' (parameter delimiter). See ASM386 p. 38.
    directive_parameter = (
        pp.quotedString
        ^ (
            pp.Word(pp.printables + NON_ASCII_PRINTABLE_CHARACTERS, excludeChars=",;")
            + pp.Optional(pp.Suppress(pp.Literal(",")))
        )
        ^ pp.Suppress(pp.Literal(","))
    )
    # The directives that don't start with a "." are ambiguous with instructions, so we list
    # them explicitly.
    # TODO: The directives that are types introduce a nasty ambiguity with instructions. Skip
    # them for now, apparently the MSVC output uses the short D? directives.
    directive_keywords = (
        pp.CaselessKeyword("ALIAS")
        | pp.CaselessKeyword("ALIGN")
        | pp.CaselessKeyword("ASSUME")
        # | pp.CaselessKeyword("BYTE")
        | pp.CaselessKeyword("CATSTR")
        | pp.CaselessKeyword("COMM")
        | pp.CaselessKeyword("COMMENT")
        | pp.CaselessKeyword("DB")
        | pp.CaselessKeyword("DD")
        | pp.CaselessKeyword("DF")
        | pp.CaselessKeyword("DQ")
        | pp.CaselessKeyword("DT")
        | pp.CaselessKeyword("DW")
        # | pp.CaselessKeyword("DWORD")
        | pp.CaselessKeyword("ECHO")
        | pp.CaselessKeyword("END")
        | pp.CaselessKeyword("ENDP")
        | pp.CaselessKeyword("ENDS")
        | pp.CaselessKeyword("EQU")
        | pp.CaselessKeyword("EVEN")
        | pp.CaselessKeyword("EXTRN")
        | pp.CaselessKeyword("EXTERNDEF")
        # | pp.CaselessKeyword("FWORD")
        | pp.CaselessKeyword("GROUP")
        | pp.CaselessKeyword("INCLUDE")
        | pp.CaselessKeyword("INCLUDELIB")
        | pp.CaselessKeyword("INSTR")
        | pp.CaselessKeyword("INVOKE")
        | pp.CaselessKeyword("LABEL")
        # | pp.CaselessKeyword("MMWORD")
        | pp.CaselessKeyword("OPTION")
        | pp.CaselessKeyword("ORG")
        | pp.CaselessKeyword("PAGE")
        | pp.CaselessKeyword("POPCONTEXT")
        | pp.CaselessKeyword("PROC")
        | pp.CaselessKeyword("PROTO")
        | pp.CaselessKeyword("PUBLIC")
        | pp.CaselessKeyword("PUSHCONTEXT")
        # | pp.CaselessKeyword("QWORD")
        # | pp.CaselessKeyword("REAL10")
        # | pp.CaselessKeyword("REAL4")
        # | pp.CaselessKeyword("REAL8")
        | pp.CaselessKeyword("RECORD")
        # | pp.CaselessKeyword("SBYTE")
        # | pp.CaselessKeyword("SDWORD")
        | pp.CaselessKeyword("SEGMENT")
        | pp.CaselessKeyword("SIZESTR")
        | pp.CaselessKeyword("STRUCT")
        | pp.CaselessKeyword("SUBSTR")
        | pp.CaselessKeyword("SUBTITLE")
        # | pp.CaselessKeyword("SWORD")
        # | pp.CaselessKeyword("TBYTE")
        | pp.CaselessKeyword("TEXTEQU")
        | pp.CaselessKeyword("TITLE")
        | pp.CaselessKeyword("TYPEDEF")
        | pp.CaselessKeyword("UNION")
        # | pp.CaselessKeyword("WORD")
        # | pp.CaselessKeyword("XMMWORD")
        # | pp.CaselessKeyword("YMMWORD")
    )
    self.directive = pp.Group(
        pp.Optional(~directive_keywords + directive_identifier)
        + (
            pp.Combine(pp.Literal(".") + pp.Word(pp.alphanums + "_"))
            | pp.Literal("=")
            | directive_keywords
        ).setResultsName("name")
        + pp.ZeroOrMore(directive_parameter).setResultsName("parameters")
        + pp.Optional(self.comment)
    ).setResultsName(self.directive_id)
def parse_line(self, line, line_number=None):
    """
    Parse line and return instruction form.

    The line is tried, in order, as a comment, a label (optionally followed by an
    instruction), a directive and finally a plain instruction.

    :param str line: line of assembly code
    :param line_number: default None, identifier of instruction form
    :type line_number: int, optional
    :return: instruction form describing the parsed asm line (comment, label, directive or
        instruction)
    :raises ValueError: if the line matches none of the grammars
    """
    instruction_form = InstructionForm(line=line, line_number=line_number)
    result = None

    # 1. Parse comment.
    try:
        result = self.process_operand(self.comment.parseString(line, parseAll=True))
        instruction_form.comment = " ".join(result[self.comment_id])
    except pp.ParseException:
        pass

    # 2. Parse label.
    if not result:
        try:
            # Returns tuple with label operand and the instruction form of the instruction
            # following the label on the same line, if any.
            result = self.process_operand(self.label.parseString(line, parseAll=True))
            instruction_form.label = result[0].name
            labelled_instruction = result[1]
            if labelled_instruction is not None:
                # The second element is an instruction form, not a list of comment words:
                # copy its fields instead of joining it as text.
                instruction_form.mnemonic = labelled_instruction.mnemonic
                instruction_form.operands = labelled_instruction.operands
                instruction_form.comment = labelled_instruction.comment
        except pp.ParseException:
            pass

    # 3. Parse directive.
    if not result:
        try:
            # Returns tuple with directive operand and comment, if any.
            result = self.process_operand(self.directive.parseString(line, parseAll=True))
            instruction_form.directive = result[0]
            if result[1]:
                instruction_form.comment = " ".join(result[1])
        except pp.ParseException:
            pass

    # 4. Parse instruction.
    if not result:
        try:
            result = self.parse_instruction(line)
        except pp.ParseException as e:
            raise ValueError(
                "Could not parse instruction on line {}: {!r}".format(line_number, line)
            ) from e
        instruction_form.mnemonic = result.mnemonic
        instruction_form.operands = result.operands
        instruction_form.comment = result.comment

    return instruction_form
def make_instruction(self, parse_result):
    """
    Build an instruction form from the tokens of a parsed instruction.

    :param parse_result: tuple resulting from calling `parseString` on the `instruction_parser`.
    :returns: instruction form holding the mnemonic, the post-processed operands (at most
        four, in source order) and the comment text, if any
    """
    operand_names = ("operand1", "operand2", "operand3", "operand4")
    # Only the operand slots that actually matched are present in the parse result.
    operands = [
        self.process_operand(parse_result[operand_name])
        for operand_name in operand_names
        if operand_name in parse_result
    ]

    comment = None
    if self.comment_id in parse_result:
        comment = " ".join(parse_result[self.comment_id])

    return InstructionForm(
        mnemonic=parse_result.mnemonic,
        operands=operands,
        label_id=None,
        comment_id=comment,
    )
def parse_instruction(self, instruction):
    """
    Parse a single instruction line.

    The whole string must match the instruction grammar.

    :param str instruction: Assembly line string.
    :returns: parsed instruction form, as built by `make_instruction`
    """
    tokens = self.instruction_parser.parseString(instruction, parseAll=True)
    return self.make_instruction(tokens)
def parse_register(self, register_string):
    """
    Parse register string.

    :returns: the post-processed register operand, or None when the string is not a register
    """
    try:
        tokens = self.register.parseString(register_string, parseAll=True)
        return self.process_operand(tokens)
    except pp.ParseException:
        return None
def process_operand(self, operand):
    """
    Post-process operand.

    Dispatches on the first results name found in `operand`, in the order listed below, and
    returns `operand` unchanged when none of them is present.
    """
    handlers = (
        (self.directive_id, self.process_directive),
        (self.identifier, self.process_identifier),
        (self.immediate_id, self.process_immediate),
        (self.label_id, self.process_label),
        (self.memory_id, self.process_memory_address),
        (self.register_id, self.process_register),
    )
    for results_name, handler in handlers:
        if results_name in operand:
            return handler(operand[results_name])
    return operand
def process_directive(self, directive):
    """
    Post-process a directive.

    :returns: tuple of the directive operand and the value stored under "comment" in the
        parse result (None when absent)
    """
    parameters = list(directive.parameters)
    # TODO: This is putting the identifier in the parameters. No idea if it's right.
    if "identifier" in directive:
        parameters.insert(0, directive.identifier.name)
    directive_operand = DirectiveOperand(name=directive.name, parameters=parameters or None)
    if directive.name == "=":
        # Interpret the "=" directives because the generated assembly is full of symbols that
        # are defined there.
        symbol, value = parameters[0], parameters[1]
        self._equ[symbol] = value
    return directive_operand, directive.get("comment")
def process_register(self, operand):
    """Post-process a register: wrap the parsed register name in a register operand."""
    register_name = operand.name
    return RegisterOperand(name=register_name)
def process_register_expression(self, register_expression):
    """
    Convert a bracketed register expression into a memory operand.

    :param register_expression: parse result of the `register_expression` grammar, with the
        optional results names "base", "displacement", "indexed", "operator_idx" and
        "operator_disp"
    :returns: memory operand with offset, base, index and scale filled in
    """
    base = register_expression.get("base")
    displacement = register_expression.get("displacement")
    indexed = register_expression.get("indexed")
    index = None
    scale = 1
    if indexed:
        index = indexed.get("index")
        scale = int(indexed.get("scale", "1"), 0)
        # The grammar stores the sign in front of the index under "operator_idx"; looking up
        # any other name would never match and would silently drop a negative sign.
        if register_expression.get("operator_idx") == "-":
            scale *= -1
    displacement_op = self.process_immediate(displacement.immediate) if displacement else None
    if displacement_op and register_expression.get("operator_disp") == "-":
        displacement_op.value *= -1
    base_op = RegisterOperand(name=base.name) if base else None
    index_op = RegisterOperand(name=index.name) if index else None
    new_memory = MemoryOperand(
        offset=displacement_op, base=base_op, index=index_op, scale=scale
    )
    return new_memory
def process_address_expression(self, address_expression, data_type=None):
    """
    Convert an address expression into an operand.

    :param address_expression: parse result of the `address_expression` grammar
    :param data_type: data type taken from an enclosing `PTR` expression, if any
    :returns: a memory operand, or the bare identifier when the expression is only an
        identifier without data type or offset
    """
    # TODO: It seems that we could have a prefix immediate operand, a displacement in the
    # brackets, and an offset. How all of this works together is somewhat mysterious.
    immediate_operand = None
    if "immediate" in address_expression:
        immediate_operand = self.process_immediate(address_expression.immediate)

    register_expression = None
    if "register_expression" in address_expression:
        register_expression = self.process_register_expression(
            address_expression.register_expression
        )

    segment = None
    if "segment" in address_expression:
        segment = self.process_register(address_expression.segment)

    identifier = None
    if "identifier" in address_expression:
        identifier = self.process_identifier(address_expression.identifier)

    # Bracketed expression, possibly prefixed by an immediate that becomes the offset.
    if register_expression:
        if immediate_operand:
            register_expression.offset = immediate_operand
        if data_type:
            register_expression.data_type = data_type
        return register_expression

    # Segment-relative address.
    if segment:
        return MemoryOperand(base=segment, offset=immediate_operand, data_type=data_type)

    # Nothing but an immediate (or nothing at all).
    if not identifier:
        return MemoryOperand(base=immediate_operand, data_type=data_type)

    if immediate_operand:
        identifier.offset = immediate_operand
    elif not data_type:
        # An address expression without a data type or an offset is just an identifier.
        # This matters for jumps.
        return identifier
    return MemoryOperand(offset=identifier, data_type=data_type)
def process_offset_expression(self, offset_expression):
    """
    Convert an `OFFSET` expression into a memory operand.

    :param offset_expression: parse result of the `offset_expression` grammar
    :returns: memory operand whose offset is the referenced identifier; the identifier's own
        `offset` attribute holds the optional displacement
    """
    # TODO: Record that this is an offset expression.
    displacement = (
        self.process_immediate(offset_expression.displacement)
        if "displacement" in offset_expression
        else None
    )
    # Look the sign up in the parse result; comparing the literal "operator_disp" with "-" is
    # always false. The current grammar only accepts "+" here and does not name it, so this
    # only triggers if the grammar starts recording a "-" under "operator_disp".
    if displacement and offset_expression.get("operator_disp") == "-":
        displacement.value *= -1
    identifier = self.process_identifier(offset_expression.identifier)
    identifier.offset = displacement
    return MemoryOperand(offset=identifier)
def process_ptr_expression(self, ptr_expression):
    """Post-process a `<type> PTR <address>` expression, forwarding the data type."""
    # TODO: Do something with the data_type.
    address = ptr_expression.address_expression
    data_type = ptr_expression.data_type
    return self.process_address_expression(address, data_type)
def process_short_expression(self, short_expression):
    """Post-process a `SHORT <identifier>` expression into a label operand."""
    # TODO: Do something with the fact that it is short.
    target_name = short_expression.identifier.name
    return LabelOperand(name=target_name)
def process_memory_address(self, memory_address):
    """
    Post-process memory address operand.

    Dispatches on the kind of expression found in `memory_address`; returns it unchanged
    when it contains none of the known expression kinds.
    """
    handlers = (
        ("address_expression", self.process_address_expression),
        ("offset_expression", self.process_offset_expression),
        ("ptr_expression", self.process_ptr_expression),
        ("short_expression", self.process_short_expression),
    )
    for expression_kind, handler in handlers:
        if expression_kind in memory_address:
            return handler(memory_address[expression_kind])
    return memory_address
def process_label(self, label):
    """
    Post-process label asm line.

    :returns: tuple of the label operand and the instruction form of the instruction that
        follows the label on the same line (None when there is none)
    """
    # Remove duplicated 'name' level due to identifier. Note that there is no place to put the
    # comment, if any.
    label["name"] = label["name"]["name"]
    label_operand = LabelOperand(name=label.name)
    instruction = None
    if "mnemonic" in label:
        instruction = self.make_instruction(label)
    return label_operand, instruction
def process_immediate(self, immediate):
    """
    Post-process immediate operand.

    An immediate that wraps an identifier is handed to `process_identifier`; otherwise the
    literal is normalized to a number.
    """
    if "identifier" in immediate:
        # Actually an identifier, change declaration.
        return self.process_identifier(immediate.identifier)
    literal = immediate.get("sign", "") + immediate.value
    operand = ImmediateOperand(value=literal)
    operand.value = self.normalize_imd(operand)
    return operand
def process_identifier(self, identifier):
    """
    Post-process an identifier.

    Identifiers previously defined through an "=" directive are turned into immediates
    carrying the defined value; all others stay identifiers.
    """
    name = identifier.name
    if name not in self._equ:
        return IdentifierOperand(name=name)
    # Actually an immediate, change declaration.
    operand = ImmediateOperand(identifier=name, value=self._equ[name])
    operand.value = self.normalize_imd(operand)
    return operand
def normalize_imd(self, imd):
    """
    Normalize immediate to decimal based representation.

    :param imd: object whose `value` is either already a number or a string literal: a float
        (contains "."), or an integer with an optional leading sign and an optional radix
        suffix ``B`` (binary), ``O`` (octal) or ``H`` (hexadecimal), in either case
    :returns: `float` for float literals, `int` for integer literals, and `imd.value`
        unchanged when it is not a string
    """
    if isinstance(imd.value, str):
        if "." in imd.value:
            return float(imd.value)
        # Now parse depending on the base. The suffix is matched case-insensitively: values
        # coming from "=" directives are raw text and may use a lowercase suffix.
        base = {"B": 2, "O": 8, "H": 16}.get(imd.value[-1].upper(), 10)
        value = 0
        negative = imd.value[0] == "-"
        positive = imd.value[0] == "+"
        # Skip the sign, if any, and the radix suffix, if any.
        start = +(negative or positive)
        stop = len(imd.value) if base == 10 else -1
        for c in imd.value[start:stop]:
            value = value * base + int(c, base)
        return -value if negative else value
    else:
        return imd.value

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python3
"""Semantics object responsible for architecture specific semantic operations"""
import sys
import warnings
from itertools import chain
@@ -14,12 +13,22 @@ from osaca.parser.register import RegisterOperand
class ArchSemantics(ISASemantics):
GAS_SUFFIXES = "bswlqt"
def __init__(self, machine_model: MachineModel, path_to_yaml=None):
super().__init__(machine_model.get_ISA().lower(), path_to_yaml=path_to_yaml)
def __init__(self, parser, machine_model: MachineModel, path_to_yaml=None):
super().__init__(parser, path_to_yaml=path_to_yaml)
self._machine_model = machine_model
self._isa = machine_model.get_ISA().lower()
def normalize_instruction_form(self, instruction_form):
    """Let the parser normalize `instruction_form` using the ISA and machine models."""
    parser = self.parser
    parser.normalize_instruction_form(instruction_form, self.isa_model, self._machine_model)
def normalize_instruction_forms(self, instruction_forms):
    """Normalize every instruction form of `instruction_forms`, in order."""
    for form in instruction_forms:
        self.normalize_instruction_form(form)
def _check_normalized(self, instruction_forms):
for instruction_form in instruction_forms:
instruction_form.check_normalized()
# SUMMARY FUNCTION
def add_semantics(self, kernel):
@@ -29,6 +38,7 @@ class ArchSemantics(ISASemantics):
:param list kernel: kernel to apply semantics
"""
self._check_normalized(kernel)
for instruction_form in kernel:
self.assign_src_dst(instruction_form)
self.assign_tp_lt(instruction_form)
@@ -41,6 +51,7 @@ class ArchSemantics(ISASemantics):
:param list kernel: kernel to apply optimal port utilization
"""
self._check_normalized(kernel)
INC = 0.01
kernel.reverse()
port_list = self._machine_model.get_ports()
@@ -137,6 +148,7 @@ class ArchSemantics(ISASemantics):
def set_hidden_loads(self, kernel):
"""Hide loads behind stores if architecture supports hidden loads (deprecated)"""
self._check_normalized(kernel)
loads = [instr for instr in kernel if INSTR_FLAGS.HAS_LD in instr.flags]
stores = [instr for instr in kernel if INSTR_FLAGS.HAS_ST in instr.flags]
# Filter instructions including load and store
@@ -176,6 +188,7 @@ class ArchSemantics(ISASemantics):
# mark instruction form with semantic flags
def assign_tp_lt(self, instruction_form):
"""Assign throughput and latency to an instruction form."""
instruction_form.check_normalized()
flags = []
port_number = len(self._machine_model["ports"])
if instruction_form.mnemonic is None:
@@ -189,25 +202,6 @@ class ArchSemantics(ISASemantics):
instruction_data = self._machine_model.get_instruction(
instruction_form.mnemonic, instruction_form.operands
)
if (
not instruction_data
and self._isa == "x86"
and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
):
# check for instruction without GAS suffix
instruction_data = self._machine_model.get_instruction(
instruction_form.mnemonic[:-1], instruction_form.operands
)
if (
instruction_data is None
and self._isa == "aarch64"
and "." in instruction_form.mnemonic
):
# Check for instruction without shape/cc suffix
suffix_start = instruction_form.mnemonic.index(".")
instruction_data = self._machine_model.get_instruction(
instruction_form.mnemonic[:suffix_start], instruction_form.operands
)
if instruction_data:
# instruction form in DB
(
@@ -232,25 +226,6 @@ class ArchSemantics(ISASemantics):
instruction_data_reg = self._machine_model.get_instruction(
instruction_form.mnemonic, operands
)
if (
not instruction_data_reg
and self._isa == "x86"
and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
):
# check for instruction without GAS suffix
instruction_data_reg = self._machine_model.get_instruction(
instruction_form.mnemonic[:-1], operands
)
if (
instruction_data_reg is None
and self._isa == "aarch64"
and "." in instruction_form.mnemonic
):
# Check for instruction without shape/cc suffix
suffix_start = instruction_form.mnemonic.index(".")
instruction_data_reg = self._machine_model.get_instruction(
instruction_form.mnemonic[:suffix_start], operands
)
if instruction_data_reg:
assign_unknown = False
reg_type = self._parser.get_reg_type(
@@ -310,7 +285,7 @@ class ArchSemantics(ISASemantics):
# - all mem operands in src_dst are pre-/post_indexed
# since it is no mem store
if (
self._isa == "aarch64"
self._parser.isa() == "aarch64"
and not isinstance(
instruction_form.semantic_operands["destination"],
MemoryOperand,
@@ -406,6 +381,7 @@ class ArchSemantics(ISASemantics):
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
"""Apply performance data to instruction if it was found in the archDB"""
instruction_form.check_normalized()
throughput = instruction_data.throughput
port_pressure = self._machine_model.average_port_pressure(instruction_data.port_pressure)
instruction_form.port_uops = instruction_data.port_pressure
@@ -441,12 +417,12 @@ class ArchSemantics(ISASemantics):
def convert_op_to_reg(self, reg_type, regtype="0"):
"""Create register operand for a memory addressing operand"""
if self._isa == "x86":
if self._parser.isa() == "x86":
if reg_type == "gpr":
register = RegisterOperand(name="r" + str(int(regtype) + 9))
else:
register = RegisterOperand(name=reg_type + regtype)
elif self._isa == "aarch64":
elif self._parser.isa() == "aarch64":
register = RegisterOperand(name=regtype, prefix=reg_type)
return register

View File

@@ -11,7 +11,6 @@ from pathlib import Path
import ruamel.yaml
from osaca import __version__, utils
from osaca.parser import ParserX86ATT
from osaca.parser.instruction_form import InstructionForm
from osaca.parser.operand import Operand
from osaca.parser.memory import MemoryOperand
@@ -79,7 +78,7 @@ class MachineModel(object):
else:
yaml = self._create_yaml_object()
# otherwise load
with open(self._path, "r") as f:
with open(self._path, "r", encoding="utf8") as f:
if not lazy:
self._data = yaml.load(f)
else:
@@ -286,23 +285,35 @@ class MachineModel(object):
######################################################
def get_instruction(self, name, operands):
"""Find and return instruction data from name and operands."""
"""Find and return instruction data from name and operands/arity."""
# For use with dict instead of list as DB
if name is None:
return None
name_matched_iforms = self._data["instruction_forms_dict"].get(name.upper(), [])
try:
return next(
instruction_form
for instruction_form in name_matched_iforms
if self._match_operands(
instruction_form.operands,
operands,
# If `operands` is an integer, it represents the arity of the instruction. This is
# useful to reorder the operands in the Intel syntax because in their original order
# they may not match the model.
if isinstance(operands, int):
arity = operands
return next(
(
instruction_form
for instruction_form in name_matched_iforms
if len(instruction_form.operands) == arity
),
None,
)
else:
return next(
(
instruction_form
for instruction_form in name_matched_iforms
if self._match_operands(instruction_form.operands, operands)
),
None,
)
)
except StopIteration:
return None
except TypeError as e:
print("\nname: {}\noperands: {}".format(name, operands))
raise TypeError from e
@@ -878,6 +889,8 @@ class MachineModel(object):
return True
def _is_x86_reg_type(self, i_reg, reg, consider_masking=False):
from osaca.parser import ParserX86
"""Check if register type match."""
if reg is None:
if i_reg is None:
@@ -895,7 +908,7 @@ class MachineModel(object):
if i_reg_name == self.WILDCARD or reg.name == self.WILDCARD:
return True
# differentiate between vector registers (mm, xmm, ymm, zmm) and others (gpr)
parser_x86 = ParserX86ATT()
parser_x86 = ParserX86()
if parser_x86.is_vector_register(reg):
if reg.name.rstrip(string.digits).lower() == i_reg_name:
# Consider masking and zeroing for AVX512

View File

@@ -2,7 +2,6 @@
from itertools import chain
from osaca import utils
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.parser.memory import MemoryOperand
from osaca.parser.operand import Operand
from osaca.parser.register import RegisterOperand
@@ -26,20 +25,23 @@ class INSTR_FLAGS:
class ISASemantics(object):
GAS_SUFFIXES = "bswlqt"
def __init__(self, isa, path_to_yaml=None):
self._isa = isa.lower()
path = path_to_yaml or utils.find_datafile("isa/" + self._isa + ".yml")
def __init__(self, parser, path_to_yaml=None):
path = path_to_yaml or utils.find_datafile("isa/" + parser.isa() + ".yml")
self._isa_model = MachineModel(path_to_yaml=path)
if self._isa == "x86":
self._parser = ParserX86ATT()
elif self._isa == "aarch64":
self._parser = ParserAArch64()
self._parser = parser
@property
def parser(self):
    """Return the parser stored on this instance as `_parser`."""
    return self._parser
@property
def isa_model(self):
    """Return the ISA machine model stored on this instance as `_isa_model`."""
    return self._isa_model
def process(self, instruction_forms):
"""Process a list of instruction forms."""
for i in instruction_forms:
i.check_normalized()
self.assign_src_dst(i)
# get ;parser result and assign operands to
@@ -48,6 +50,7 @@ class ISASemantics(object):
# - source/destination
def assign_src_dst(self, instruction_form):
"""Update instruction form dictionary with source, destination and flag information."""
instruction_form.check_normalized()
# if the instruction form doesn't have operands or is None, there's nothing to do
if instruction_form.operands is None or instruction_form.mnemonic is None:
instruction_form.semantic_operands = {"source": [], "destination": [], "src_dst": []}
@@ -57,21 +60,6 @@ class ISASemantics(object):
isa_data = self._isa_model.get_instruction(
instruction_form.mnemonic, instruction_form.operands
)
if (
isa_data is None
and self._isa == "x86"
and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
):
# Check for instruction without GAS suffix
isa_data = self._isa_model.get_instruction(
instruction_form.mnemonic[:-1], instruction_form.operands
)
if isa_data is None and self._isa == "aarch64" and "." in instruction_form.mnemonic:
# Check for instruction without shape/cc suffix
suffix_start = instruction_form.mnemonic.index(".")
isa_data = self._isa_model.get_instruction(
instruction_form.mnemonic[:suffix_start], instruction_form.operands
)
operands = instruction_form.operands
op_dict = {}
@@ -88,33 +76,16 @@ class ISASemantics(object):
isa_data_reg = self._isa_model.get_instruction(
instruction_form.mnemonic, operands_reg
)
if (
isa_data_reg is None
and self._isa == "x86"
and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
):
# Check for instruction without GAS suffix
isa_data_reg = self._isa_model.get_instruction(
instruction_form.mnemonic[:-1], operands_reg
)
if (
isa_data_reg is None
and self._isa == "aarch64"
and "." in instruction_form.mnemonic
):
# Check for instruction without shape/cc suffix
suffix_start = instruction_form.mnemonic.index(".")
isa_data_reg = self._isa_model.get_instruction(
instruction_form.mnemonic[:suffix_start], operands_reg
)
if isa_data_reg:
assign_default = False
op_dict = self._apply_found_ISA_data(isa_data_reg, operands)
if assign_default:
# no irregular operand structure, apply default
op_dict["source"] = self._get_regular_source_operands(instruction_form)
op_dict["destination"] = self._get_regular_destination_operands(instruction_form)
op_dict["source"] = self._parser.get_regular_source_operands(instruction_form)
op_dict["destination"] = self._parser.get_regular_destination_operands(
instruction_form
)
op_dict["src_dst"] = []
# handle Xd! registers in aarch64
if any(
@@ -133,7 +104,7 @@ class ISASemantics(object):
op_dict["source"].remove(reg)
op_dict["src_dst"].append(reg)
# post-process pre- and post-indexing for aarch64 memory operands
if self._isa == "aarch64":
if self._parser.isa() == "aarch64":
for operand in [op for op in op_dict["source"] if isinstance(op, MemoryOperand)]:
post_indexed = operand.post_indexed
pre_indexed = operand.pre_indexed
@@ -177,6 +148,7 @@ class ISASemantics(object):
Empty dict if no changes of registers occurred. None for registers with unknown changes.
If only_postindexed is True, only considers changes due to post_indexed memory references.
"""
instruction_form.check_normalized()
if instruction_form.mnemonic is None:
return {}
dest_reg_names = [
@@ -190,21 +162,6 @@ class ISASemantics(object):
isa_data = self._isa_model.get_instruction(
instruction_form.mnemonic, instruction_form.operands
)
if (
isa_data is None
and self._isa == "x86"
and instruction_form.mnemonic[-1] in self.GAS_SUFFIXES
):
# Check for instruction without GAS suffix
isa_data = self._isa_model.get_instruction(
instruction_form.mnemonic[:-1], instruction_form.operands
)
if isa_data is None and self._isa == "aarch64" and "." in instruction_form.mnemonic:
# Check for instruction without shape/cc suffix
suffix_start = instruction_form.mnemonic.index(".")
isa_data = self._isa_model.get_instruction(
instruction_form.mnemonic[:suffix_start], instruction_form.operands
)
if only_postindexed:
for o in instruction_form.operands:
@@ -321,6 +278,7 @@ class ISASemantics(object):
def _has_load(self, instruction_form):
"""Check if instruction form performs a LOAD"""
instruction_form.check_normalized()
for operand in chain(
instruction_form.semantic_operands["source"],
instruction_form.semantic_operands["src_dst"],
@@ -331,6 +289,7 @@ class ISASemantics(object):
def _has_store(self, instruction_form):
"""Check if instruction form performs a STORE"""
instruction_form.check_normalized()
for operand in chain(
instruction_form.semantic_operands["destination"],
instruction_form.semantic_operands["src_dst"],
@@ -339,33 +298,6 @@ class ISASemantics(object):
return True
return False
def _get_regular_source_operands(self, instruction_form):
"""Get source operand of given instruction form assuming regular src/dst behavior."""
# if there is only one operand, assume it is a source operand
if len(instruction_form.operands) == 1:
return [instruction_form.operands[0]]
if self._isa == "x86":
# return all but last operand
return [op for op in instruction_form.operands[0:-1]]
elif self._isa == "aarch64":
return [op for op in instruction_form.operands[1:]]
else:
raise ValueError("Unsupported ISA {}.".format(self._isa))
def _get_regular_destination_operands(self, instruction_form):
"""Get destination operand of given instruction form assuming regular src/dst behavior."""
# if there is only one operand, assume no destination
if len(instruction_form.operands) == 1:
return []
if self._isa == "x86":
# return last operand
return instruction_form.operands[-1:]
if self._isa == "aarch64":
# return first operand
return instruction_form.operands[:1]
else:
raise ValueError("Unsupported ISA {}.".format(self._isa))
def substitute_mem_address(self, operands):
"""Create memory wildcard for all memory operands"""
return [

View File

@@ -1,8 +1,6 @@
#!/usr/bin/env python3
import copy
import os
import signal
import time
from itertools import chain
from multiprocessing import Manager, Process, cpu_count
@@ -38,7 +36,8 @@ class KernelDG(nx.DiGraph):
self.kernel, timeout, flag_dependencies
)
def _extend_path(self, dst_list, kernel, dg, offset):
@classmethod
def _extend_path(cls, dst_list, kernel, dg, offset):
for instr in kernel:
generator_path = nx.algorithms.simple_paths.all_simple_paths(
dg, instr.line_number, instr.line_number + offset
@@ -138,7 +137,7 @@ class KernelDG(nx.DiGraph):
all_paths = manager.list()
processes = [
Process(
target=self._extend_path,
target=KernelDG._extend_path,
args=(all_paths, instr_section, dg, offset),
)
for instr_section in instrs
@@ -164,9 +163,7 @@ class KernelDG(nx.DiGraph):
# terminate running processes
for p in processes:
if p.is_alive():
# Python 3.6 does not support Process.kill().
# Can be changed to `p.kill()` after EoL (01/22) of Py3.6
os.kill(p.pid, signal.SIGKILL)
p.kill()
p.join()
all_paths = list(all_paths)
else:
@@ -186,11 +183,11 @@ class KernelDG(nx.DiGraph):
for s, d in nx.utils.pairwise(path):
edge_lat = dg.edges[s, d]["latency"]
# map source node back to original line numbers
if s >= offset:
if s > offset:
s -= offset
lat_path.append((s, edge_lat))
lat_sum += edge_lat
if d >= offset:
if d > offset:
d -= offset
lat_path.sort()
@@ -413,7 +410,7 @@ class KernelDG(nx.DiGraph):
addr_change = 0
if isinstance(src.offset, ImmediateOperand) and src.offset.value is not None:
addr_change += src.offset.value
if mem.offset:
if isinstance(mem.offset, ImmediateOperand) and mem.offset.value is not None:
addr_change -= mem.offset.value
if mem.base and src.base:
base_change = register_changes.get(

View File

@@ -1,29 +1,35 @@
#!/usr/bin/env python3
from collections import OrderedDict
from enum import Enum
from osaca.parser import ParserAArch64, ParserX86ATT, get_parser
from osaca.parser.register import RegisterOperand
from osaca.parser import get_parser
from osaca.parser.identifier import IdentifierOperand
from osaca.parser.immediate import ImmediateOperand
from osaca.parser.memory import MemoryOperand
from osaca.parser.register import RegisterOperand
COMMENT_MARKER = {"start": "OSACA-BEGIN", "end": "OSACA-END"}
def reduce_to_section(kernel, isa):
class Matching(Enum):
    """
    State of marker matching.

    The members describe how far a piece of parsed code matches a marker line.
    """

    # We have determined that the code doesn't match the marker.
    No = 0
    # So far the code matches the marker, but the end of the marker has not been reached yet.
    Partial = 1
    # The code matches all instructions in the marker.
    Full = 2
def reduce_to_section(kernel, parser):
"""
Finds OSACA markers in given kernel and returns marked section
:param list kernel: kernel to check
:param str isa: ISA of given kernel
:param BaseParser parser: parser used to produce the kernel
:returns: `list` -- marked section of kernel as list of instruction forms
"""
isa = isa.lower()
if isa == "x86":
start, end = find_marked_kernel_x86ATT(kernel)
elif isa == "aarch64":
start, end = find_marked_kernel_AArch64(kernel)
else:
raise ValueError("ISA not supported.")
start, end = find_marked_section(kernel, parser, COMMENT_MARKER)
if start == -1:
start = 0
if end == -1:
@@ -31,63 +37,121 @@ def reduce_to_section(kernel, isa):
return kernel[start:end]
def find_marked_kernel_AArch64(lines):
def find_marked_section(lines, parser, comments=None):
"""
Find marked section for AArch64
Return indexes of marked section
:param list lines: kernel
:param parser: parser to use for checking
:type parser: :class:`~parser.BaseParser`
:param comments: dictionary with start and end markers in comment format, defaults to None
:type comments: dict, optional
:returns: `tuple of int` -- start and end line of marked section
"""
nop_bytes = [213, 3, 32, 31]
return find_marked_section(
lines,
ParserAArch64(),
["mov"],
"x1",
[111, 222],
nop_bytes,
reverse=True,
comments=COMMENT_MARKER,
)
index_start = -1
index_end = -1
start_marker = parser.start_marker()
end_marker = parser.end_marker()
for i, line in enumerate(lines):
try:
if line.mnemonic is None and comments is not None and line.comment is not None:
if comments["start"] == line.comment:
index_start = i + 1
elif comments["end"] == line.comment:
index_end = i
if index_start == -1:
matching_lines = match_lines(parser, lines[i:], start_marker)
if matching_lines > 0:
# Return the first line after the marker.
index_start = i + matching_lines
if index_end == -1:
if match_lines(parser, lines[i:], end_marker):
index_end = i
except TypeError as e:
print(i, e, line)
if index_start != -1 and index_end != -1:
break
return index_start, index_end
def find_marked_kernel_x86ATT(lines):
# This function and the following ones traverse the syntactic tree produced by the parser and try to
# match it to the marker. This is necessary because the IACA markers are significantly different on
# MSVC x86 than on other ISA/compilers. Therefore, simple string matching is not sufficient. Also,
# the syntax of numeric literals depends on the parser and should not be known to this module.
# The matching only checks for a limited number of properties (and the marker doesn't specify the
# rest).
def match_lines(parser, lines, marker):
"""
Find marked section for x86
Returns the number of `lines` matched by the `marker`.
:param list lines: kernel
:returns: `tuple of int` -- start and end line of marked section
:param list of `InstructionForm` lines: parsed assembly code.
:param list of `InstructionForm` marker: pattern to match against the `lines`.
:return int: the length of the match in the parsed code, 0 if there is no match.
"""
nop_bytes = [100, 103, 144]
return find_marked_section(
lines,
ParserX86ATT(),
["mov", "movl"],
"ebx",
[111, 222],
nop_bytes,
comments=COMMENT_MARKER,
)
marker_iter = iter(marker)
marker_line = next(marker_iter)
for matched_lines, line in enumerate(lines):
if isinstance(marker_line, list):
# No support for partial matching in lists.
for marker_alternative in marker_line:
matching = match_line(parser, line, marker_alternative)
if matching == Matching.Full:
break
else:
return 0
marker_line = next(marker_iter, None)
else:
matching = match_line(parser, line, marker_line)
if matching == Matching.No:
return 0
elif matching == Matching.Partial:
# Try the same marker line again. The call to `match_line` consumed some of the
# directive parameters.
pass
elif matching == Matching.Full:
# Move to the next marker line, the current one has been fully matched.
marker_line = next(marker_iter, None)
# If we have reached the last marker line, the parsed code matches the marker.
if not marker_line:
return matched_lines + 1
def get_marker(isa, comment=""):
def get_marker(isa, syntax="ATT", comment=""):
"""Return tuple of start and end marker lines."""
isa = isa.lower()
syntax = syntax.lower()
if isa == "x86":
start_marker_raw = (
"movl $111, %ebx # OSACA START MARKER\n"
".byte 100 # OSACA START MARKER\n"
".byte 103 # OSACA START MARKER\n"
".byte 144 # OSACA START MARKER\n"
)
if comment:
start_marker_raw += "# {}\n".format(comment)
end_marker_raw = (
"movl $222, %ebx # OSACA END MARKER\n"
".byte 100 # OSACA END MARKER\n"
".byte 103 # OSACA END MARKER\n"
".byte 144 # OSACA END MARKER\n"
)
if syntax == "att":
start_marker_raw = (
"movl $111, %ebx # OSACA START MARKER\n"
".byte 100 # OSACA START MARKER\n"
".byte 103 # OSACA START MARKER\n"
".byte 144 # OSACA START MARKER\n"
)
if comment:
start_marker_raw += "# {}\n".format(comment)
end_marker_raw = (
"movl $222, %ebx # OSACA END MARKER\n"
".byte 100 # OSACA END MARKER\n"
".byte 103 # OSACA END MARKER\n"
".byte 144 # OSACA END MARKER\n"
)
else:
# Intel syntax
start_marker_raw = (
"movl ebx, 111 # OSACA START MARKER\n"
".byte 100 # OSACA START MARKER\n"
".byte 103 # OSACA START MARKER\n"
".byte 144 # OSACA START MARKER\n"
)
if comment:
start_marker_raw += "# {}\n".format(comment)
end_marker_raw = (
"movl ebx, 222 # OSACA END MARKER\n"
".byte 100 # OSACA END MARKER\n"
".byte 103 # OSACA END MARKER\n"
".byte 144 # OSACA END MARKER\n"
)
elif isa == "aarch64":
start_marker_raw = (
"mov x1, #111 // OSACA START MARKER\n"
@@ -108,92 +172,97 @@ def get_marker(isa, comment=""):
return start_marker, end_marker
def find_marked_section(
lines, parser, mov_instr, mov_reg, mov_vals, nop_bytes, reverse=False, comments=None
):
def match_line(parser, line, marker_line):
"""
Return indexes of marked section
Returns whether `line` matches `marker_line`.
:param list lines: kernel
:param parser: parser to use for checking
:type parser: :class:`~parser.BaseParser`
:param mov_instr: all MOV instruction possible for the marker
:type mov_instr: `list of str`
:param mov_reg: register used for the marker
:type mov_reg: `str`
:param mov_vals: values needed to be moved to ``mov_reg`` for valid marker
:type mov_vals: `list of int`
:param nop_bytes: bytes representing opcode of NOP
:type nop_bytes: `list of int`
:param reverse: indicating if ISA syntax requires reverse operand order, defaults to `False`
:type reverse: boolean, optional
:param comments: dictionary with start and end markers in comment format, defaults to None
:type comments: dict, optional
:returns: `tuple of int` -- start and end line of marked section
:param `InstructionForm` line: parsed assembly code.
:param `InstructionForm` marker_line: pattern to match against `line`.
:return: Matching. In case of partial match, `marker_line` is modified and should be reused for
matching the next line in the parsed assembly code.
"""
# TODO match to instructions returned by get_marker
index_start = -1
index_end = -1
for i, line in enumerate(lines):
try:
if line.mnemonic is None and comments is not None and line.comment is not None:
if comments["start"] == line.comment:
index_start = i + 1
elif comments["end"] == line.comment:
index_end = i
elif (
line.mnemonic in mov_instr
and len(lines) > i + 1
and lines[i + 1].directive is not None
):
source = line.operands[0 if not reverse else 1]
destination = line.operands[1 if not reverse else 0]
# instruction pair matches, check for operands
if (
isinstance(source, ImmediateOperand)
and parser.normalize_imd(source) == mov_vals[0]
and isinstance(destination, RegisterOperand)
and parser.get_full_reg_name(destination) == mov_reg
):
# operands of first instruction match start, check for second one
match, line_count = match_bytes(lines, i + 1, nop_bytes)
if match:
# return first line after the marker
index_start = i + 1 + line_count
elif (
isinstance(source, ImmediateOperand)
and parser.normalize_imd(source) == mov_vals[1]
and isinstance(destination, RegisterOperand)
and parser.get_full_reg_name(destination) == mov_reg
):
# operand of first instruction match end, check for second one
match, line_count = match_bytes(lines, i + 1, nop_bytes)
if match:
# return line of the marker
index_end = i
except TypeError:
print(i, line)
if index_start != -1 and index_end != -1:
break
return index_start, index_end
def match_bytes(lines, index, byte_list):
"""Match bytes directives of markers"""
# either all bytes are in one line or in separate ones
extracted_bytes = []
line_count = 0
while (
index < len(lines)
and lines[index].directive is not None
and lines[index].directive.name == "byte"
if (
line.mnemonic
and marker_line.mnemonic
and line.mnemonic == marker_line.mnemonic
and match_operands(line.operands, marker_line.operands)
):
line_count += 1
extracted_bytes += [int(x, 0) for x in lines[index].directive.parameters]
index += 1
if extracted_bytes[0 : len(byte_list)] == byte_list:
return True, line_count
return False, -1
return Matching.Full
if (
line.directive
and marker_line.directive
and line.directive.name == marker_line.directive.name
):
return match_parameters(
parser, line.directive.parameters, marker_line.directive.parameters
)
else:
return Matching.No
def match_operands(line_operands, marker_line_operands):
    """
    Returns whether every operand of the line matches the corresponding marker operand.

    :param list line_operands: operands of an instruction in the parsed assembly code.
    :param list marker_line_operands: operands of the instruction in the marker.
    :return bool: True iff both lists have the same length and match pairwise.
    """
    # A differing operand count can never match.
    if len(line_operands) != len(marker_line_operands):
        return False
    for candidate, expected in zip(line_operands, marker_line_operands):
        if not match_operand(candidate, expected):
            return False
    return True
def match_operand(line_operand, marker_line_operand):
    """
    Returns whether a single operand of the parsed code matches a marker operand.

    Two operands match if they are of the same kind and:
    - immediates have the same value;
    - registers have the same name (case-insensitive);
    - memory operands have matching bases and matching offsets.
    Any other combination (including operands that are `None`) does not match.

    :param line_operand: operand of an instruction in the parsed assembly code.
    :param marker_line_operand: operand of the instruction in the marker.
    :return bool: True iff the operands match.
    """
    if isinstance(line_operand, ImmediateOperand) and isinstance(
        marker_line_operand, ImmediateOperand
    ):
        return line_operand.value == marker_line_operand.value
    if isinstance(line_operand, RegisterOperand) and isinstance(
        marker_line_operand, RegisterOperand
    ):
        return line_operand.name.lower() == marker_line_operand.name.lower()
    if isinstance(line_operand, MemoryOperand) and isinstance(
        marker_line_operand, MemoryOperand
    ):
        # The offset of the line must be compared against the offset of the *marker*; comparing it
        # against itself would accept any offset whatsoever.
        return match_operand(line_operand.base, marker_line_operand.base) and match_operand(
            line_operand.offset, marker_line_operand.offset
        )
    return False
def match_parameters(parser, line_parameters, marker_line_parameters):
    """
    Matches the parameters of a directive against the parameters expected by the marker.

    The elements of `marker_line_parameters` are removed from the front of the list as they are
    matched, so the list is modified in place.  Line parameters left over once the marker
    parameters are exhausted are ignored.

    :param parser: parser used to normalize immediate values.
    :param list of strings line_parameters: parameters of a directive in the parsed assembly code.
    :param list of strings marker_line_parameters: parameters of a directive in the marker.
    :return: Matching. `No` on the first mismatch, `Partial` if marker parameters remain unmatched
             (reuse `marker_line_parameters` for the next line), `Full` otherwise.
    """
    for candidate in line_parameters:
        if not marker_line_parameters:
            # Every marker parameter has been consumed; nothing left to compare.
            break
        if not match_parameter(parser, candidate, marker_line_parameters[0]):
            return Matching.No
        marker_line_parameters.pop(0)
    return Matching.Partial if marker_line_parameters else Matching.Full
def match_parameter(parser, line_parameter, marker_line_parameter):
    """
    Returns whether a single directive parameter matches the marker's parameter.

    :param parser: parser used to normalize immediate values.
    :param str line_parameter: parameter of a directive in the parsed assembly code.
    :param str marker_line_parameter: parameter of a directive in the marker.
    :return bool: True iff the parameters are equal ignoring case, or normalize to the same
                  immediate value.
    """
    if line_parameter.lower() == marker_line_parameter.lower():
        return True
    # The spellings differ: compare the immediate values they represent instead.
    candidate_imd = ImmediateOperand(value=line_parameter)
    expected_imd = ImmediateOperand(value=marker_line_parameter)
    return parser.normalize_imd(candidate_imd) == parser.normalize_imd(expected_imd)
def find_jump_labels(lines):

View File

@@ -20,6 +20,8 @@ class TestBaseParser(unittest.TestCase):
pass
with open(self._find_file("triad_x86_iaca.s")) as f:
self.triad_code = f.read()
with open(self._find_file("triad_x86_intel.s")) as f:
self.triad_code_intel = f.read()
with open(self._find_file("triad_arm_iaca.s")) as f:
self.triad_code_arm = f.read()
with open(self._find_file("kernel_x86.s")) as f:
@@ -68,10 +70,11 @@ class TestBaseParser(unittest.TestCase):
self.parser.normalize_imd(imd_hex_1)
def test_detect_ISA(self):
self.assertEqual(BaseParser.detect_ISA(self.triad_code), "x86")
self.assertEqual(BaseParser.detect_ISA(self.triad_code_arm), "aarch64")
self.assertEqual(BaseParser.detect_ISA(self.x86_code), "x86")
self.assertEqual(BaseParser.detect_ISA(self.aarch64_code), "aarch64")
self.assertEqual(BaseParser.detect_ISA(self.triad_code), ("x86", "ATT"))
self.assertEqual(BaseParser.detect_ISA(self.triad_code_intel), ("x86", "INTEL"))
self.assertEqual(BaseParser.detect_ISA(self.triad_code_arm), ("aarch64", None))
self.assertEqual(BaseParser.detect_ISA(self.x86_code), ("x86", "ATT"))
self.assertEqual(BaseParser.detect_ISA(self.aarch64_code), ("aarch64", None))
##################
# Helper functions

View File

@@ -12,7 +12,7 @@ from unittest.mock import patch
import osaca.osaca as osaca
from osaca.db_interface import sanity_check
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT, ParserX86Intel
from osaca.semantics import MachineModel
@@ -83,6 +83,7 @@ class TestCLI(unittest.TestCase):
def test_get_parser(self):
self.assertTrue(isinstance(osaca.get_asm_parser("csx"), ParserX86ATT))
self.assertTrue(isinstance(osaca.get_asm_parser("csx", "intel"), ParserX86Intel))
self.assertTrue(isinstance(osaca.get_asm_parser("tx2"), ParserAArch64))
with self.assertRaises(ValueError):
osaca.get_asm_parser("UNKNOWN")

View File

@@ -0,0 +1,102 @@
# Produced with gcc 14.2 with -O3 -march=sapphirerapids -fopenmp-simd -mprefer-vector-width=512, https://godbolt.org/z/drE47x1b4.
.LC3:
.string "%f\n"
main:
push r14
xor edi, edi
push r13
push r12
push rbp
push rbx
call time
mov edi, eax
call srand
mov edi, 1600
call malloc
mov r12, rax
mov rbp, rax
lea r13, [rax+1600]
mov rbx, rax
.L2:
mov edi, 1600
add rbx, 8
call malloc
mov QWORD PTR [rbx-8], rax
cmp r13, rbx
jne .L2
lea rbx, [r12+8]
lea r13, [r12+1592]
.L5:
mov r14d, 8
.L4:
call rand
vxorpd xmm2, xmm2, xmm2
mov rcx, QWORD PTR [rbx]
movsx rdx, eax
mov esi, eax
imul rdx, rdx, 351843721
sar esi, 31
sar rdx, 45
sub edx, esi
imul edx, edx, 100000
sub eax, edx
vcvtsi2sd xmm0, xmm2, eax
vdivsd xmm0, xmm0, QWORD PTR .LC0[rip]
vmovsd QWORD PTR [rcx+r14], xmm0
add r14, 8
cmp r14, 1592
jne .L4
add rbx, 8
cmp r13, rbx
jne .L5
vmovsd xmm1, QWORD PTR .LC1[rip]
lea rdi, [r12+1584]
.L6:
mov rdx, QWORD PTR [rbp+8]
mov rcx, QWORD PTR [rbp+16]
mov eax, 1
mov rsi, QWORD PTR [rbp+0]
vmovsd xmm0, QWORD PTR [rdx]
.L7:
vaddsd xmm0, xmm0, QWORD PTR [rcx+rax*8]
vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]
vaddsd xmm0, xmm0, QWORD PTR [rsi+rax*8]
vmulsd xmm0, xmm0, xmm1
vmovsd QWORD PTR [rdx+rax*8], xmm0
inc rax
cmp rax, 199
jne .L7
vmovsd xmm0, QWORD PTR [rdx+1592]
add rbp, 8
vmovsd QWORD PTR [rcx+8], xmm0
cmp rdi, rbp
jne .L6
mov rax, QWORD PTR [r12+1584]
vmovsd xmm0, QWORD PTR .LC2[rip]
vucomisd xmm0, QWORD PTR [rax+1584]
jp .L9
je .L19
.L9:
pop rbx
xor eax, eax
pop rbp
pop r12
pop r13
pop r14
ret
.L19:
mov rax, QWORD PTR [r12]
mov edi, OFFSET FLAT:.LC3
vmovsd xmm0, QWORD PTR [rax]
mov eax, 1
call printf
jmp .L9
.LC0:
.long 0
.long 1083129856
.LC1:
.long 2061584302
.long 1072934420
.LC2:
.long -57724360
.long 1072939201

View File

@@ -0,0 +1,227 @@
# Produced with ICC 2021.10.0 with -O3 -xcore-avx512, https://godbolt.org/z/87bYseh8r
..B1.1: # Preds ..B1.0
push rbp #5.32
mov rbp, rsp #5.32
and rsp, -128 #5.32
push r15 #5.32
push rbx #5.32
sub rsp, 112 #5.32
mov edi, 3 #5.32
mov rsi, 0x64199d9ffe #5.32
call __intel_new_feature_proc_init #5.32
..B1.34: # Preds ..B1.1
vstmxcsr DWORD PTR [rsp] #5.32
xor edi, edi #11.7
or DWORD PTR [rsp], 32832 #5.32
vldmxcsr DWORD PTR [rsp] #5.32
call time #11.7
..B1.2: # Preds ..B1.34
mov edi, eax #11.1
call srand #11.1
..B1.3: # Preds ..B1.2
mov edi, 1600 #13.16
call malloc #13.16
..B1.35: # Preds ..B1.3
mov rsi, rax #13.16
..B1.4: # Preds ..B1.35
xor eax, eax #14.1
mov rbx, rsi #14.1
mov r15, rax #14.1
..B1.5: # Preds ..B1.6 ..B1.4
mov edi, 1600 #15.22
call malloc #15.22
..B1.6: # Preds ..B1.5
mov QWORD PTR [rbx+r15*8], rax #15.5
inc r15 #14.1
cmp r15, 200 #14.1
jb ..B1.5 # Prob 82% #14.1
..B1.7: # Preds ..B1.6
xor eax, eax #17.1
mov rsi, rbx #
mov r15, rax #19.44
mov QWORD PTR [rsp], r13 #19.44[spill]
mov QWORD PTR [8+rsp], r14 #19.44[spill]
..B1.8: # Preds ..B1.11 ..B1.7
mov r13, QWORD PTR [8+rbx+r15*8] #19.5
xor r14d, r14d #18.3
..B1.9: # Preds ..B1.10 ..B1.8
call rand #19.26
..B1.37: # Preds ..B1.9
mov r8d, eax #19.26
..B1.10: # Preds ..B1.37
mov eax, 351843721 #19.33
mov ecx, r8d #19.33
imul r8d #19.33
sar ecx, 31 #19.33
vxorpd xmm0, xmm0, xmm0 #19.33
sar edx, 13 #19.33
sub edx, ecx #19.33
imul edi, edx, -100000 #19.33
add r8d, edi #19.33
vcvtsi2sd xmm0, xmm0, r8d #19.33
vdivsd xmm1, xmm0, QWORD PTR .L_2il0floatpacket.0[rip] #19.44
vmovsd QWORD PTR [8+r13+r14*8], xmm1 #19.5
inc r14 #18.3
cmp r14, 198 #18.3
jb ..B1.9 # Prob 82% #18.3
..B1.11: # Preds ..B1.10
inc r15 #17.1
cmp r15, 198 #17.1
jb ..B1.8 # Prob 91% #17.1
..B1.12: # Preds ..B1.11
mov r13, QWORD PTR [rsp] #[spill]
mov rsi, rbx #
mov r14, QWORD PTR [8+rsp] #[spill]
xor ecx, ecx #23.1
vmovsd xmm0, QWORD PTR .L_2il0floatpacket.1[rip] #10.14
xor dil, dil #10.14
mov edx, 196 #10.14
..B1.13: # Preds ..B1.27 ..B1.12
mov rax, QWORD PTR [8+rsi+rcx*8] #25.5
mov r8, rax #25.5
lea r9, QWORD PTR [8+rax] #25.5
sub r8, r9 #25.5
cmp r8, 1584 #24.3
jge ..B1.15 # Prob 50% #24.3
..B1.14: # Preds ..B1.13
neg r8 #26.7
cmp r8, 1584 #24.3
jl ..B1.22 # Prob 50% #24.3
..B1.15: # Preds ..B1.13 ..B1.14
lea r8, QWORD PTR [16+rax] #27.9
sub r9, r8 #27.9
cmp r9, 1584 #24.3
jge ..B1.17 # Prob 50% #24.3
..B1.16: # Preds ..B1.15
neg r9 #25.5
cmp r9, 1584 #24.3
jl ..B1.22 # Prob 50% #24.3
..B1.17: # Preds ..B1.15 ..B1.16
vmovsd xmm1, QWORD PTR [rax] #27.9
mov bl, dil #24.3
mov r9, QWORD PTR [rsi+rcx*8] #27.21
xor r11d, r11d #25.5
mov r10, QWORD PTR [16+rsi+rcx*8] #26.19
mov r8, QWORD PTR [8+rsi+rcx*8] #27.9
..B1.18: # Preds ..B1.18 ..B1.17
vmovsd xmm2, QWORD PTR [8+r11+r10] #26.19
inc bl #24.3
vaddsd xmm3, xmm2, QWORD PTR [16+r11+r8] #25.5
vaddsd xmm4, xmm3, QWORD PTR [8+r11+r9] #25.5
vaddsd xmm1, xmm4, xmm1 #25.5
vmulsd xmm8, xmm0, xmm1 #27.21
vmovsd QWORD PTR [8+r11+r8], xmm8 #25.5
vmovsd xmm5, QWORD PTR [16+r11+r10] #26.19
vaddsd xmm6, xmm5, QWORD PTR [24+r11+r8] #26.19
vaddsd xmm7, xmm6, QWORD PTR [16+r11+r9] #27.9
vaddsd xmm9, xmm7, xmm8 #27.21
vmulsd xmm13, xmm0, xmm9 #27.21
vmovsd QWORD PTR [16+r11+r8], xmm13 #25.5
vmovsd xmm10, QWORD PTR [24+r11+r10] #26.19
vaddsd xmm11, xmm10, QWORD PTR [32+r11+r8] #26.19
vaddsd xmm12, xmm11, QWORD PTR [24+r11+r9] #27.9
vaddsd xmm14, xmm12, xmm13 #27.21
vmulsd xmm18, xmm0, xmm14 #27.21
vmovsd QWORD PTR [24+r11+r8], xmm18 #25.5
vmovsd xmm15, QWORD PTR [32+r11+r10] #26.19
vaddsd xmm16, xmm15, QWORD PTR [40+r11+r8] #26.19
vaddsd xmm17, xmm16, QWORD PTR [32+r11+r9] #27.9
vaddsd xmm19, xmm17, xmm18 #27.21
vmulsd xmm1, xmm0, xmm19 #27.21
vmovsd QWORD PTR [32+r11+r8], xmm1 #25.5
add r11, 32 #24.3
cmp bl, 49 #24.3
jb ..B1.18 # Prob 27% #24.3
..B1.19: # Preds ..B1.18
mov r11, rdx #24.3
..B1.20: # Preds ..B1.20 ..B1.19
vmovsd xmm1, QWORD PTR [r8+r11*8] #26.7
vaddsd xmm2, xmm1, QWORD PTR [8+r10+r11*8] #26.19
vaddsd xmm3, xmm2, QWORD PTR [16+r8+r11*8] #27.9
vaddsd xmm4, xmm3, QWORD PTR [8+r9+r11*8] #27.21
vmulsd xmm5, xmm0, xmm4 #27.21
vmovsd QWORD PTR [8+r8+r11*8], xmm5 #25.5
inc r11 #24.3
cmp r11, 198 #24.3
jb ..B1.20 # Prob 66% #24.3
jmp ..B1.27 # Prob 100% #24.3
..B1.22: # Preds ..B1.14 ..B1.16
mov r9, QWORD PTR [rsi+rcx*8] #27.21
mov bl, dil #24.3
mov r10, QWORD PTR [16+rsi+rcx*8] #26.19
xor r11d, r11d #25.5
mov r8, QWORD PTR [8+rsi+rcx*8] #26.7
..B1.23: # Preds ..B1.23 ..B1.22
inc bl #24.3
vmovsd xmm1, QWORD PTR [r11+r8] #26.7
vaddsd xmm2, xmm1, QWORD PTR [8+r11+r10] #26.19
vaddsd xmm3, xmm2, QWORD PTR [16+r11+r8] #27.9
vaddsd xmm4, xmm3, QWORD PTR [8+r11+r9] #27.21
vmulsd xmm5, xmm0, xmm4 #27.21
vmovsd QWORD PTR [8+r11+r8], xmm5 #25.5
vaddsd xmm6, xmm5, QWORD PTR [16+r11+r10] #26.19
vaddsd xmm7, xmm6, QWORD PTR [24+r11+r8] #27.9
vaddsd xmm8, xmm7, QWORD PTR [16+r11+r9] #27.21
vmulsd xmm9, xmm0, xmm8 #27.21
vmovsd QWORD PTR [16+r11+r8], xmm9 #25.5
vaddsd xmm10, xmm9, QWORD PTR [24+r11+r10] #26.19
vaddsd xmm11, xmm10, QWORD PTR [32+r11+r8] #27.9
vaddsd xmm12, xmm11, QWORD PTR [24+r11+r9] #27.21
vmulsd xmm13, xmm0, xmm12 #27.21
vmovsd QWORD PTR [24+r11+r8], xmm13 #25.5
vaddsd xmm14, xmm13, QWORD PTR [32+r11+r10] #26.19
vaddsd xmm15, xmm14, QWORD PTR [40+r11+r8] #27.9
vaddsd xmm16, xmm15, QWORD PTR [32+r11+r9] #27.21
vmulsd xmm17, xmm0, xmm16 #27.21
vmovsd QWORD PTR [32+r11+r8], xmm17 #25.5
add r11, 32 #24.3
cmp bl, 49 #24.3
jb ..B1.23 # Prob 27% #24.3
..B1.24: # Preds ..B1.23
mov r11, rdx #24.3
..B1.25: # Preds ..B1.25 ..B1.24
vmovsd xmm1, QWORD PTR [r8+r11*8] #26.7
vaddsd xmm2, xmm1, QWORD PTR [8+r10+r11*8] #26.19
vaddsd xmm3, xmm2, QWORD PTR [16+r8+r11*8] #27.9
vaddsd xmm4, xmm3, QWORD PTR [8+r9+r11*8] #27.21
vmulsd xmm5, xmm0, xmm4 #27.21
vmovsd QWORD PTR [8+r8+r11*8], xmm5 #25.5
inc r11 #24.3
cmp r11, 198 #24.3
jb ..B1.25 # Prob 66% #24.3
..B1.27: # Preds ..B1.25 ..B1.20
mov r8, QWORD PTR [16+rsi+rcx*8] #30.3
inc rcx #23.1
mov rax, QWORD PTR [1592+rax] #30.15
mov QWORD PTR [8+r8], rax #30.3
cmp rcx, 198 #23.1
jb ..B1.13 # Prob 91% #23.1
..B1.28: # Preds ..B1.27
mov rax, QWORD PTR [1584+rsi] #33.4
vmovsd xmm0, QWORD PTR [1584+rax] #33.4
vucomisd xmm0, QWORD PTR .L_2il0floatpacket.2[rip] #33.29
jp ..B1.29 # Prob 0% #33.29
je ..B1.30 # Prob 5% #33.29
..B1.29: # Preds ..B1.28 ..B1.30
xor eax, eax #34.1
add rsp, 112 #34.1
pop rbx #34.1
pop r15 #34.1
mov rsp, rbp #34.1
pop rbp #34.1
ret #34.1
..B1.30: # Preds ..B1.28
mov rax, QWORD PTR [rsi] #33.39
mov edi, offset flat: .L_2__STRING.0 #33.39
vmovsd xmm0, QWORD PTR [rax] #33.39
mov eax, 1 #33.39
call printf #33.39
jmp ..B1.29 # Prob 100% #33.39
.L_2il0floatpacket.0:
.long 0x00000000,0x408f4000
.L_2il0floatpacket.1:
.long 0x7ae147ae,0x3ff3ae14
.L_2il0floatpacket.2:
.long 0xfc8f3238,0x3ff3c0c1
.L_2__STRING.0:
.long 681509

View File

@@ -0,0 +1,9 @@
; https://godbolt.org/z/o49jjojnx /std:c++latest /O1 /fp:contract /arch:AVX2
$LL13@foo:
vmovsd xmm1, QWORD PTR [rax]
vmovsd xmm0, QWORD PTR [rcx+rax]
vfmadd213sd xmm1, xmm0, QWORD PTR [rdx+rax]
vmovsd QWORD PTR [r8+rax], xmm1
lea rax, QWORD PTR [rax+8]
sub rbx, 1
jne SHORT $LL13@foo

View File

@@ -0,0 +1,19 @@
; Translated from kernel_x86_memdep.s
L4:
vmovsd [rax+8], xmm0 # line 3 <---------------------------------+
add rax, 8 # rax=rax_orig+8 |
vmovsd [rax+rcx*8+8], xmm0 # line 5 <------------------------------------------+
vaddsd xmm0, xmm0, [rax] # depends on line 3, rax+8;[rax] == [rax+8] --------+ |
sub rax, -8 # rax=rax_orig+16 | |
vaddsd xmm0, xmm0, [rax-8] # depends on line 3, rax+16;[rax-8] == [rax+8] -----+ |
dec rcx # rcx=rcx_orig-1 |
vaddsd xmm0, xmm0, [rax+rcx*8+8] # depends on line 5, [(rax+8)+(rcx-1)*8+8] == [rax+rcx*+8] --+
mov rdx, rcx # |
vaddsd xmm0, xmm0, [rax+rdx*8+8] # depends on line 5, rcx == rdx -----------------------------+
vmulsd xmm0, xmm0, xmm1
add rax, 8
cmp rsi, rax
jne L4
; Added to test LOAD dependencies
shl rax, 5
subsd xmm10, QWORD PTR [rax+r8]

View File

@@ -0,0 +1,124 @@
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.41.34123.0
include listing.inc
INCLUDELIB MSVCRTD
INCLUDELIB OLDNAMES
msvcjmc SEGMENT
__FAC6D534_triad@c DB 01H
msvcjmc ENDS
PUBLIC kernel
PUBLIC __JustMyCode_Default
EXTRN dummy:PROC
EXTRN _RTC_InitBase:PROC
EXTRN _RTC_Shutdown:PROC
EXTRN __CheckForDebuggerJustMyCode:PROC
EXTRN _fltused:DWORD
; COMDAT pdata
pdata SEGMENT
$pdata$kernel DD imagerel $LN9
DD imagerel $LN9+194
DD imagerel $unwind$kernel
pdata ENDS
; COMDAT rtc$TMZ
rtc$TMZ SEGMENT
_RTC_Shutdown.rtc$TMZ DQ FLAT:_RTC_Shutdown
rtc$TMZ ENDS
; COMDAT rtc$IMZ
rtc$IMZ SEGMENT
_RTC_InitBase.rtc$IMZ DQ FLAT:_RTC_InitBase
rtc$IMZ ENDS
; COMDAT xdata
xdata SEGMENT
$unwind$kernel DD 025052301H
DD 011e2323H
DD 070170025H
DD 05016H
xdata ENDS
; Function compile flags: /Odt
; COMDAT __JustMyCode_Default
_TEXT SEGMENT
__JustMyCode_Default PROC ; COMDAT
ret 0
__JustMyCode_Default ENDP
_TEXT ENDS
; Function compile flags: /Odtp /RTCsu /ZI
; COMDAT kernel
_TEXT SEGMENT
r$1 = 4
i$2 = 36
a$ = 288
b$ = 296
c$ = 304
s$ = 312
repeat$ = 320
cur_elements$ = 328
kernel PROC ; COMDAT
; File C:\Users\phl.bastiani\Projects\OSACA\validation\kernels\triad.c
; Line 16
$LN9:
movsd QWORD PTR [rsp+32], xmm3
mov QWORD PTR [rsp+24], r8
mov QWORD PTR [rsp+16], rdx
mov QWORD PTR [rsp+8], rcx
push rbp
push rdi
sub rsp, 296 ; 00000128H
lea rbp, QWORD PTR [rsp+32]
lea rcx, OFFSET FLAT:__FAC6D534_triad@c
call __CheckForDebuggerJustMyCode
npad 1
; Line 17
mov DWORD PTR r$1[rbp], 0
jmp SHORT $LN4@kernel
$LN2@kernel:
mov eax, DWORD PTR r$1[rbp]
inc eax
mov DWORD PTR r$1[rbp], eax
$LN4@kernel:
mov eax, DWORD PTR repeat$[rbp]
cmp DWORD PTR r$1[rbp], eax
jge SHORT $LN3@kernel
; Line 18
mov DWORD PTR i$2[rbp], 0
jmp SHORT $LN7@kernel
$LN5@kernel:
mov eax, DWORD PTR i$2[rbp]
inc eax
mov DWORD PTR i$2[rbp], eax
$LN7@kernel:
mov eax, DWORD PTR cur_elements$[rbp]
cmp DWORD PTR i$2[rbp], eax
jge SHORT $LN6@kernel
; Line 19
movsxd rax, DWORD PTR i$2[rbp]
movsxd rcx, DWORD PTR i$2[rbp]
mov rdx, QWORD PTR c$[rbp]
movsd xmm0, QWORD PTR s$[rbp]
mulsd xmm0, QWORD PTR [rdx+rcx*8]
mov rcx, QWORD PTR b$[rbp]
movsd xmm1, QWORD PTR [rcx+rax*8]
addsd xmm1, xmm0
movaps xmm0, xmm1
movsxd rax, DWORD PTR i$2[rbp]
mov rcx, QWORD PTR a$[rbp]
movsd QWORD PTR [rcx+rax*8], xmm0
; Line 20
jmp SHORT $LN5@kernel
$LN6@kernel:
; Line 21
mov rcx, QWORD PTR a$[rbp]
call dummy
npad 1
; Line 22
jmp SHORT $LN2@kernel
$LN3@kernel:
; Line 23
lea rsp, QWORD PTR [rbp+264]
pop rdi
pop rbp
ret 0
kernel ENDP
_TEXT ENDS
END

View File

@@ -0,0 +1,139 @@
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.41.34123.0
include listing.inc
INCLUDELIB MSVCRTD
INCLUDELIB OLDNAMES
msvcjmc SEGMENT
__68D132EB_concurrencysal@h DB 01H
__4DC47379_sal@h DB 01H
__B6ADDB23_vadefs@h DB 01H
__A2A1025A_vcruntime@h DB 01H
__0EF3BC42_intrin0@inl@h DB 01H
__5EC35D46_setjmp@h DB 01H
__368E74E0_mmintrin@h DB 01H
__735960E1_corecrt@h DB 01H
__211DB995_corecrt_malloc@h DB 01H
__7CD62D9E_malloc@h DB 01H
__22746E0E_xmmintrin@h DB 01H
__4716E7C2_emmintrin@h DB 01H
__98B78F4B_pmmintrin@h DB 01H
__286EFCC9_tmmintrin@h DB 01H
__0155E94A_smmintrin@h DB 01H
__64376086_nmmintrin@h DB 01H
__B18C9AC8_wmmintrin@h DB 01H
__7A18D7CF_zmmintrin@h DB 01H
__4D0C7505_immintrin@h DB 01H
__F7CF9440_ammintrin@h DB 01H
__78F5E131_intrin@h DB 01H
__6A584D4A_iacaMarks@h DB 01H
__FAC6D534_triad@c DB 01H
msvcjmc ENDS
PUBLIC kernel
PUBLIC __JustMyCode_Default
EXTRN dummy:PROC
EXTRN __CheckForDebuggerJustMyCode:PROC
EXTRN _fltused:DWORD
; COMDAT pdata
pdata SEGMENT
$pdata$kernel DD imagerel $LN18
DD imagerel $LN18+182
DD imagerel $unwind$kernel
pdata ENDS
; COMDAT voltbl
voltbl SEGMENT
_volmd DB 05bH
DB 079H
voltbl ENDS
; COMDAT xdata
xdata SEGMENT
$unwind$kernel DD 0c2001H
DD 026820H
DD 0b7419H
DD 0a6419H
DD 095419H
DD 083419H
DD 0e0155219H
xdata ENDS
; Function compile flags: /Odt
; COMDAT __JustMyCode_Default
_TEXT SEGMENT
__JustMyCode_Default PROC ; COMDAT
ret 0
__JustMyCode_Default ENDP
_TEXT ENDS
; Function compile flags: /Ogspy
; COMDAT kernel
_TEXT SEGMENT
a$ = 64
b$ = 72
c$ = 80
s$ = 88
repeat$ = 96
cur_elements$ = 104
kernel PROC ; COMDAT
; File C:\Users\phl.bastiani\Projects\OSACA\validation\kernels\triad.c
; Line 22
$LN18:
mov rax, rsp
mov QWORD PTR [rax+8], rbx
mov QWORD PTR [rax+16], rbp
mov QWORD PTR [rax+24], rsi
mov QWORD PTR [rax+32], rdi
push r14
sub rsp, 48 ; 00000030H
mov rbp, rcx
movaps XMMWORD PTR [rax-24], xmm6
lea rcx, OFFSET FLAT:__FAC6D534_triad@c
movaps xmm6, xmm3
mov r14, r8
mov rdi, rdx
call __CheckForDebuggerJustMyCode
mov eax, DWORD PTR repeat$[rsp]
movsxd rsi, DWORD PTR cur_elements$[rsp]
test eax, eax
jle SHORT $LN3@kernel
mov ebx, eax
$LL4@kernel:
; Line 24
test rsi, rsi
jle SHORT $LN6@kernel
mov rcx, r14
mov rdx, rbp
sub rcx, rdi
mov rax, rdi
sub rdx, rdi
mov r8, rsi
$LL7@kernel:
; Line 26
mov BYTE PTR gs:111, 111 ; 0000006fH
; Line 28
movaps xmm0, xmm6
mulsd xmm0, QWORD PTR [rax+rcx]
addsd xmm0, QWORD PTR [rax]
movsd QWORD PTR [rdx+rax], xmm0
add rax, 8
; Line 30
mov BYTE PTR gs:222, 222 ; 000000deH
sub r8, 1
jne SHORT $LL7@kernel
$LN6@kernel:
; Line 33
mov rcx, rbp
call dummy
sub rbx, 1
jne SHORT $LL4@kernel
$LN3@kernel:
; Line 35
mov rbx, QWORD PTR [rsp+64]
mov rbp, QWORD PTR [rsp+72]
mov rsi, QWORD PTR [rsp+80]
mov rdi, QWORD PTR [rsp+88]
movaps xmm6, XMMWORD PTR [rsp+32]
add rsp, 48 ; 00000030H
pop r14
ret 0
kernel ENDP
_TEXT ENDS
END

View File

@@ -34,14 +34,19 @@ class TestFrontend(unittest.TestCase):
)
self.machine_model_tx2 = MachineModel(arch="tx2")
self.semantics_csx = ArchSemantics(
self.parser_x86,
self.machine_model_csx,
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml"),
)
self.semantics_tx2 = ArchSemantics(
self.parser_AArch64,
self.machine_model_tx2,
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/aarch64.yml"),
)
self.semantics_csx.normalize_instruction_forms(self.kernel_x86)
self.semantics_tx2.normalize_instruction_forms(self.kernel_AArch64)
for i in range(len(self.kernel_x86)):
self.semantics_csx.assign_src_dst(self.kernel_x86[i])
self.semantics_csx.assign_tp_lt(self.kernel_x86[i])
@@ -114,7 +119,7 @@ class TestFrontend(unittest.TestCase):
self.assertEqual(line.line_number, analysis_dict["Kernel"][i]["LineNumber"])
def test_dict_output_AArch64(self):
reduced_kernel = reduce_to_section(self.kernel_AArch64, self.semantics_tx2._isa)
reduced_kernel = reduce_to_section(self.kernel_AArch64, self.parser_AArch64)
dg = KernelDG(
reduced_kernel,
self.parser_AArch64,

View File

@@ -12,37 +12,47 @@ from osaca.semantics import (
find_jump_labels,
find_basic_loop_bodies,
)
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT, ParserX86Intel
class TestMarkerUtils(unittest.TestCase):
@classmethod
def setUpClass(self):
self.parser_AArch = ParserAArch64()
self.parser_x86 = ParserX86ATT()
self.parser_x86_att = ParserX86ATT()
self.parser_x86_intel = ParserX86Intel()
with open(self._find_file("triad_arm_iaca.s")) as f:
triad_code_arm = f.read()
with open(self._find_file("triad_x86_iaca.s")) as f:
triad_code_x86 = f.read()
triad_code_x86_att = f.read()
with open(self._find_file("triad_x86_intel_iaca.s")) as f:
triad_code_x86_intel = f.read()
self.parsed_AArch = self.parser_AArch.parse_file(triad_code_arm)
self.parsed_x86 = self.parser_x86.parse_file(triad_code_x86)
self.parsed_x86_att = self.parser_x86_att.parse_file(triad_code_x86_att)
self.parsed_x86_intel = self.parser_x86_intel.parse_file(triad_code_x86_intel)
#################
# Test
#################
def test_marker_detection_AArch64(self):
kernel = reduce_to_section(self.parsed_AArch, "AArch64")
kernel = reduce_to_section(self.parsed_AArch, ParserAArch64())
self.assertEqual(len(kernel), 138)
self.assertEqual(kernel[0].line_number, 307)
self.assertEqual(kernel[-1].line_number, 444)
def test_marker_detection_x86(self):
kernel = reduce_to_section(self.parsed_x86, "x86")
def test_marker_detection_x86_att(self):
kernel = reduce_to_section(self.parsed_x86_att, ParserX86ATT())
self.assertEqual(len(kernel), 9)
self.assertEqual(kernel[0].line_number, 146)
self.assertEqual(kernel[-1].line_number, 154)
def test_marker_detection_x86_intel(self):
    # Reduce the parsed Intel-syntax triad fixture to its marked section and
    # check the section's size and its first/last source line numbers.
    section = reduce_to_section(self.parsed_x86_intel, ParserX86Intel())
    self.assertEqual(len(section), 7)
    first_form = section[0]
    self.assertEqual(first_form.line_number, 111)
    last_form = section[-1]
    self.assertEqual(last_form.line_number, 117)
def test_marker_matching_AArch64(self):
# preparation
bytes_1_line = ".byte 213,3,32,31\n"
@@ -108,7 +118,7 @@ class TestMarkerUtils(unittest.TestCase):
bytes_end=bytes_var_2,
):
sample_parsed = self.parser_AArch.parse_file(sample_code)
sample_kernel = reduce_to_section(sample_parsed, "AArch64")
sample_kernel = reduce_to_section(sample_parsed, ParserAArch64())
self.assertEqual(len(sample_kernel), kernel_length)
kernel_start = len(
list(
@@ -179,8 +189,8 @@ class TestMarkerUtils(unittest.TestCase):
mov_end=mov_end_var,
bytes_end=bytes_var_2,
):
sample_parsed = self.parser_x86.parse_file(sample_code)
sample_kernel = reduce_to_section(sample_parsed, "x86")
sample_parsed = self.parser_x86_att.parse_file(sample_code)
sample_kernel = reduce_to_section(sample_parsed, ParserX86ATT())
self.assertEqual(len(sample_kernel), kernel_length)
kernel_start = len(
list(
@@ -190,7 +200,7 @@ class TestMarkerUtils(unittest.TestCase):
)
)
)
parsed_kernel = self.parser_x86.parse_file(
parsed_kernel = self.parser_x86_att.parse_file(
kernel, start_line=kernel_start
)
self.assertEqual(sample_kernel, parsed_kernel)
@@ -222,7 +232,7 @@ class TestMarkerUtils(unittest.TestCase):
for test_name, pro, kernel, epi in samples:
code = pro + kernel + epi
parsed = self.parser_AArch.parse_file(code)
test_kernel = reduce_to_section(parsed, "AArch64")
test_kernel = reduce_to_section(parsed, ParserAArch64())
if kernel:
kernel_length = len(kernel.strip().split("\n"))
else:
@@ -230,7 +240,7 @@ class TestMarkerUtils(unittest.TestCase):
self.assertEqual(
len(test_kernel),
kernel_length,
msg="Invalid exctracted kernel length on {!r} sample".format(test_name),
msg="Invalid extracted kernel length on {!r} sample".format(test_name),
)
if pro:
kernel_start = len((pro).strip().split("\n"))
@@ -240,7 +250,7 @@ class TestMarkerUtils(unittest.TestCase):
self.assertEqual(
test_kernel,
parsed_kernel,
msg="Invalid exctracted kernel on {!r}".format(test_name),
msg="Invalid extracted kernel on {!r}".format(test_name),
)
def test_marker_special_cases_x86(self):
@@ -269,8 +279,8 @@ class TestMarkerUtils(unittest.TestCase):
for test_name, pro, kernel, epi in samples:
code = pro + kernel + epi
parsed = self.parser_x86.parse_file(code)
test_kernel = reduce_to_section(parsed, "x86")
parsed = self.parser_x86_att.parse_file(code)
test_kernel = reduce_to_section(parsed, ParserX86ATT())
if kernel:
kernel_length = len(kernel.strip().split("\n"))
else:
@@ -278,23 +288,23 @@ class TestMarkerUtils(unittest.TestCase):
self.assertEqual(
len(test_kernel),
kernel_length,
msg="Invalid exctracted kernel length on {!r} sample".format(test_name),
msg="Invalid extracted kernel length on {!r} sample".format(test_name),
)
if pro:
kernel_start = len((pro).strip().split("\n"))
else:
kernel_start = 0
parsed_kernel = self.parser_x86.parse_file(kernel, start_line=kernel_start)
parsed_kernel = self.parser_x86_att.parse_file(kernel, start_line=kernel_start)
self.assertEqual(
test_kernel,
parsed_kernel,
msg="Invalid exctracted kernel on {!r}".format(test_name),
msg="Invalid extracted kernel on {!r}".format(test_name),
)
def test_find_jump_labels(self):
self.assertEqual(
find_jump_labels(self.parsed_x86),
find_jump_labels(self.parsed_x86_att),
OrderedDict(
[
(".LFB24", 10),
@@ -358,7 +368,7 @@ class TestMarkerUtils(unittest.TestCase):
self.assertEqual(
[
(k, v[0].line_number, v[-1].line_number)
for k, v in find_basic_blocks(self.parsed_x86).items()
for k, v in find_basic_blocks(self.parsed_x86_att).items()
],
[
(".LFB24", 11, 56),
@@ -422,7 +432,7 @@ class TestMarkerUtils(unittest.TestCase):
self.assertEqual(
[
(k, v[0].line_number, v[-1].line_number)
for k, v in find_basic_loop_bodies(self.parsed_x86).items()
for k, v in find_basic_loop_bodies(self.parsed_x86_att).items()
],
[(".L4", 66, 74), (".L10", 146, 154), (".L28", 290, 300)],
)

441
tests/test_parser_x86intel.py Executable file
View File

@@ -0,0 +1,441 @@
#!/usr/bin/env python3
"""
Unit tests for x86 Intel assembly parser
"""
import os
import unittest
from osaca.parser import ParserX86Intel, InstructionForm
from osaca.parser.directive import DirectiveOperand
from osaca.parser.identifier import IdentifierOperand
from osaca.parser.immediate import ImmediateOperand
from osaca.parser.label import LabelOperand
from osaca.parser.memory import MemoryOperand
from osaca.parser.register import RegisterOperand
class TestParserX86Intel(unittest.TestCase):
    """Unit tests for ``ParserX86Intel``, the x86 Intel-syntax assembly parser.

    A single parser instance and the contents of four assembly fixtures are
    created once in ``setUpClass`` and shared by all test methods.

    Test methods are documented with ``#`` comments rather than docstrings so
    that verbose test runners keep printing the method names.
    """

    @classmethod
    def setUpClass(cls):
        """Create the parser under test and read every assembly fixture once."""
        cls.parser = ParserX86Intel()
        # (class attribute, fixture file name), loaded in this order.
        fixtures = (
            ("triad_code", "triad_x86_intel.s"),
            ("triad_iaca_code", "triad_x86_intel_iaca.s"),
            ("gs_icc_code", "gs_x86_icc.s"),
            ("gs_gcc_code", "gs_x86_gcc.s"),
        )
        for attribute, file_name in fixtures:
            with open(cls._find_file(file_name)) as f:
                setattr(cls, attribute, f.read())

    ##################
    # Test
    ##################

    def test_comment_parser(self):
        # _get_comment joins the parsed comment tokens with single spaces, so
        # runs of whitespace inside the comment collapse in the expected values.
        self.assertEqual(self._get_comment(self.parser, "; some comments"), "some comments")
        self.assertEqual(self._get_comment(self.parser, "\t\t;AA BB CC \t end \t"), "AA BB CC end")
        self.assertEqual(
            self._get_comment(self.parser, "\t;; comment ;; comment"),
            "; comment ;; comment",
        )

    def test_label_parser(self):
        self.assertEqual(self._get_label(self.parser, "main:")[0].name, "main")
        self.assertEqual(self._get_label(self.parser, "$$B1?10:")[0].name, "$$B1?10")
        # A label may be followed by an instruction on the same line: element 0
        # of the result is the label, element 1 the instruction form.
        label_with_call = "$LN9:\tcall\t__CheckForDebuggerJustMyCode"
        self.assertEqual(
            self._get_label(self.parser, label_with_call)[0].name,
            "$LN9",
        )
        self.assertEqual(
            self._get_label(self.parser, label_with_call)[1],
            InstructionForm(
                mnemonic="call",
                operands=[
                    {"identifier": {"name": "__CheckForDebuggerJustMyCode"}},
                ],
                directive_id=None,
                comment_id=None,
                label_id=None,
                line=None,
                line_number=None,
            ),
        )

    def test_directive_parser(self):
        # (directive text, expected first element of the processed parse result)
        cases = [
            (
                "\t.allocstack 16",
                DirectiveOperand(name=".allocstack", parameters=["16"]),
            ),
            (
                "INCLUDELIB MSVCRTD",
                DirectiveOperand(name="INCLUDELIB", parameters=["MSVCRTD"]),
            ),
            (
                "msvcjmc\tSEGMENT",
                DirectiveOperand(name="SEGMENT", parameters=["msvcjmc"]),
            ),
            (
                "EXTRN\t_RTC_InitBase:PROC",
                DirectiveOperand(name="EXTRN", parameters=["_RTC_InitBase:PROC"]),
            ),
            (
                "$pdata$kernel DD imagerel $LN9",
                DirectiveOperand(name="DD", parameters=["$pdata$kernel", "imagerel", "$LN9"]),
            ),
            (
                "repeat$ = 320",
                DirectiveOperand(name="=", parameters=["repeat$", "320"]),
            ),
        ]
        for text, expected in cases:
            with self.subTest(directive=text):
                self.assertEqual(self._get_directive(self.parser, text)[0], expected)

    def test_parse_instruction(self):
        instructions = [
            "\tsub\trsp, 296\t\t\t\t; 00000128H",
            " fst ST(3)\t; Good ol' x87.",
            "\tmulsd\txmm0, QWORD PTR [rdx+rcx*8]",
            "\tmov\teax, DWORD PTR cur_elements$[rbp]",
            "\tmov\tQWORD PTR [rsp+24], r8",
            "\tjmp\tSHORT $LN2@kernel",
            "\tlea\trcx, OFFSET FLAT:__FAC6D534_triad@c",
            "\tmov\tBYTE PTR gs:111, al",
            "\tlea\tr8, QWORD PTR [r8*4]",
            "\tmovsd\txmm1, QWORD PTR boost@@XZ@4V456@A+16",
            "\tlea\trcx, OFFSET FLAT:??_R0N@8+8",
            "\tvfmadd213sd xmm0, xmm1, QWORD PTR __real@bfc5555555555555",
            "\tjmp\t$LN18@operator",
            "vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]",
        ]

        # Parse everything up front, in order, before asserting anything.
        (
            parsed_1,
            parsed_2,
            parsed_3,
            parsed_4,
            parsed_5,
            parsed_6,
            parsed_7,
            parsed_8,
            parsed_9,
            parsed_10,
            parsed_11,
            parsed_12,
            parsed_13,
            parsed_14,
        ) = [self.parser.parse_instruction(instruction) for instruction in instructions]

        # Register and immediate operands with a trailing comment.
        self.assertEqual(parsed_1.mnemonic, "sub")
        self.assertEqual(parsed_1.operands[0], RegisterOperand(name="RSP"))
        self.assertEqual(parsed_1.operands[1], ImmediateOperand(value=296))
        self.assertEqual(parsed_1.comment, "00000128H")

        # x87 stack register.
        self.assertEqual(parsed_2.mnemonic, "fst")
        self.assertEqual(parsed_2.operands[0], RegisterOperand(name="ST(3)"))
        self.assertEqual(parsed_2.comment, "Good ol' x87.")

        # Memory operand with base, index and scale.
        self.assertEqual(parsed_3.mnemonic, "mulsd")
        self.assertEqual(parsed_3.operands[0], RegisterOperand(name="XMM0"))
        self.assertEqual(
            parsed_3.operands[1],
            MemoryOperand(
                base=RegisterOperand(name="RDX"), index=RegisterOperand(name="RCX"), scale=8
            ),
        )

        # Symbolic offset in front of the bracketed base register.
        # NOTE(review): the expected value=104 is not defined anywhere in this
        # test; presumably it depends on how the parser resolves or compares
        # identifiers -- confirm before changing test order or parser state.
        self.assertEqual(parsed_4.mnemonic, "mov")
        self.assertEqual(parsed_4.operands[0], RegisterOperand(name="EAX"))
        self.assertEqual(
            parsed_4.operands[1],
            MemoryOperand(
                offset=ImmediateOperand(identifier="cur_elements$", value=104),
                base=RegisterOperand(name="RBP"),
            ),
        )

        # Memory destination with a numeric offset.
        self.assertEqual(parsed_5.mnemonic, "mov")
        self.assertEqual(
            parsed_5.operands[0],
            MemoryOperand(offset=ImmediateOperand(value=24), base=RegisterOperand(name="RSP")),
        )
        self.assertEqual(parsed_5.operands[1], RegisterOperand(name="R8"))

        # "SHORT" jump target is a label operand.
        self.assertEqual(parsed_6.mnemonic, "jmp")
        self.assertEqual(parsed_6.operands[0], LabelOperand(name="$LN2@kernel"))

        # "OFFSET FLAT:" address of a symbol.
        self.assertEqual(parsed_7.mnemonic, "lea")
        self.assertEqual(parsed_7.operands[0], RegisterOperand(name="RCX"))
        self.assertEqual(
            parsed_7.operands[1],
            MemoryOperand(offset=IdentifierOperand(name="__FAC6D534_triad@c")),
        )

        # Segment register used as base.
        self.assertEqual(parsed_8.mnemonic, "mov")
        self.assertEqual(
            parsed_8.operands[0],
            MemoryOperand(base=RegisterOperand(name="GS"), offset=ImmediateOperand(value=111)),
        )
        self.assertEqual(parsed_8.operands[1], RegisterOperand(name="AL"))

        # Index and scale without a base register.
        self.assertEqual(parsed_9.mnemonic, "lea")
        self.assertEqual(parsed_9.operands[0], RegisterOperand(name="R8"))
        self.assertEqual(
            parsed_9.operands[1],
            MemoryOperand(base=None, index=RegisterOperand(name="R8"), scale=4),
        )

        # Decorated identifier plus numeric displacement.
        self.assertEqual(parsed_10.mnemonic, "movsd")
        self.assertEqual(parsed_10.operands[0], RegisterOperand(name="XMM1"))
        self.assertEqual(
            parsed_10.operands[1],
            MemoryOperand(
                offset=IdentifierOperand(
                    name="boost@@XZ@4V456@A", offset=ImmediateOperand(value=16)
                )
            ),
        )

        self.assertEqual(parsed_11.mnemonic, "lea")
        self.assertEqual(parsed_11.operands[0], RegisterOperand(name="RCX"))
        self.assertEqual(
            parsed_11.operands[1],
            MemoryOperand(
                offset=IdentifierOperand(name="??_R0N@8", offset=ImmediateOperand(value=8))
            ),
        )

        # Three-operand instruction with a symbolic memory operand.
        self.assertEqual(parsed_12.mnemonic, "vfmadd213sd")
        self.assertEqual(parsed_12.operands[0], RegisterOperand(name="XMM0"))
        self.assertEqual(parsed_12.operands[1], RegisterOperand(name="XMM1"))
        self.assertEqual(
            parsed_12.operands[2],
            MemoryOperand(offset=IdentifierOperand(name="__real@bfc5555555555555")),
        )

        # Jump without "SHORT": the target is an identifier operand.
        self.assertEqual(parsed_13.mnemonic, "jmp")
        self.assertEqual(parsed_13.operands[0], IdentifierOperand(name="$LN18@operator"))

        # Displacement placed between base and scaled index.
        self.assertEqual(parsed_14.mnemonic, "vaddsd")
        self.assertEqual(parsed_14.operands[0], RegisterOperand(name="XMM0"))
        self.assertEqual(parsed_14.operands[1], RegisterOperand(name="XMM0"))
        self.assertEqual(
            parsed_14.operands[2],
            MemoryOperand(
                base=RegisterOperand(name="RDX"),
                offset=ImmediateOperand(value=8),
                index=RegisterOperand(name="RAX"),
                scale=8,
            ),
        )

    def test_parse_line(self):
        line_comment = "; -- Begin main"
        line_instruction = "\tret\t0"

        instruction_form_1 = InstructionForm(
            mnemonic=None,
            operands=[],
            directive_id=None,
            comment_id="-- Begin main",
            label_id=None,
            line="; -- Begin main",
            line_number=1,
        )
        instruction_form_2 = InstructionForm(
            mnemonic="ret",
            operands=[
                {"immediate": {"value": 0}},
            ],
            directive_id=None,
            comment_id=None,
            label_id=None,
            line="\tret\t0",
            line_number=2,
        )

        parsed_1 = self.parser.parse_line(line_comment, 1)
        parsed_2 = self.parser.parse_line(line_instruction, 2)

        self.assertEqual(parsed_1, instruction_form_1)
        self.assertEqual(parsed_2, instruction_form_2)

    def test_parse_register(self):
        # (register text, expected operand); the expected names are upper case.
        cases = [
            ("rax", RegisterOperand(name="RAX")),
            ("r9", RegisterOperand(name="R9")),
            ("xmm1", RegisterOperand(name="XMM1")),
            ("ST(4)", RegisterOperand(name="ST(4)")),
        ]
        for register_str, expected in cases:
            with self.subTest(register=register_str):
                self.assertEqual(self.parser.parse_register(register_str), expected)

    def test_parse_file1(self):
        parsed = self.parser.parse_file(self.triad_code)
        self.assertEqual(parsed[0].line_number, 1)
        # Check specifically that the values of the symbols defined by "=" were correctly
        # propagated.
        self.assertEqual(
            parsed[69],
            InstructionForm(
                mnemonic="mov",
                operands=[
                    MemoryOperand(
                        base=RegisterOperand("RBP"),
                        offset=ImmediateOperand(value=4, identifier="r$1"),
                    ),
                    ImmediateOperand(value=0),
                ],
                line="\tmov\tDWORD PTR r$1[rbp], 0",
                line_number=73,
            ),
        )
        # Check a few lines to make sure that we produced something reasonable.
        self.assertEqual(
            parsed[60],
            InstructionForm(
                mnemonic="mov",
                operands=[
                    MemoryOperand(base=RegisterOperand("RSP"), offset=ImmediateOperand(value=8)),
                    RegisterOperand(name="RCX"),
                ],
                line="\tmov\tQWORD PTR [rsp+8], rcx",
                line_number=64,
            ),
        )
        self.assertEqual(
            parsed[120],
            InstructionForm(
                directive_id=DirectiveOperand(name="END"), line="END", line_number=124
            ),
        )
        self.assertEqual(len(parsed), 121)

    def test_parse_file2(self):
        parsed = self.parser.parse_file(self.triad_iaca_code)
        self.assertEqual(parsed[0].line_number, 1)
        # Check a few lines to make sure that we produced something reasonable.
        self.assertEqual(
            parsed[68],
            InstructionForm(
                directive_id=DirectiveOperand(name="=", parameters=["s$", "88"]),
                line="s$ = 88",
                line_number=72,
            ),
        )
        self.assertEqual(
            parsed[135],
            InstructionForm(
                directive_id=DirectiveOperand(name="END"), line="END", line_number=139
            ),
        )
        self.assertEqual(len(parsed), 136)

    def test_parse_file3(self):
        parsed = self.parser.parse_file(self.gs_icc_code)
        self.assertEqual(parsed[0].line_number, 1)
        # Check a few lines to make sure that we produced something reasonable.
        self.assertEqual(
            parsed[113],
            InstructionForm(
                mnemonic="vmovsd",
                operands=[
                    RegisterOperand("XMM5"),
                    MemoryOperand(
                        base=RegisterOperand("R11"),
                        index=RegisterOperand("R10"),
                        scale=1,
                        offset=ImmediateOperand(value=16),
                    ),
                ],
                comment_id="26.19",
                line=" vmovsd xmm5, QWORD PTR [16+r11+r10]" + " #26.19",
                line_number=114,
            ),
        )
        self.assertEqual(
            parsed[226],
            InstructionForm(
                directive_id=DirectiveOperand(name=".long", parameters=["681509"]),
                line=" .long 681509",
                line_number=227,
            ),
        )
        self.assertEqual(len(parsed), 227)

    def test_parse_file4(self):
        parsed = self.parser.parse_file(self.gs_gcc_code)
        self.assertEqual(parsed[0].line_number, 1)
        # Check a few lines to make sure that we produced something reasonable.
        self.assertEqual(
            parsed[61],
            InstructionForm(
                mnemonic="vaddsd",
                operands=[
                    RegisterOperand("XMM0"),
                    RegisterOperand("XMM0"),
                    MemoryOperand(
                        base=RegisterOperand("RDX"),
                        index=RegisterOperand("RAX"),
                        scale=8,
                        offset=ImmediateOperand(value=8),
                    ),
                ],
                line=" vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]",
                line_number=62,
            ),
        )
        self.assertEqual(
            parsed[101],
            InstructionForm(
                directive_id=DirectiveOperand(name=".long", parameters=["1072939201"]),
                line=" .long 1072939201",
                line_number=102,
            ),
        )
        self.assertEqual(len(parsed), 102)

    def test_normalize_imd(self):
        # The same number written with the B (binary), O (octal), plain decimal
        # and H (hexadecimal) notations must normalize to one common value.
        imd_binary = ImmediateOperand(value="1001111B")
        imd_octal = ImmediateOperand(value="117O")
        imd_decimal = ImmediateOperand(value="79")
        imd_hex = ImmediateOperand(value="4fH")
        imd_float = ImmediateOperand(value="-79.34")
        self.assertEqual(
            self.parser.normalize_imd(imd_binary),
            self.parser.normalize_imd(imd_octal),
        )
        self.assertEqual(
            self.parser.normalize_imd(imd_octal),
            self.parser.normalize_imd(imd_decimal),
        )
        self.assertEqual(
            self.parser.normalize_imd(imd_decimal),
            self.parser.normalize_imd(imd_hex),
        )
        self.assertEqual(self.parser.normalize_imd(ImmediateOperand(value="-79")), -79)
        self.assertEqual(self.parser.normalize_imd(imd_float), -79.34)

    ##################
    # Helper functions
    ##################

    def _get_comment(self, parser, comment):
        """Parse *comment* with the comment grammar and return its tokens joined by spaces."""
        return " ".join(
            parser.process_operand(parser.comment.parseString(comment, parseAll=True))["comment"]
        )

    def _get_label(self, parser, label):
        """Parse *label* with the label grammar and return the processed result."""
        return parser.process_operand(parser.label.parseString(label, parseAll=True))

    def _get_directive(self, parser, directive):
        """Parse *directive* with the directive grammar and return the processed result."""
        return parser.process_operand(parser.directive.parseString(directive, parseAll=True))

    @staticmethod
    def _find_file(name):
        """Return the path of fixture *name* in the ``test_files`` directory next to this module.

        Raises ``AssertionError`` if the file does not exist.
        """
        testdir = os.path.dirname(__file__)
        name = os.path.join(testdir, "test_files", name)
        assert os.path.exists(name), f"missing test fixture: {name}"
        return name
if __name__ == "__main__":
    # Run only this module's test case, printing one line per test.
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(unittest.TestLoader().loadTestsFromTestCase(TestParserX86Intel))

View File

@@ -10,7 +10,7 @@ from copy import deepcopy
import networkx as nx
from osaca.osaca import get_unmatched_instruction_ratio
from osaca.parser import ParserAArch64, ParserX86ATT
from osaca.parser import ParserAArch64, ParserX86ATT, ParserX86Intel
from osaca.semantics import (
INSTR_FLAGS,
ArchSemantics,
@@ -32,7 +32,8 @@ class TestSemanticTools(unittest.TestCase):
@classmethod
def setUpClass(cls):
# set up parser and kernels
cls.parser_x86 = ParserX86ATT()
cls.parser_x86_att = ParserX86ATT()
cls.parser_x86_intel = ParserX86Intel()
cls.parser_AArch64 = ParserAArch64()
with open(cls._find_file("kernel_x86.s")) as f:
cls.code_x86 = f.read()
@@ -40,6 +41,10 @@ class TestSemanticTools(unittest.TestCase):
cls.code_x86_memdep = f.read()
with open(cls._find_file("kernel_x86_long_LCD.s")) as f:
cls.code_x86_long_LCD = f.read()
with open(cls._find_file("kernel_x86_intel.s")) as f:
cls.code_x86_intel = f.read()
with open(cls._find_file("kernel_x86_intel_memdep.s")) as f:
cls.code_x86_intel_memdep = f.read()
with open(cls._find_file("kernel_aarch64_memdep.s")) as f:
cls.code_aarch64_memdep = f.read()
with open(cls._find_file("kernel_aarch64.s")) as f:
@@ -52,24 +57,32 @@ class TestSemanticTools(unittest.TestCase):
cls.mops_1_code = f.read()
cls.mops_2_code = cls.mops_1_code.replace("//ALT1 ", "")
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
cls.kernel_x86 = reduce_to_section(
cls.parser_x86_att.parse_file(cls.code_x86), cls.parser_x86_att
)
cls.kernel_x86_memdep = reduce_to_section(
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86"
cls.parser_x86_att.parse_file(cls.code_x86_memdep), cls.parser_x86_att
)
cls.kernel_x86_long_LCD = reduce_to_section(
cls.parser_x86.parse_file(cls.code_x86_long_LCD), "x86"
cls.parser_x86_att.parse_file(cls.code_x86_long_LCD), cls.parser_x86_att
)
cls.kernel_x86_intel = reduce_to_section(
cls.parser_x86_intel.parse_file(cls.code_x86_intel), cls.parser_x86_intel
)
cls.kernel_x86_intel_memdep = reduce_to_section(
cls.parser_x86_intel.parse_file(cls.code_x86_intel_memdep), cls.parser_x86_intel
)
cls.kernel_AArch64 = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64"
cls.parser_AArch64.parse_file(cls.code_AArch64), cls.parser_AArch64
)
cls.kernel_aarch64_memdep = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64"
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), cls.parser_AArch64
)
cls.kernel_aarch64_SVE = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64"
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), cls.parser_AArch64
)
cls.kernel_aarch64_deps = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64_deps), "aarch64"
cls.parser_AArch64.parse_file(cls.code_AArch64_deps), cls.parser_AArch64
)
# set up machine models
@@ -82,40 +95,64 @@ class TestSemanticTools(unittest.TestCase):
cls.machine_model_a64fx = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml")
)
cls.semantics_x86 = ISASemantics("x86")
cls.semantics_x86 = ISASemantics(cls.parser_x86_att)
cls.semantics_csx = ArchSemantics(
cls.parser_x86_att,
cls.machine_model_csx,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
)
cls.semantics_aarch64 = ISASemantics("aarch64")
cls.semantics_x86_intel = ISASemantics(cls.parser_x86_intel)
cls.semantics_csx_intel = ArchSemantics(
cls.parser_x86_intel,
cls.machine_model_csx,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
)
cls.semantics_aarch64 = ISASemantics(cls.parser_AArch64)
cls.semantics_tx2 = ArchSemantics(
cls.parser_AArch64,
cls.machine_model_tx2,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
)
cls.semantics_a64fx = ArchSemantics(
cls.parser_AArch64,
cls.machine_model_a64fx,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
)
cls.machine_model_zen = MachineModel(arch="zen1")
cls.semantics_csx.normalize_instruction_forms(cls.kernel_x86)
for i in range(len(cls.kernel_x86)):
cls.semantics_csx.assign_src_dst(cls.kernel_x86[i])
cls.semantics_csx.assign_tp_lt(cls.kernel_x86[i])
cls.semantics_csx.normalize_instruction_forms(cls.kernel_x86_memdep)
for i in range(len(cls.kernel_x86_memdep)):
cls.semantics_csx.assign_src_dst(cls.kernel_x86_memdep[i])
cls.semantics_csx.assign_tp_lt(cls.kernel_x86_memdep[i])
cls.semantics_csx.normalize_instruction_forms(cls.kernel_x86_long_LCD)
for i in range(len(cls.kernel_x86_long_LCD)):
cls.semantics_csx.assign_src_dst(cls.kernel_x86_long_LCD[i])
cls.semantics_csx.assign_tp_lt(cls.kernel_x86_long_LCD[i])
cls.semantics_csx_intel.normalize_instruction_forms(cls.kernel_x86_intel)
for i in range(len(cls.kernel_x86_intel)):
cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel[i])
cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel[i])
cls.semantics_csx_intel.normalize_instruction_forms(cls.kernel_x86_intel_memdep)
for i in range(len(cls.kernel_x86_intel_memdep)):
cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel_memdep[i])
cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel_memdep[i])
cls.semantics_tx2.normalize_instruction_forms(cls.kernel_AArch64)
for i in range(len(cls.kernel_AArch64)):
cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i])
cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i])
cls.semantics_tx2.normalize_instruction_forms(cls.kernel_aarch64_memdep)
for i in range(len(cls.kernel_aarch64_memdep)):
cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i])
cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i])
cls.semantics_a64fx.normalize_instruction_forms(cls.kernel_aarch64_SVE)
for i in range(len(cls.kernel_aarch64_SVE)):
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
cls.semantics_a64fx.normalize_instruction_forms(cls.kernel_aarch64_deps)
for i in range(len(cls.kernel_aarch64_deps)):
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_deps[i])
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_deps[i])
@@ -127,7 +164,7 @@ class TestSemanticTools(unittest.TestCase):
def test_creation_by_name(self):
try:
tmp_mm = MachineModel(arch="CSX")
ArchSemantics(tmp_mm)
ArchSemantics(self.parser_x86_att, tmp_mm)
except ValueError:
self.fail()
@@ -258,7 +295,7 @@ class TestSemanticTools(unittest.TestCase):
test_mm_arm.add_port("dummyPort")
# test dump of DB
with open("/dev/null", "w") as dev_null:
with open(os.devnull, "w") as dev_null:
test_mm_x86.dump(stream=dev_null)
test_mm_arm.dump(stream=dev_null)
@@ -270,6 +307,14 @@ class TestSemanticTools(unittest.TestCase):
self.assertTrue("destination" in instruction_form.semantic_operands)
self.assertTrue("src_dst" in instruction_form.semantic_operands)
def test_src_dst_assignment_x86_intel(self):
    # Every Intel-syntax instruction form that carries semantic operands must
    # expose all three operand groups.
    for instruction_form in self.kernel_x86_intel:
        with self.subTest(instruction_form=instruction_form):
            if instruction_form.semantic_operands is None:
                continue
            for group in ("source", "destination", "src_dst"):
                self.assertTrue(group in instruction_form.semantic_operands)
def test_src_dst_assignment_AArch64(self):
for instruction_form in self.kernel_AArch64:
with self.subTest(instruction_form=instruction_form):
@@ -288,6 +333,16 @@ class TestSemanticTools(unittest.TestCase):
self.assertIsInstance(instruction_form.port_pressure, list)
self.assertEqual(len(instruction_form.port_pressure), port_num)
def test_tp_lt_assignment_x86_intel(self):
    # Each Intel-syntax instruction form must have a throughput, a latency and
    # one port-pressure entry per port of the CSX machine model.
    model = self.machine_model_csx
    self.assertTrue("ports" in model)
    expected_length = len(model["ports"])
    for instruction_form in self.kernel_x86_intel:
        with self.subTest(instruction_form=instruction_form):
            has_throughput = instruction_form.throughput is not None
            self.assertTrue(has_throughput)
            has_latency = instruction_form.latency is not None
            self.assertTrue(has_latency)
            self.assertIsInstance(instruction_form.port_pressure, list)
            self.assertEqual(len(instruction_form.port_pressure), expected_length)
def test_tp_lt_assignment_AArch64(self):
self.assertTrue("ports" in self.machine_model_tx2)
port_num = len(self.machine_model_tx2["ports"])
@@ -298,8 +353,7 @@ class TestSemanticTools(unittest.TestCase):
self.assertIsInstance(instruction_form.port_pressure, list)
self.assertEqual(len(instruction_form.port_pressure), port_num)
def test_optimal_throughput_assignment(self):
# x86
def test_optimal_throughput_assignment_x86(self):
kernel_fixed = deepcopy(self.kernel_x86)
self.semantics_csx.add_semantics(kernel_fixed)
self.assertEqual(get_unmatched_instruction_ratio(kernel_fixed), 0)
@@ -312,11 +366,13 @@ class TestSemanticTools(unittest.TestCase):
self.assertTrue(max(tp_optimal) <= max(tp_fixed))
# test multiple port assignment options
test_mm_x86 = MachineModel(path_to_yaml=self._find_file("test_db_x86.yml"))
tmp_semantics = ArchSemantics(test_mm_x86)
tmp_semantics = ArchSemantics(self.parser_x86_att, test_mm_x86)
tmp_code_1 = "fantasyinstr1 %rax, %rax\n"
tmp_code_2 = "fantasyinstr1 %rax, %rax\nfantasyinstr2 %rbx, %rbx\n"
tmp_kernel_1 = self.parser_x86.parse_file(tmp_code_1)
tmp_kernel_2 = self.parser_x86.parse_file(tmp_code_2)
tmp_kernel_1 = self.parser_x86_att.parse_file(tmp_code_1)
tmp_kernel_2 = self.parser_x86_att.parse_file(tmp_code_2)
tmp_semantics.normalize_instruction_forms(tmp_kernel_1)
tmp_semantics.normalize_instruction_forms(tmp_kernel_2)
tmp_semantics.add_semantics(tmp_kernel_1)
tmp_semantics.add_semantics(tmp_kernel_2)
tmp_semantics.assign_optimal_throughput(tmp_kernel_1)
@@ -326,7 +382,36 @@ class TestSemanticTools(unittest.TestCase):
self.assertEqual(k1i1_pp, [0.33, 0.0, 0.33, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33, 0.0, 0.0])
self.assertEqual(k2i1_pp, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0])
# arm
def test_optimal_throughput_assignment_x86_intel(self):
    """Exercise optimal throughput assignment on Intel-syntax x86 code.

    First compares the fixed and the optimal port assignment of
    ``self.kernel_x86_intel``; then checks the port pressure selected for two
    small kernels analysed against the ``test_db_x86.yml`` machine model.
    """
    semantics = self.semantics_csx_intel
    fixed_kernel = deepcopy(self.kernel_x86_intel)
    semantics.add_semantics(fixed_kernel)
    self.assertEqual(get_unmatched_instruction_ratio(fixed_kernel), 0)
    # The optimal kernel starts from the already annotated copy.
    optimal_kernel = deepcopy(fixed_kernel)
    semantics.assign_optimal_throughput(optimal_kernel)
    fixed_tp = semantics.get_throughput_sum(fixed_kernel)
    optimal_tp = semantics.get_throughput_sum(optimal_kernel)
    self.assertNotEqual(fixed_tp, optimal_tp)
    self.assertTrue(max(optimal_tp) <= max(fixed_tp))

    # Multiple port assignment options.
    db_model = MachineModel(path_to_yaml=self._find_file("test_db_x86.yml"))
    db_semantics = ArchSemantics(self.parser_x86_intel, db_model)
    one_instr = "fantasyinstr1 rax, rax\n"
    two_instrs = "fantasyinstr1 rax, rax\nfantasyinstr2 rbx, rbx\n"
    kernels = (
        self.parser_x86_intel.parse_file(one_instr),
        self.parser_x86_intel.parse_file(two_instrs),
    )
    # Run each stage on both kernels before moving on to the next stage.
    stages = (
        db_semantics.normalize_instruction_forms,
        db_semantics.add_semantics,
        db_semantics.assign_optimal_throughput,
    )
    for stage in stages:
        for kernel in kernels:
            stage(kernel)
    # Only the first instruction form of each kernel is inspected.
    pressure_1, pressure_2 = (
        [round(value, 2) for value in kernel[0].port_pressure] for kernel in kernels
    )
    self.assertEqual(pressure_1, [0.33, 0.0, 0.33, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33, 0.0, 0.0])
    self.assertEqual(pressure_2, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0])
def test_optimal_throughput_assignment_AArch64(self):
kernel_fixed = deepcopy(self.kernel_AArch64)
self.semantics_tx2.add_semantics(kernel_fixed)
@@ -347,7 +432,9 @@ class TestSemanticTools(unittest.TestCase):
# 3
# 5_______>9
#
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx)
dg = KernelDG(
self.kernel_x86, self.parser_x86_att, self.machine_model_csx, self.semantics_csx
)
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=3))), 1)
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=3)), 6)
@@ -362,12 +449,44 @@ class TestSemanticTools(unittest.TestCase):
with self.assertRaises(ValueError):
dg.get_dependent_instruction_forms()
# test dot creation
dg.export_graph(filepath="/dev/null")
dg.export_graph(filepath=os.devnull)
def test_kernelDG_x86_intel(self):
    """Check the dependency graph built for the Intel-syntax x86 kernel.

    Expected edges, by line number::

        3 -> 5,  4 -> 5,  5.1 -> 5,  5 -> 6

    Lines 6, 7 and 8 must have no dependent instruction forms.
    """
    graph = KernelDG(
        self.kernel_x86_intel,
        self.parser_x86_intel,
        self.machine_model_csx,
        self.semantics_csx_intel,
    )
    self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(graph.dg))
    # (line number, its only dependent line number)
    single_successor = ((3, 5), (4, 5), (5, 6), (5.1, 5))
    for line, successor in single_successor:
        dependents = list(graph.get_dependent_instruction_forms(line_number=line))
        self.assertEqual(len(dependents), 1)
        first = next(graph.get_dependent_instruction_forms(line_number=line))
        self.assertEqual(first, successor)
    for line in (6, 7, 8):
        self.assertEqual(list(graph.get_dependent_instruction_forms(line_number=line)), [])
    # Calling without any argument must be rejected.
    with self.assertRaises(ValueError):
        graph.get_dependent_instruction_forms()
    # test dot creation
    graph.export_graph(filepath=os.devnull)
def test_memdependency_x86(self):
dg = KernelDG(
self.kernel_x86_memdep,
self.parser_x86,
self.parser_x86_att,
self.machine_model_csx,
self.semantics_csx,
)
@@ -377,7 +496,22 @@ class TestSemanticTools(unittest.TestCase):
with self.assertRaises(ValueError):
dg.get_dependent_instruction_forms()
# test dot creation
dg.export_graph(filepath="/dev/null")
dg.export_graph(filepath=os.devnull)
def test_memdependency_x86_intel(self):
    """Check the dependencies found in the Intel-syntax memory-dependency kernel.

    The graph must be acyclic, line 3 must have dependents {6, 8} and line 5
    must have dependents {10, 12}.
    """
    graph = KernelDG(
        self.kernel_x86_intel_memdep,
        self.parser_x86_intel,
        self.machine_model_csx,
        self.semantics_csx_intel,
    )
    self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(graph.dg))
    # (line number, expected set of dependent line numbers)
    expected_dependents = ((3, {6, 8}), (5, {10, 12}))
    for line, dependents in expected_dependents:
        found = set(graph.get_dependent_instruction_forms(line_number=line))
        self.assertEqual(found, dependents)
    # Calling without any argument must be rejected.
    with self.assertRaises(ValueError):
        graph.get_dependent_instruction_forms()
    # test dot creation
    graph.export_graph(filepath=os.devnull)
def test_kernelDG_AArch64(self):
dg = KernelDG(
@@ -408,7 +542,7 @@ class TestSemanticTools(unittest.TestCase):
with self.assertRaises(ValueError):
dg.get_dependent_instruction_forms()
# test dot creation
dg.export_graph(filepath="/dev/null")
dg.export_graph(filepath=os.devnull)
def test_kernelDG_SVE(self):
KernelDG(
@@ -421,11 +555,13 @@ class TestSemanticTools(unittest.TestCase):
def test_mops_deps_AArch64(self):
self.kernel_mops_1 = reduce_to_section(
self.parser_AArch64.parse_file(self.mops_1_code), "aarch64"
self.parser_AArch64.parse_file(self.mops_1_code), self.parser_AArch64
)
self.kernel_mops_2 = reduce_to_section(
self.parser_AArch64.parse_file(self.mops_2_code), "aarch64"
self.parser_AArch64.parse_file(self.mops_2_code), self.parser_AArch64
)
self.semantics_a64fx.normalize_instruction_forms(self.kernel_mops_1)
self.semantics_a64fx.normalize_instruction_forms(self.kernel_mops_2)
for i in range(len(self.kernel_mops_1)):
self.semantics_a64fx.assign_src_dst(self.kernel_mops_1[i])
for i in range(len(self.kernel_mops_2)):
@@ -472,11 +608,15 @@ class TestSemanticTools(unittest.TestCase):
path_to_yaml=self._find_file("hidden_load_machine_model.yml")
)
self.assertTrue(machine_model_hld.has_hidden_loads())
semantics_hld = ArchSemantics(machine_model_hld)
kernel_hld = self.parser_x86.parse_file(self.code_x86)
kernel_hld_2 = self.parser_x86.parse_file(self.code_x86)
kernel_hld_2 = self.parser_x86.parse_file(self.code_x86)[-3:]
kernel_hld_3 = self.parser_x86.parse_file(self.code_x86)[5:8]
semantics_hld = ArchSemantics(self.parser_x86_att, machine_model_hld)
kernel_hld = self.parser_x86_att.parse_file(self.code_x86)
kernel_hld_2 = self.parser_x86_att.parse_file(self.code_x86)
kernel_hld_2 = self.parser_x86_att.parse_file(self.code_x86)[-3:]
kernel_hld_3 = self.parser_x86_att.parse_file(self.code_x86)[5:8]
semantics_hld.normalize_instruction_forms(kernel_hld)
semantics_hld.normalize_instruction_forms(kernel_hld_2)
semantics_hld.normalize_instruction_forms(kernel_hld_3)
semantics_hld.add_semantics(kernel_hld)
semantics_hld.add_semantics(kernel_hld_2)
@@ -490,7 +630,9 @@ class TestSemanticTools(unittest.TestCase):
self.assertEqual(num_hidden_loads_3, 1)
def test_cyclic_dag(self):
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx)
dg = KernelDG(
self.kernel_x86, self.parser_x86_att, self.machine_model_csx, self.semantics_csx
)
dg.dg.add_edge(100, 101, latency=1.0)
dg.dg.add_edge(101, 102, latency=2.0)
dg.dg.add_edge(102, 100, latency=3.0)
@@ -555,7 +697,42 @@ class TestSemanticTools(unittest.TestCase):
def test_loop_carried_dependency_x86(self):
    """Check the loop-carried dependencies found in the AT&T-syntax x86 kernel.

    For each expected dependency ID, the reported root must be the instruction
    form stored at that node of the graph, and the dependency list must hold
    exactly one entry whose first element is that same instruction form.
    """
    # The graph is built once, with the AT&T parser. A second construction through
    # ``self.parser_x86`` would only be overwritten by this one.
    dg = KernelDG(
        self.kernel_x86, self.parser_x86_att, self.machine_model_csx, self.semantics_csx
    )
    lc_deps = dg.get_loopcarried_dependencies()
    # self.assertEqual(len(lc_deps), 2)
    # ID 8 is checked first. For the second ID:
    # w/ flag dependencies: ID 9 w/ len=2
    # w/o flag dependencies: ID 5 w/ len=1
    # TODO discuss
    for lcd_id in ("8", "5"):
        # Dependency IDs are strings, graph nodes are keyed by the integer value.
        node_form = dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"]
        self.assertEqual(lc_deps[lcd_id]["root"], node_form)
        self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1)
        self.assertEqual(lc_deps[lcd_id]["dependencies"][0][0], node_form)
def test_loop_carried_dependency_x86_intel(self):
lcd_id = "8"
lcd_id2 = "7"
dg = KernelDG(
self.kernel_x86_intel,
self.parser_x86_intel,
self.machine_model_csx,
self.semantics_csx_intel,
)
lc_deps = dg.get_loopcarried_dependencies()
# self.assertEqual(len(lc_deps), 2)
# ID 8
@@ -584,7 +761,7 @@ class TestSemanticTools(unittest.TestCase):
start_time = time.perf_counter()
KernelDG(
self.kernel_x86_long_LCD,
self.parser_x86,
self.parser_x86_att,
self.machine_model_csx,
self.semantics_x86,
timeout=10,
@@ -594,7 +771,7 @@ class TestSemanticTools(unittest.TestCase):
start_time = time.perf_counter()
KernelDG(
self.kernel_x86_long_LCD,
self.parser_x86,
self.parser_x86_att,
self.machine_model_csx,
self.semantics_x86,
timeout=2,
@@ -608,22 +785,32 @@ class TestSemanticTools(unittest.TestCase):
def test_is_read_is_written_x86(self):
# independent form HW model
dag = KernelDG(self.kernel_x86, self.parser_x86, None, None)
dag = KernelDG(self.kernel_x86, self.parser_x86_att, None, None)
reg_rcx = RegisterOperand(name="rcx")
reg_ymm1 = RegisterOperand(name="ymm1")
instr_form_r_c = self.parser_x86.parse_line("vmovsd %xmm0, (%r15,%rcx,8)")
instr_form_r_c = self.parser_x86_att.parse_line("vmovsd %xmm0, (%r15,%rcx,8)")
self.semantics_csx.normalize_instruction_form(instr_form_r_c)
self.semantics_csx.assign_src_dst(instr_form_r_c)
instr_form_non_r_c = self.parser_x86.parse_line("movl %xmm0, (%r15,%rax,8)")
instr_form_non_r_c = self.parser_x86_att.parse_line("movl %xmm0, (%r15,%rax,8)")
self.semantics_csx.normalize_instruction_form(instr_form_non_r_c)
self.semantics_csx.assign_src_dst(instr_form_non_r_c)
instr_form_w_c = self.parser_x86.parse_line("movi $0x05ACA, %rcx")
instr_form_w_c = self.parser_x86_att.parse_line("movi $0x05ACA, %rcx")
self.semantics_csx.normalize_instruction_form(instr_form_w_c)
self.semantics_csx.assign_src_dst(instr_form_w_c)
instr_form_rw_ymm_1 = self.parser_x86.parse_line("vinsertf128 $0x1, %xmm1, %ymm0, %ymm1")
instr_form_rw_ymm_1 = self.parser_x86_att.parse_line(
"vinsertf128 $0x1, %xmm1, %ymm0, %ymm1"
)
self.semantics_csx.normalize_instruction_form(instr_form_rw_ymm_1)
self.semantics_csx.assign_src_dst(instr_form_rw_ymm_1)
instr_form_rw_ymm_2 = self.parser_x86.parse_line("vinsertf128 $0x1, %xmm0, %ymm1, %ymm1")
instr_form_rw_ymm_2 = self.parser_x86_att.parse_line(
"vinsertf128 $0x1, %xmm0, %ymm1, %ymm1"
)
self.semantics_csx.normalize_instruction_form(instr_form_rw_ymm_2)
self.semantics_csx.assign_src_dst(instr_form_rw_ymm_2)
instr_form_r_ymm = self.parser_x86.parse_line("vmovapd %ymm1, %ymm0")
instr_form_r_ymm = self.parser_x86_att.parse_line("vmovapd %ymm1, %ymm0")
self.semantics_csx.normalize_instruction_form(instr_form_r_ymm)
self.semantics_csx.assign_src_dst(instr_form_r_ymm)
self.assertTrue(dag.is_read(reg_rcx, instr_form_r_c))
self.assertFalse(dag.is_read(reg_rcx, instr_form_non_r_c))
@@ -637,6 +824,43 @@ class TestSemanticTools(unittest.TestCase):
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2))
self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm))
def test_is_read_is_written_x86_intel(self):
    """Check ``is_read`` / ``is_written`` of the graph on Intel-syntax x86 lines.

    The graph is created without machine model and semantics; the individual
    instruction forms are parsed, normalized and given source/destination
    operands through ``self.semantics_csx_intel`` before being queried.
    """

    def _prepare(line):
        # Parse one line and annotate it the same way for every test input.
        form = self.parser_x86_intel.parse_line(line)
        self.semantics_csx_intel.normalize_instruction_form(form)
        self.semantics_csx_intel.assign_src_dst(form)
        return form

    # independent from HW model
    graph = KernelDG(self.kernel_x86_intel, self.parser_x86_intel, None, None)
    rcx = RegisterOperand(name="rcx")
    ymm1 = RegisterOperand(name="ymm1")

    reads_rcx = _prepare("vmovsd QWORD PTR [r15+rcx*8], xmm0")
    ignores_rcx = _prepare("mov QWORD PTR [r15+rax*8], xmm0")
    writes_rcx = _prepare("mov rcx, H05ACA")
    rw_ymm1_a = _prepare("vinsertf128 ymm1, ymm0, xmm1, 1")
    rw_ymm1_b = _prepare("vinsertf128 ymm1, ymm1, xmm0, 1")
    reads_ymm1 = _prepare("vmovapd ymm0, ymm1")

    # rcx
    self.assertTrue(graph.is_read(rcx, reads_rcx))
    self.assertFalse(graph.is_read(rcx, ignores_rcx))
    self.assertFalse(graph.is_read(rcx, writes_rcx))
    self.assertTrue(graph.is_written(rcx, writes_rcx))
    self.assertFalse(graph.is_written(rcx, reads_rcx))
    # ymm1
    self.assertTrue(graph.is_read(ymm1, rw_ymm1_a))
    self.assertTrue(graph.is_read(ymm1, rw_ymm1_b))
    self.assertTrue(graph.is_read(ymm1, reads_ymm1))
    self.assertTrue(graph.is_written(ymm1, rw_ymm1_a))
    self.assertTrue(graph.is_written(ymm1, rw_ymm1_b))
    self.assertFalse(graph.is_written(ymm1, reads_ymm1))
def test_is_read_is_written_AArch64(self):
# independent form HW model
dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None, None)
@@ -649,20 +873,28 @@ class TestSemanticTools(unittest.TestCase):
regs_gp = [reg_w1, reg_x1]
instr_form_r_1 = self.parser_AArch64.parse_line("stp q1, q3, [x12, #192]")
self.semantics_tx2.normalize_instruction_form(instr_form_r_1)
self.semantics_tx2.assign_src_dst(instr_form_r_1)
instr_form_r_2 = self.parser_AArch64.parse_line("fadd v2.2d, v1.2d, v0.2d")
self.semantics_tx2.normalize_instruction_form(instr_form_r_2)
self.semantics_tx2.assign_src_dst(instr_form_r_2)
instr_form_w_1 = self.parser_AArch64.parse_line("ldr d1, [x1, #:got_lo12:q2c]")
self.semantics_tx2.normalize_instruction_form(instr_form_w_1)
self.semantics_tx2.assign_src_dst(instr_form_w_1)
instr_form_non_w_1 = self.parser_AArch64.parse_line("ldr x1, [x1, #:got_lo12:q2c]")
self.semantics_tx2.normalize_instruction_form(instr_form_non_w_1)
self.semantics_tx2.assign_src_dst(instr_form_non_w_1)
instr_form_rw_1 = self.parser_AArch64.parse_line("fmul v1.2d, v1.2d, v0.2d")
self.semantics_tx2.normalize_instruction_form(instr_form_rw_1)
self.semantics_tx2.assign_src_dst(instr_form_rw_1)
instr_form_rw_2 = self.parser_AArch64.parse_line("ldp q2, q4, [x1, #64]!")
self.semantics_tx2.normalize_instruction_form(instr_form_rw_2)
self.semantics_tx2.assign_src_dst(instr_form_rw_2)
instr_form_rw_3 = self.parser_AArch64.parse_line("str x4, [x1], #64")
self.semantics_tx2.normalize_instruction_form(instr_form_rw_3)
self.semantics_tx2.assign_src_dst(instr_form_rw_3)
instr_form_non_rw_1 = self.parser_AArch64.parse_line("adds x1, x11")
self.semantics_tx2.normalize_instruction_form(instr_form_non_rw_1)
self.semantics_tx2.assign_src_dst(instr_form_non_rw_1)
for reg in regs:

View File

@@ -7,6 +7,12 @@
#endif
#endif
/* Set USE_IACA to 1 to include the IACA marker header. */
#define USE_IACA 0
#if USE_IACA
/* Forward slash: a backslash inside a header name is not portable across compilers. */
#include "intel/iacaMarks.h"
#endif
#define DTYPE double
void dummy(void *);
@@ -15,9 +21,15 @@ void kernel(DTYPE* a, DTYPE* b, DTYPE* c, DTYPE* d, const int repeat, const int
#ifndef MAIN
{
for(int r=0; r < repeat; r++) {
#if USE_IACA
IACA_VC64_START
#endif
for(int i=0; i<cur_elements; i++) {
a[i] = b[i] + c[i] * d[i];
}
#if USE_IACA
IACA_VC64_END
#endif
dummy((void*)a);
}
}

View File

@@ -7,6 +7,12 @@
#endif
#endif
/* Set USE_IACA to 1 to include the IACA marker header. */
#define USE_IACA 0
#if USE_IACA
/* Forward slash: a backslash inside a header name is not portable across compilers. */
#include "intel/iacaMarks.h"
#endif
#define DTYPE double
void dummy(void *);
@@ -16,7 +22,13 @@ void kernel(DTYPE* a, DTYPE* b, DTYPE* c, const DTYPE s, const int repeat, const
{
for(int r=0; r < repeat; r++) {
for(int i=0; i<cur_elements; i++) {
#if USE_IACA
IACA_VC64_START
#endif
a[i] = b[i] + s * c[i];
#if USE_IACA
IACA_VC64_END
#endif
}
dummy((void*)a);
}