mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 09:00:05 +01:00
Compare commits
25 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bce837dec9 | ||
|
|
090c24ade1 | ||
|
|
03a2a1da33 | ||
|
|
d59b100fa8 | ||
|
|
5c741a8a2d | ||
|
|
2f4849f44e | ||
|
|
f13a97e5b5 | ||
|
|
66282b0eef | ||
|
|
9ec7c161ab | ||
|
|
8d8eaa8e4f | ||
|
|
88d5094bf1 | ||
|
|
1f32252f91 | ||
|
|
1de644cd62 | ||
|
|
3d1c6aae8d | ||
|
|
dafec70e6e | ||
|
|
6d85fbe9e4 | ||
|
|
3f31235f8a | ||
|
|
cfc061e5e3 | ||
|
|
5eb3e07ad6 | ||
|
|
a82a0e24a3 | ||
|
|
6db08c7e8e | ||
|
|
e6a54ee131 | ||
|
|
152360bad2 | ||
|
|
607d459569 | ||
|
|
b033b3b7aa |
4
.github/workflows/test-n-publish.yml
vendored
4
.github/workflows/test-n-publish.yml
vendored
@@ -31,11 +31,11 @@ jobs:
|
||||
- uses: codecov/codecov-action@v1
|
||||
- name: Build package
|
||||
run: |
|
||||
python setup.py build sdist
|
||||
python setup.py build sdist bdist_wheel
|
||||
- name: Publish to PyPI
|
||||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
|
||||
uses: pypa/gh-action-pypi-publish@master
|
||||
with:
|
||||
skip_existing: true
|
||||
user: __token__
|
||||
password: ${{ secrets.pypi_password }}
|
||||
password: ${{ secrets.pypi_password }}
|
||||
|
||||
20
README.rst
20
README.rst
@@ -82,10 +82,10 @@ The usage of OSACA can be listed as:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
osaca [-h] [-V] [--arch ARCH] [--fixed] [--lines LINES] [--db-check]
|
||||
[--import MICROBENCH] [--insert-marker]
|
||||
[--export-graph GRAPHNAME] [--ignore-unknown] [--verbose]
|
||||
[--out OUT]
|
||||
osaca [-h] [-V] [--arch ARCH] [--fixed] [--lines LINES]
|
||||
[--ignore-unknown] [--lcd-timeout SECONDS]
|
||||
[--db-check] [--import MICROBENCH] [--insert-marker]
|
||||
[--export-graph GRAPHNAME] [--out OUT] [--verbose]
|
||||
FILEPATH
|
||||
|
||||
-h, --help
|
||||
@@ -118,6 +118,9 @@ The usage of OSACA can be listed as:
|
||||
--ignore-unknown
|
||||
Force OSACA to apply a throughput and latency of 0.0 cy for all unknown instruction forms.
|
||||
If not specified, a warning will be printed instead if one or more instruction forms are unknown to OSACA.
|
||||
--lcd-timeout SECONDS
|
||||
Set timeout in seconds for LCD analysis. After timeout, OSACA will continue its analysis with the dependency paths found up to this point.
|
||||
Defaults to `10`.
|
||||
-v, --verbose
|
||||
Increases verbosity level
|
||||
-o OUT, --out OUT
|
||||
@@ -370,9 +373,16 @@ In the bottom, all loop-carried dependencies are shown, each with a list of line
|
||||
|
||||
You can find more (already marked) examples and sample outputs for various architectures in the `examples <examples/>`__ directory.
|
||||
|
||||
Citations
|
||||
=========
|
||||
If you use OSACA for scientific work you can cite us as (for the Bibtex, see the `Wiki <https://github.com/RRZE-HPC/OSACA/wiki#acknowledgement>`_):
|
||||
|
||||
* `Automated Instruction Stream Throughput Prediction for Intel and AMD Microarchitectures <https://doi.org/10.1109/PMBS.2018.8641578>`_ (`Pre-print PMBS18 <https://arxiv.org/abs/1809.00912>`_)
|
||||
* `Automatic Throughput and Critical Path Analysis of x86 and ARM Assembly Kernels <https://doi.org/10.1109/PMBS49563.2019.00006>`_ (`Pre-print PMBS19 <https://arxiv.org/abs/1910.00214>`_)
|
||||
|
||||
Credits
|
||||
=======
|
||||
Implementation: Jan Laukemann
|
||||
Implementation: Jan Laukemann, Julian Hammer
|
||||
|
||||
License
|
||||
=======
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Open Source Architecture Code Analyzer"""
|
||||
name = "osaca"
|
||||
__version__ = "0.4.0"
|
||||
__version__ = "0.4.4"
|
||||
|
||||
# To trigger travis deployment to pypi, do the following:
|
||||
# 1. Increment __version__
|
||||
|
||||
@@ -1132,6 +1132,27 @@ instruction_forms:
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
|
||||
- name: ld2d
|
||||
operands:
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[2, '56'], [4, ['5D', '6D']]]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
@@ -1414,6 +1435,22 @@ instruction_forms:
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: ld2
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
- class: register
|
||||
prefix: v
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 11.0 # 1*p56+2*p5D6D
|
||||
port_pressure: [[1, '56'], [2, ['5D','6D']]]
|
||||
- name: lsl
|
||||
operands:
|
||||
- class: register
|
||||
@@ -1980,6 +2017,43 @@ instruction_forms:
|
||||
throughput: 1.0
|
||||
latency: 0 # 1*p5+1*p6+1*p0
|
||||
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
|
||||
- name: st2d
|
||||
operands:
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 0 # 1*p5+1*p6+1*p0
|
||||
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
|
||||
- name: st2
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
- class: register
|
||||
prefix: v
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 11.0 # 1*p56+2*p5D6D
|
||||
port_pressure: [[1, '5'], [1, ['6']], [1, '0']]
|
||||
- name: sub
|
||||
operands:
|
||||
- class: register
|
||||
|
||||
@@ -163,6 +163,7 @@ class Frontend(object):
|
||||
ignore_unknown=False,
|
||||
arch_warning=False,
|
||||
length_warning=False,
|
||||
lcd_warning=False,
|
||||
verbose=False,
|
||||
):
|
||||
"""
|
||||
@@ -176,17 +177,19 @@ class Frontend(object):
|
||||
:param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
|
||||
`False`
|
||||
:type ignore_unknown: boolean, optional
|
||||
:param print_arch_warning: flag for additional user warning to specify micro-arch
|
||||
:type print_arch_warning: boolean, optional
|
||||
:param print_length_warning: flag for additional user warning to specify kernel length with
|
||||
:param arch_warning: flag for additional user warning to specify micro-arch
|
||||
:type arch_warning: boolean, optional
|
||||
:param length_warning: flag for additional user warning to specify kernel length with
|
||||
--lines
|
||||
:type print_length_warning: boolean, optional
|
||||
:type length_warning: boolean, optional
|
||||
:param lcd_warning: flag for additional user warning due to LCD analysis timed out
|
||||
:type lcd_warning: boolean, optional
|
||||
:param verbose: flag for verbosity level, defaults to False
|
||||
:type verbose: boolean, optional
|
||||
"""
|
||||
return (
|
||||
self._header_report()
|
||||
+ self._user_warnings(arch_warning, length_warning)
|
||||
+ self._user_warnings_header(arch_warning, length_warning)
|
||||
+ self._symbol_map()
|
||||
+ self.combined_view(
|
||||
kernel,
|
||||
@@ -194,6 +197,7 @@ class Frontend(object):
|
||||
kernel_dg.get_loopcarried_dependencies(),
|
||||
ignore_unknown,
|
||||
)
|
||||
+ self._user_warnings_footer(lcd_warning)
|
||||
+ self.loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies())
|
||||
)
|
||||
|
||||
@@ -236,8 +240,9 @@ class Frontend(object):
|
||||
if dep_dict:
|
||||
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]['latency'])
|
||||
lcd_sum = dep_dict[longest_lcd]['latency']
|
||||
lcd_lines = {instr["line_number"]: lat
|
||||
for instr, lat in dep_dict[longest_lcd]["dependencies"]}
|
||||
lcd_lines = {
|
||||
instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"]
|
||||
}
|
||||
|
||||
s += headline_str.format(headline) + "\n"
|
||||
s += (
|
||||
@@ -311,18 +316,24 @@ class Frontend(object):
|
||||
).format(amount, "-" * len(str(amount)))
|
||||
return s
|
||||
|
||||
def _user_warnings(self, arch_warning, length_warning):
|
||||
def _user_warnings_header(self, arch_warning, length_warning):
|
||||
"""Returns warning texts for giving the user more insight in what he is doing."""
|
||||
dashed_line = (
|
||||
"-------------------------------------------------------------------------"
|
||||
"------------------------\n"
|
||||
)
|
||||
arch_text = (
|
||||
"WARNING: No micro-architecture was specified and a default uarch was used.\n"
|
||||
" Specify the uarch with --arch. See --help for more information.\n"
|
||||
"-------------------------- WARNING: No micro-architecture was specified "
|
||||
"-------------------------\n"
|
||||
" A default uarch for this particular ISA was used. Specify "
|
||||
"the uarch with --arch.\n See --help for more information.\n" + dashed_line
|
||||
)
|
||||
length_text = (
|
||||
"WARNING: You are analyzing a large amount of instruction forms. Analysis "
|
||||
"across loops/block boundaries often do not make much sense.\n"
|
||||
" Specify the kernel length with --length. See --help for more "
|
||||
"information.\n"
|
||||
" If this is intentional, you can safely ignore this message.\n"
|
||||
"----------------- WARNING: You are analyzing a large amount of instruction forms "
|
||||
"----------------\n Analysis across loops/block boundaries often do not make"
|
||||
" much sense.\n Specify the kernel length with --length. See --help for more "
|
||||
"information.\n If this is intentional, you can safely ignore this message.\n"
|
||||
+ dashed_line
|
||||
)
|
||||
|
||||
warnings = ""
|
||||
@@ -331,6 +342,24 @@ class Frontend(object):
|
||||
warnings += "\n"
|
||||
return warnings
|
||||
|
||||
def _user_warnings_footer(self, lcd_warning):
|
||||
"""Returns warning texts for giving the user more insight in what he is doing."""
|
||||
dashed_line = (
|
||||
"-------------------------------------------------------------------------"
|
||||
"------------------------\n"
|
||||
)
|
||||
lcd_text = (
|
||||
"-------------------------------- WARNING: LCD analysis timed out "
|
||||
"-------------------------------\n While searching for all dependency chains"
|
||||
" the analysis timed out and might be\n incomplete. Decrease the number of "
|
||||
"instructions or set the timeout threshold\n with --lcd-timeout. See --help"
|
||||
" for more information.\n" + dashed_line
|
||||
)
|
||||
warnings = "\n"
|
||||
warnings += lcd_text if lcd_warning else ""
|
||||
warnings += "\n"
|
||||
return warnings
|
||||
|
||||
def _get_separator_list(self, separator, separator_2=" "):
|
||||
"""Creates column view for seperators in the TP/combined view."""
|
||||
separator_list = []
|
||||
|
||||
@@ -146,6 +146,16 @@ def create_parser(parser=None):
|
||||
action="store_true",
|
||||
help="Ignore if instructions cannot be found in the data file and print analysis anyway.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lcd-timeout",
|
||||
dest="lcd_timeout",
|
||||
metavar="SECONDS",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Set timeout in seconds for LCD analysis. After timeout, OSACA will continue"
|
||||
" its analysis with the dependency paths found up to this point. Defaults to 10."
|
||||
" Set to -1 for no timeout.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose", "-v", action="count", default=0, help="Increases verbosity level."
|
||||
)
|
||||
@@ -172,6 +182,9 @@ def check_arguments(args, parser):
|
||||
"""
|
||||
supported_import_files = ["ibench", "asmbench"]
|
||||
|
||||
# manually set CLX to CSX to support both abbreviations
|
||||
if args.arch and args.arch.upper() == "CLX":
|
||||
args.arch = "CSX"
|
||||
if args.arch is None and (args.check_db or "import_data" in args):
|
||||
parser.error(
|
||||
"DB check and data import cannot work with a default microarchitecture. "
|
||||
@@ -303,7 +316,7 @@ def inspect(args, output_file=sys.stdout):
|
||||
semantics.assign_optimal_throughput(kernel)
|
||||
|
||||
# Create DiGraphs
|
||||
kernel_graph = KernelDG(kernel, parser, machine_model, semantics)
|
||||
kernel_graph = KernelDG(kernel, parser, machine_model, semantics, args.lcd_timeout)
|
||||
if args.dotpath is not None:
|
||||
kernel_graph.export_graph(args.dotpath if args.dotpath != "." else None)
|
||||
# Print analysis
|
||||
@@ -315,6 +328,7 @@ def inspect(args, output_file=sys.stdout):
|
||||
ignore_unknown=ignore_unknown,
|
||||
arch_warning=print_arch_warning,
|
||||
length_warning=print_length_warning,
|
||||
lcd_warning=kernel_graph.timed_out,
|
||||
verbose=verbose,
|
||||
),
|
||||
file=output_file,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
from copy import deepcopy
|
||||
import pyparsing as pp
|
||||
|
||||
from osaca.parser import AttrDict, BaseParser
|
||||
@@ -257,7 +256,9 @@ class ParserAArch64(BaseParser):
|
||||
# 2. Parse label
|
||||
if result is None:
|
||||
try:
|
||||
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
|
||||
result = self.process_operand(
|
||||
self.label.parseString(line, parseAll=True).asDict()
|
||||
)
|
||||
result = AttrDict.convert_dict(result)
|
||||
instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
|
||||
if self.COMMENT_ID in result[self.LABEL_ID]:
|
||||
@@ -292,7 +293,6 @@ class ParserAArch64(BaseParser):
|
||||
try:
|
||||
result = self.parse_instruction(line)
|
||||
except (pp.ParseException, KeyError) as e:
|
||||
raise e
|
||||
raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e
|
||||
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
|
||||
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
|
||||
@@ -313,19 +313,24 @@ class ParserAArch64(BaseParser):
|
||||
# Add operands to list
|
||||
# Check first operand
|
||||
if "operand1" in result:
|
||||
operands.append(self.process_operand(result["operand1"]))
|
||||
operand = self.process_operand(result["operand1"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check second operand
|
||||
if "operand2" in result:
|
||||
operands.append(self.process_operand(result["operand2"]))
|
||||
operand = self.process_operand(result["operand2"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check third operand
|
||||
if "operand3" in result:
|
||||
operands.append(self.process_operand(result["operand3"]))
|
||||
operand = self.process_operand(result["operand3"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check fourth operand
|
||||
if "operand4" in result:
|
||||
operands.append(self.process_operand(result["operand4"]))
|
||||
operand = self.process_operand(result["operand4"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
# Check fifth operand
|
||||
if "operand5" in result:
|
||||
operands.append(self.process_operand(result["operand5"]))
|
||||
operand = self.process_operand(result["operand5"])
|
||||
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
|
||||
|
||||
return_dict = AttrDict(
|
||||
{
|
||||
@@ -347,8 +352,8 @@ class ParserAArch64(BaseParser):
|
||||
if self.REGISTER_ID in operand and (
|
||||
"list" in operand[self.REGISTER_ID] or "range" in operand[self.REGISTER_ID]
|
||||
):
|
||||
# TODO: discuss if ranges should be converted to lists
|
||||
return self.process_register_list(operand[self.REGISTER_ID])
|
||||
# resolve ranges and lists
|
||||
return self.resolve_range_list(self.process_register_list(operand[self.REGISTER_ID]))
|
||||
if self.REGISTER_ID in operand and operand[self.REGISTER_ID]["name"] == "sp":
|
||||
return self.process_sp_register(operand[self.REGISTER_ID])
|
||||
# add value attribute to floating point immediates without exponent
|
||||
@@ -366,6 +371,8 @@ class ParserAArch64(BaseParser):
|
||||
offset = memory_address.get("offset", None)
|
||||
if isinstance(offset, list) and len(offset) == 1:
|
||||
offset = offset[0]
|
||||
if offset is not None and "value" in offset:
|
||||
offset["value"] = int(offset["value"], 0)
|
||||
base = memory_address.get("base", None)
|
||||
index = memory_address.get("index", None)
|
||||
scale = 1
|
||||
@@ -382,7 +389,12 @@ class ParserAArch64(BaseParser):
|
||||
if "pre_indexed" in memory_address:
|
||||
new_dict["pre_indexed"] = True
|
||||
if "post_indexed" in memory_address:
|
||||
new_dict["post_indexed"] = memory_address["post_indexed"]
|
||||
if "value" in memory_address["post_indexed"]:
|
||||
new_dict["post_indexed"] = {"value": int(
|
||||
memory_address["post_indexed"]["value"], 0
|
||||
)}
|
||||
else:
|
||||
new_dict["post_indexed"] = memory_address["post_indexed"]
|
||||
return AttrDict({self.MEMORY_ID: new_dict})
|
||||
|
||||
def process_sp_register(self, register):
|
||||
@@ -391,6 +403,37 @@ class ParserAArch64(BaseParser):
|
||||
reg["prefix"] = "x"
|
||||
return AttrDict({self.REGISTER_ID: reg})
|
||||
|
||||
def resolve_range_list(self, operand):
|
||||
"""
|
||||
Resolve range or list register operand to list of registers.
|
||||
Returns the operand unmodified if it is neither a list nor a range
|
||||
"""
|
||||
if 'register' in operand:
|
||||
if 'list' in operand.register:
|
||||
index = operand.register.get('index')
|
||||
range_list = []
|
||||
for reg in operand.register.list:
|
||||
reg = deepcopy(reg)
|
||||
if index is not None:
|
||||
reg['index'] = int(index, 0)
|
||||
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
||||
return range_list
|
||||
elif 'range' in operand.register:
|
||||
base_register = operand.register.range[0]
|
||||
index = operand.register.get('index')
|
||||
range_list = []
|
||||
start_name = base_register.name
|
||||
end_name = operand.register.range[1].name
|
||||
for name in range(int(start_name), int(end_name) + 1):
|
||||
reg = deepcopy(base_register)
|
||||
if index is not None:
|
||||
reg['index'] = int(index, 0)
|
||||
reg['name'] = str(name)
|
||||
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
||||
return range_list
|
||||
# neither register list nor range, return unmodified
|
||||
return operand
|
||||
|
||||
def process_register_list(self, register_list):
|
||||
"""Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})"""
|
||||
# Remove unnecessarily created dictionary entries during parsing
|
||||
@@ -419,11 +462,13 @@ class ParserAArch64(BaseParser):
|
||||
if "value" in immediate:
|
||||
# normal integer value
|
||||
immediate["type"] = "int"
|
||||
# convert hex/bin immediates to dec
|
||||
immediate["value"] = self.normalize_imd(immediate)
|
||||
return AttrDict({self.IMMEDIATE_ID: immediate})
|
||||
if "base_immediate" in immediate:
|
||||
# arithmetic immediate, add calculated value as value
|
||||
immediate["shift"] = immediate["shift"][0]
|
||||
immediate["value"] = int(immediate["base_immediate"]["value"], 0) << int(
|
||||
immediate["value"] = self.normalize_imd(immediate["base_immediate"]) << int(
|
||||
immediate["shift"]["value"]
|
||||
)
|
||||
immediate["type"] = "int"
|
||||
@@ -471,10 +516,11 @@ class ParserAArch64(BaseParser):
|
||||
def normalize_imd(self, imd):
|
||||
"""Normalize immediate to decimal based representation"""
|
||||
if "value" in imd:
|
||||
if imd["value"].lower().startswith("0x"):
|
||||
# hex, return decimal
|
||||
return int(imd["value"], 16)
|
||||
return int(imd["value"], 10)
|
||||
if isinstance(imd["value"], str):
|
||||
# hex or bin, return decimal
|
||||
return int(imd["value"], 0)
|
||||
else:
|
||||
return imd["value"]
|
||||
elif "float" in imd:
|
||||
return self.ieee_to_float(imd["float"])
|
||||
elif "double" in imd:
|
||||
|
||||
@@ -108,7 +108,8 @@ class ParserX86ATT(BaseParser):
|
||||
)
|
||||
)
|
||||
memory_segmentation = (
|
||||
self.register.setResultsName("base")
|
||||
pp.Optional(pp.Suppress(pp.Literal("*")))
|
||||
+ self.register.setResultsName("base")
|
||||
+ pp.Literal(":")
|
||||
+ segment_extension.setResultsName(self.SEGMENT_EXT_ID)
|
||||
)
|
||||
@@ -326,9 +327,14 @@ class ParserX86ATT(BaseParser):
|
||||
offset = memory_address.get("offset", None)
|
||||
base = memory_address.get("base", None)
|
||||
index = memory_address.get("index", None)
|
||||
scale = 1 if "scale" not in memory_address else int(memory_address["scale"])
|
||||
scale = 1 if "scale" not in memory_address else int(memory_address["scale"], 0)
|
||||
if isinstance(offset, str) and base is None and index is None:
|
||||
offset = {"value": offset}
|
||||
try:
|
||||
offset = {"value": int(offset, 0)}
|
||||
except ValueError:
|
||||
offset = {"value": offset}
|
||||
elif offset is not None and "value" in offset:
|
||||
offset["value"] = int(offset["value"], 0)
|
||||
new_dict = AttrDict({"offset": offset, "base": base, "index": index, "scale": scale})
|
||||
# Add segmentation extension if existing
|
||||
if self.SEGMENT_EXT_ID in memory_address:
|
||||
@@ -346,7 +352,8 @@ class ParserX86ATT(BaseParser):
|
||||
if "identifier" in immediate:
|
||||
# actually an identifier, change declaration
|
||||
return immediate
|
||||
# otherwise nothing to do
|
||||
# otherwise just make sure the immediate is a decimal
|
||||
immediate["value"] = int(immediate["value"], 0)
|
||||
return AttrDict({self.IMMEDIATE_ID: immediate})
|
||||
|
||||
def get_full_reg_name(self, register):
|
||||
@@ -357,10 +364,11 @@ class ParserX86ATT(BaseParser):
|
||||
def normalize_imd(self, imd):
|
||||
"""Normalize immediate to decimal based representation"""
|
||||
if "value" in imd:
|
||||
if imd["value"].lower().startswith("0x"):
|
||||
# hex, return decimal
|
||||
return int(imd["value"], 16)
|
||||
return int(imd["value"], 10)
|
||||
if isinstance(imd["value"], str):
|
||||
# return decimal
|
||||
return int(imd["value"], 0)
|
||||
else:
|
||||
return imd["value"]
|
||||
# identifier
|
||||
return imd
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
from itertools import chain
|
||||
from copy import deepcopy
|
||||
|
||||
from osaca import utils
|
||||
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
||||
@@ -122,6 +123,7 @@ class ISASemantics(object):
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed})
|
||||
)
|
||||
|
||||
# store operand list in dict and reassign operand key/value pair
|
||||
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
|
||||
# assign LD/ST flags
|
||||
@@ -130,6 +132,7 @@ class ISASemantics(object):
|
||||
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
|
||||
if self._has_store(instruction_form):
|
||||
instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
|
||||
|
||||
|
||||
def get_reg_changes(self, instruction_form, only_postindexed=False):
|
||||
"""
|
||||
@@ -160,16 +163,16 @@ class ISASemantics(object):
|
||||
if only_postindexed:
|
||||
for o in instruction_form.operands:
|
||||
if 'post_indexed' in o.get('memory', {}):
|
||||
base_name = o.memory.base.get('prefix', '')+o.memory.base.name
|
||||
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
|
||||
return {base_name: {
|
||||
'name': o.memory.base.get('prefix', '')+o.memory.base.name,
|
||||
'value': int(o.memory.post_indexed.value)
|
||||
'name': o.memory.base.get('prefix', '') + o.memory.base.name,
|
||||
'value': o.memory.post_indexed.value
|
||||
}}
|
||||
return {}
|
||||
|
||||
reg_operand_names = {} # e.g., {'rax': 'op1'}
|
||||
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
|
||||
|
||||
|
||||
for o in instruction_form.operands:
|
||||
if 'pre_indexed' in o.get('memory', {}):
|
||||
# Assuming no isa_data.operation
|
||||
@@ -177,24 +180,24 @@ class ISASemantics(object):
|
||||
raise ValueError(
|
||||
"ISA information for pre-indexed instruction {!r} has operation set."
|
||||
"This is currently not supprted.".format(instruction_form.line))
|
||||
base_name = o.memory.base.get('prefix', '')+o.memory.base.name
|
||||
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
|
||||
reg_operand_names = {base_name: 'op1'}
|
||||
operand_state = {'op1': {
|
||||
'name': base_name,
|
||||
'value': int(o.memory.offset.value)
|
||||
'value': o.memory.offset.value
|
||||
}}
|
||||
|
||||
if isa_data is not None and 'operation' in isa_data:
|
||||
for i, o in enumerate(instruction_form.operands):
|
||||
operand_name = "op{}".format(i+1)
|
||||
operand_name = "op{}".format(i + 1)
|
||||
if "register" in o:
|
||||
o_reg_name = o["register"].get('prefix', '')+o["register"]["name"]
|
||||
o_reg_name = o["register"].get('prefix', '') + o["register"]["name"]
|
||||
reg_operand_names[o_reg_name] = operand_name
|
||||
operand_state[operand_name] = {
|
||||
'name': o_reg_name,
|
||||
'value': 0}
|
||||
elif "immediate" in o:
|
||||
operand_state[operand_name] = {'value': int(o["immediate"]["value"])}
|
||||
operand_state[operand_name] = {'value': o["immediate"]["value"]}
|
||||
elif "memory" in o:
|
||||
# TODO lea needs some thinking about
|
||||
pass
|
||||
@@ -209,7 +212,7 @@ class ISASemantics(object):
|
||||
"""
|
||||
Create operand dictionary containing src/dst operands out of the ISA data entry and
|
||||
the operands of an instruction form
|
||||
|
||||
|
||||
If breaks_dependency_on_equal_operands is True (configured per instruction in ISA db)
|
||||
and all operands are equal, place operand into destination only.
|
||||
|
||||
|
||||
@@ -1,22 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import copy
|
||||
from itertools import chain, product
|
||||
from collections import defaultdict
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from itertools import chain
|
||||
from multiprocessing import Manager, Process, cpu_count
|
||||
|
||||
import networkx as nx
|
||||
from osaca.semantics import INSTR_FLAGS, ArchSemantics, MachineModel
|
||||
|
||||
from osaca.parser import AttrDict
|
||||
from osaca.semantics import INSTR_FLAGS, MachineModel, ArchSemantics
|
||||
|
||||
class KernelDG(nx.DiGraph):
|
||||
def __init__(self, parsed_kernel, parser, hw_model: MachineModel, semantics: ArchSemantics):
|
||||
# threshold for checking dependency graph sequential or in parallel
|
||||
INSTRUCTION_THRESHOLD = 50
|
||||
|
||||
def __init__(
|
||||
self, parsed_kernel, parser, hw_model: MachineModel, semantics: ArchSemantics, timeout=10
|
||||
):
|
||||
self.timed_out = False
|
||||
self.kernel = parsed_kernel
|
||||
self.parser = parser
|
||||
self.model = hw_model
|
||||
self.arch_sem = semantics
|
||||
self.dg = self.create_DG(self.kernel)
|
||||
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel)
|
||||
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel, timeout)
|
||||
|
||||
def _extend_path(self, dst_list, kernel, dg, offset):
|
||||
for instr in kernel:
|
||||
generator_path = nx.algorithms.simple_paths.all_simple_paths(
|
||||
dg, instr.line_number, instr.line_number + offset
|
||||
)
|
||||
tmp_list = list(generator_path)
|
||||
dst_list.extend(tmp_list)
|
||||
# print('Thread [{}-{}] done'.format(kernel[0]['line_number'], kernel[-1]['line_number']))
|
||||
|
||||
def create_DG(self, kernel):
|
||||
"""
|
||||
@@ -65,17 +82,19 @@ class KernelDG(nx.DiGraph):
|
||||
dg.nodes[dep["line_number"]]["instruction_form"] = dep
|
||||
return dg
|
||||
|
||||
def check_for_loopcarried_dep(self, kernel):
|
||||
def check_for_loopcarried_dep(self, kernel, timeout=10):
|
||||
"""
|
||||
Try to find loop-carried dependencies in given kernel.
|
||||
|
||||
:param kernel: Parsed asm kernel with assigned semantic information
|
||||
:type kernel: list
|
||||
:param timeout: Timeout in seconds for parallel execution, defaults
|
||||
to `10`. Set to `-1` for no timeout
|
||||
:type timeout: int
|
||||
:returns: `dict` -- dependency dictionary with all cyclic LCDs
|
||||
"""
|
||||
# increase line number for second kernel loop
|
||||
offset = max(1000, max([i.line_number for i in kernel]))
|
||||
first_line_no = kernel[0].line_number
|
||||
tmp_kernel = [] + kernel
|
||||
for orig_iform in kernel:
|
||||
temp_iform = copy.copy(orig_iform)
|
||||
@@ -86,13 +105,59 @@ class KernelDG(nx.DiGraph):
|
||||
|
||||
# build cyclic loop-carried dependencies
|
||||
loopcarried_deps = []
|
||||
paths = []
|
||||
for instr in kernel:
|
||||
paths += list(nx.algorithms.simple_paths.all_simple_paths(
|
||||
dg, instr.line_number, instr.line_number + offset))
|
||||
all_paths = []
|
||||
|
||||
klen = len(kernel)
|
||||
if klen >= self.INSTRUCTION_THRESHOLD:
|
||||
# parallel execution with static scheduling
|
||||
num_cores = cpu_count()
|
||||
workload = int((klen - 1) / num_cores) + 1
|
||||
starts = [tid * workload for tid in range(num_cores)]
|
||||
ends = [min((tid + 1) * workload, klen) for tid in range(num_cores)]
|
||||
instrs = [kernel[s:e] for s, e in zip(starts, ends)]
|
||||
with Manager() as manager:
|
||||
all_paths = manager.list()
|
||||
processes = [
|
||||
Process(target=self._extend_path, args=(all_paths, instr_section, dg, offset))
|
||||
for instr_section in instrs
|
||||
]
|
||||
for p in processes:
|
||||
p.start()
|
||||
if (timeout == -1):
|
||||
# no timeout
|
||||
for p in processes:
|
||||
p.join()
|
||||
else:
|
||||
start_time = time.time()
|
||||
while time.time() - start_time <= timeout:
|
||||
if any(p.is_alive() for p in processes):
|
||||
time.sleep(0.2)
|
||||
else:
|
||||
# all procs done
|
||||
for p in processes:
|
||||
p.join()
|
||||
break
|
||||
else:
|
||||
self.timed_out = True
|
||||
# terminate running processes
|
||||
for p in processes:
|
||||
if p.is_alive():
|
||||
# Python 3.6 does not support Process.kill().
|
||||
# Can be changed to `p.kill()` after EoL (01/22) of Py3.6
|
||||
os.kill(p.pid, signal.SIGKILL)
|
||||
p.join()
|
||||
all_paths = list(all_paths)
|
||||
else:
|
||||
# sequential execution to avoid overhead when analyzing smaller kernels
|
||||
for instr in kernel:
|
||||
all_paths.extend(
|
||||
nx.algorithms.simple_paths.all_simple_paths(
|
||||
dg, instr.line_number, instr.line_number + offset
|
||||
)
|
||||
)
|
||||
|
||||
paths_set = set()
|
||||
for path in paths:
|
||||
for path in all_paths:
|
||||
lat_sum = 0.0
|
||||
# extend path by edge bound latencies (e.g., store-to-load latency)
|
||||
lat_path = []
|
||||
@@ -120,8 +185,10 @@ class KernelDG(nx.DiGraph):
|
||||
for lat_sum, involved_lines in loopcarried_deps:
|
||||
loopcarried_deps_dict[involved_lines[0][0]] = {
|
||||
"root": self._get_node_by_lineno(involved_lines[0][0]),
|
||||
"dependencies": [(self._get_node_by_lineno(ln), lat) for ln, lat in involved_lines],
|
||||
"latency": lat_sum
|
||||
"dependencies": [
|
||||
(self._get_node_by_lineno(ln), lat) for ln, lat in involved_lines
|
||||
],
|
||||
"latency": lat_sum,
|
||||
}
|
||||
return loopcarried_deps_dict
|
||||
|
||||
@@ -167,9 +234,7 @@ class KernelDG(nx.DiGraph):
|
||||
# split to DAG
|
||||
raise NotImplementedError("Kernel is cyclic.")
|
||||
|
||||
def find_depending(
|
||||
self, instruction_form, instructions, flag_dependencies=False
|
||||
):
|
||||
def find_depending(self, instruction_form, instructions, flag_dependencies=False):
|
||||
"""
|
||||
Find instructions in `instructions` depending on a given instruction form's results.
|
||||
|
||||
@@ -189,15 +254,15 @@ class KernelDG(nx.DiGraph):
|
||||
# TODO instructions before must be considered as well, if they update registers
|
||||
# not used by instruction_form. E.g., validation/build/A64FX/gcc/O1/gs-2d-5pt.marked.s
|
||||
register_changes = self._update_reg_changes(instruction_form)
|
||||
#print("FROM", instruction_form.line, register_changes)
|
||||
# print("FROM", instruction_form.line, register_changes)
|
||||
for i, instr_form in enumerate(instructions):
|
||||
self._update_reg_changes(instr_form, register_changes)
|
||||
#print(" TO", instr_form.line, register_changes)
|
||||
# print(" TO", instr_form.line, register_changes)
|
||||
if "register" in dst:
|
||||
# read of register
|
||||
if self.is_read(dst.register, instr_form) and not (
|
||||
dst.get("pre_indexed", False) or
|
||||
dst.get("post_indexed", False)):
|
||||
dst.get("pre_indexed", False) or dst.get("post_indexed", False)
|
||||
):
|
||||
yield instr_form, []
|
||||
# write to register -> abort
|
||||
if self.is_written(dst.register, instr_form):
|
||||
@@ -214,10 +279,10 @@ class KernelDG(nx.DiGraph):
|
||||
if "pre_indexed" in dst.memory:
|
||||
if self.is_written(dst.memory.base, instr_form):
|
||||
break
|
||||
#if dst.memory.base:
|
||||
# if dst.memory.base:
|
||||
# if self.is_read(dst.memory.base, instr_form):
|
||||
# yield instr_form, []
|
||||
#if dst.memory.index:
|
||||
# if dst.memory.index:
|
||||
# if self.is_read(dst.memory.index, instr_form):
|
||||
# yield instr_form, []
|
||||
if "post_indexed" in dst.memory:
|
||||
@@ -225,7 +290,7 @@ class KernelDG(nx.DiGraph):
|
||||
if self.is_written(dst.memory.base, instr_form):
|
||||
break
|
||||
# TODO record register changes
|
||||
# (e.g., mov, lea, add, sub, inc, dec) in instructions[:i]
|
||||
# (e.g., mov, lea, add, sub, inc, dec) in instructions[:i]
|
||||
# and pass to is_memload and is_memstore to consider relevance.
|
||||
# load from same location (presumed)
|
||||
if self.is_memload(dst.memory, instr_form, register_changes):
|
||||
@@ -285,7 +350,9 @@ class KernelDG(nx.DiGraph):
|
||||
if src.memory.base is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.memory.base) or is_read
|
||||
if src.memory.index is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.memory.index) or is_read
|
||||
is_read = (
|
||||
self.parser.is_reg_dependend_of(register, src.memory.index) or is_read
|
||||
)
|
||||
# Check also if read in destination memory address
|
||||
for dst in chain(
|
||||
instruction_form.semantic_operands.destination,
|
||||
@@ -295,7 +362,9 @@ class KernelDG(nx.DiGraph):
|
||||
if dst.memory.base is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, dst.memory.base) or is_read
|
||||
if dst.memory.index is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read
|
||||
is_read = (
|
||||
self.parser.is_reg_dependend_of(register, dst.memory.index) or is_read
|
||||
)
|
||||
return is_read
|
||||
|
||||
def is_memload(self, mem, instruction_form, register_changes={}):
|
||||
@@ -313,41 +382,43 @@ class KernelDG(nx.DiGraph):
|
||||
# determine absolute address change
|
||||
addr_change = 0
|
||||
if src.offset and "value" in src.offset:
|
||||
addr_change += int(src.offset.value)
|
||||
addr_change += src.offset.value
|
||||
if mem.offset:
|
||||
addr_change -= int(mem.offset.value)
|
||||
addr_change -= mem.offset.value
|
||||
if mem.base and src.base:
|
||||
base_change = register_changes.get(
|
||||
src.base.get('prefix', '')+src.base.name,
|
||||
{'name': src.base.get('prefix', '')+src.base.name, 'value': 0})
|
||||
src.base.get('prefix', '') + src.base.name,
|
||||
{'name': src.base.get('prefix', '') + src.base.name, 'value': 0},
|
||||
)
|
||||
if base_change is None:
|
||||
# Unknown change occurred
|
||||
continue
|
||||
if mem.base.get('prefix', '')+mem.base['name'] != base_change['name']:
|
||||
if mem.base.get('prefix', '') + mem.base['name'] != base_change['name']:
|
||||
# base registers do not match
|
||||
continue
|
||||
addr_change += base_change['value']
|
||||
elif mem.base or src.base:
|
||||
# base registers do not match
|
||||
continue
|
||||
# base registers do not match
|
||||
continue
|
||||
if mem.index and src.index:
|
||||
index_change = register_changes.get(
|
||||
src.index.get('prefix', '')+src.index.name,
|
||||
{'name': src.index.get('prefix', '')+src.index.name, 'value': 0})
|
||||
src.index.get('prefix', '') + src.index.name,
|
||||
{'name': src.index.get('prefix', '') + src.index.name, 'value': 0},
|
||||
)
|
||||
if index_change is None:
|
||||
# Unknown change occurred
|
||||
continue
|
||||
if mem.scale != src.scale:
|
||||
# scale factors do not match
|
||||
continue
|
||||
if mem.index.get('prefix', '')+mem.index['name'] != index_change['name']:
|
||||
if mem.index.get('prefix', '') + mem.index['name'] != index_change['name']:
|
||||
# index registers do not match
|
||||
continue
|
||||
addr_change += index_change['value'] * src.scale
|
||||
elif mem.index or src.index:
|
||||
# index registers do not match
|
||||
continue
|
||||
#if instruction_form.line_number == 3:
|
||||
# index registers do not match
|
||||
continue
|
||||
# if instruction_form.line_number == 3:
|
||||
if addr_change == 0:
|
||||
return True
|
||||
return False
|
||||
|
||||
3
setup.py
3
setup.py
@@ -91,7 +91,6 @@ setup(
|
||||
# Specify the Python versions you support here. In particular, ensure
|
||||
# that you indicate whether you support Python2, Python 3 or both.
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.5",
|
||||
"Programming Language :: Python :: 3.6",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
@@ -107,7 +106,7 @@ setup(
|
||||
# requirements files see:
|
||||
# https://packaging.python.org/en/latest/requirements.html
|
||||
install_requires=["networkx", "pyparsing>=2.3.1", "ruamel.yaml>=0.15.71"],
|
||||
python_requires=">=3.5",
|
||||
python_requires=">=3.6",
|
||||
# List additional groups of dependencies here (e.g. development
|
||||
# dependencies). You can install these using the following syntax,
|
||||
# for example:
|
||||
|
||||
@@ -183,14 +183,38 @@ class TestCLI(unittest.TestCase):
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
# WARNING for length
|
||||
self.assertTrue(output.getvalue().count("WARNING") == 1)
|
||||
self.assertTrue(
|
||||
output.getvalue().count(
|
||||
"WARNING: You are analyzing a large amount of instruction forms"
|
||||
)
|
||||
== 1
|
||||
)
|
||||
# WARNING for arch
|
||||
args = parser.parse_args(
|
||||
["--lines", "100-199", "--ignore-unknown", self._find_test_file(kernel)]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
# WARNING for arch
|
||||
self.assertTrue(output.getvalue().count("WARNING") == 1)
|
||||
self.assertTrue(
|
||||
output.getvalue().count("WARNING: No micro-architecture was specified") == 1
|
||||
)
|
||||
# WARNING for timeout
|
||||
args = parser.parse_args(
|
||||
["--ignore-unknown", "--lcd-timeout", "0", self._find_test_file(kernel)]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
self.assertTrue(
|
||||
output.getvalue().count("WARNING: LCD analysis timed out") == 1
|
||||
)
|
||||
args = parser.parse_args(
|
||||
["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
self.assertTrue(
|
||||
output.getvalue().count("WARNING: LCD analysis timed out") == 0
|
||||
)
|
||||
|
||||
def test_lines_arg(self):
|
||||
# Run tests with --lines option
|
||||
|
||||
32
tests/test_files/kernel_aarch64_sve.s
Normal file
32
tests/test_files/kernel_aarch64_sve.s
Normal file
@@ -0,0 +1,32 @@
|
||||
// OSACA-BEGIN
|
||||
.L5:
|
||||
add x10, x1, x11
|
||||
add x6, x1, x8
|
||||
ld2d {z0.d - z1.d}, p1/z, [x10]
|
||||
ld2d {z2.d - z3.d}, p1/z, [x6]
|
||||
mov z5.d, z1.d
|
||||
fadd z20.d, z3.d, z3.d
|
||||
mov z1.d, z0.d
|
||||
add x6, x1, x7
|
||||
fadd z2.d, z2.d, z2.d
|
||||
ld2d {z6.d - z7.d}, p1/z, [x6]
|
||||
fmul z4.d, z5.d, z20.d
|
||||
add x10, x1, x12
|
||||
mov z0.d, z7.d
|
||||
ld2d {z16.d - z17.d}, p1/z, [x10]
|
||||
mov z3.d, z4.d
|
||||
fmls z3.d, p0/m, z0.d, z17.d
|
||||
fmul z0.d, z0.d, z16.d
|
||||
fmla z3.d, p0/m, z6.d, z16.d
|
||||
fmla z0.d, p0/m, z6.d, z17.d
|
||||
fmls z3.d, p0/m, z1.d, z2.d
|
||||
fmls z0.d, p0/m, z1.d, z20.d
|
||||
mov z18.d, z3.d
|
||||
fmsb z5.d, p0/m, z2.d, z0.d
|
||||
mov z19.d, z5.d
|
||||
st2d {z18.d - z19.d}, p1, [x6]
|
||||
add x5, x5, 8
|
||||
add x1, x1, 128
|
||||
whilelo p1.d, x5, x9
|
||||
bne .L5
|
||||
// OSACA-END
|
||||
192
tests/test_files/kernel_x86_long_LCD.s
Normal file
192
tests/test_files/kernel_x86_long_LCD.s
Normal file
@@ -0,0 +1,192 @@
|
||||
# OSACA-BEGIN
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
push %rbp
|
||||
mov %ecx,%r12d
|
||||
mov %esi,%r14d
|
||||
mov %r12d,%ecx
|
||||
mov %r14d,%esi
|
||||
mov %rdx,%r13
|
||||
mov %rdi,%rbp
|
||||
callq 0x4210d0
|
||||
mov %rdx,%r8
|
||||
movzbl (%rdi),%r9d
|
||||
movslq %esi,%rsi
|
||||
movslq %ecx,%rcx
|
||||
movzbl (%r8),%r10d
|
||||
vmovd %r9d,%xmm13
|
||||
movzbl 0x4(%r8),%r9d
|
||||
vpinsrb $0x1,(%rsi,%rdi,1),%xmm13,%xmm14
|
||||
lea (%rsi,%rsi,2),%rdx
|
||||
vmovd %r10d,%xmm1
|
||||
vpinsrb $0x1,(%rcx,%r8,1),%xmm1,%xmm0
|
||||
vmovd %r9d,%xmm7
|
||||
vpinsrb $0x1,0x4(%rcx,%r8,1),%xmm7,%xmm5
|
||||
vpinsrb $0x2,(%rdi,%rsi,2),%xmm14,%xmm15
|
||||
vpinsrb $0x2,(%r8,%rcx,2),%xmm0,%xmm6
|
||||
vpinsrb $0x2,0x4(%r8,%rcx,2),%xmm5,%xmm9
|
||||
vpinsrb $0x3,(%rdx,%rdi,1),%xmm15,%xmm4
|
||||
movzbl 0x4(%rdi),%r11d
|
||||
lea (%rcx,%rcx,2),%rax
|
||||
vpinsrb $0x3,(%rax,%r8,1),%xmm6,%xmm10
|
||||
vpinsrb $0x3,0x4(%rax,%r8,1),%xmm9,%xmm11
|
||||
vmovd %r11d,%xmm2
|
||||
vpinsrb $0x1,0x4(%rsi,%rdi,1),%xmm2,%xmm8
|
||||
vpinsrb $0x2,0x4(%rdi,%rsi,2),%xmm8,%xmm3
|
||||
movzbl 0x1(%rdi),%r10d
|
||||
movzbl 0x5(%rdi),%r9d
|
||||
movzbl 0x1(%r8),%r11d
|
||||
vmovd %r10d,%xmm1
|
||||
movzbl 0x5(%r8),%r10d
|
||||
vmovd %r9d,%xmm7
|
||||
vpmovzxbd %xmm4,%xmm4
|
||||
vmovd %r11d,%xmm2
|
||||
vpmovzxbd %xmm10,%xmm10
|
||||
vpinsrb $0x3,0x4(%rdx,%rdi,1),%xmm3,%xmm12
|
||||
vpsubd %xmm10,%xmm4,%xmm14
|
||||
vpinsrb $0x1,0x5(%rsi,%rdi,1),%xmm7,%xmm5
|
||||
vmovd %r10d,%xmm4
|
||||
vpinsrb $0x1,0x5(%rcx,%r8,1),%xmm4,%xmm10
|
||||
vpinsrb $0x1,0x1(%rcx,%r8,1),%xmm2,%xmm8
|
||||
vpinsrb $0x1,0x1(%rsi,%rdi,1),%xmm1,%xmm0
|
||||
vpinsrb $0x2,0x5(%rdi,%rsi,2),%xmm5,%xmm9
|
||||
vpinsrb $0x2,0x1(%r8,%rcx,2),%xmm8,%xmm3
|
||||
vpinsrb $0x2,0x1(%rdi,%rsi,2),%xmm0,%xmm6
|
||||
vpmovzxbd %xmm12,%xmm12
|
||||
vpmovzxbd %xmm11,%xmm11
|
||||
vpsubd %xmm11,%xmm12,%xmm13
|
||||
vpinsrb $0x2,0x5(%r8,%rcx,2),%xmm10,%xmm11
|
||||
vpslld $0x10,%xmm13,%xmm15
|
||||
vpinsrb $0x3,0x1(%rdx,%rdi,1),%xmm6,%xmm13
|
||||
vpaddd %xmm15,%xmm14,%xmm12
|
||||
vpinsrb $0x3,0x5(%rdx,%rdi,1),%xmm9,%xmm15
|
||||
vpinsrb $0x3,0x1(%rax,%r8,1),%xmm3,%xmm14
|
||||
vpinsrb $0x3,0x5(%rax,%r8,1),%xmm11,%xmm1
|
||||
movzbl 0x2(%rdi),%r11d
|
||||
movzbl 0x2(%r8),%r9d
|
||||
vpmovzxbd %xmm15,%xmm15
|
||||
vmovd %r11d,%xmm8
|
||||
vmovd %r9d,%xmm5
|
||||
vpinsrb $0x1,0x2(%rsi,%rdi,1),%xmm8,%xmm3
|
||||
vpinsrb $0x1,0x2(%rcx,%r8,1),%xmm5,%xmm9
|
||||
vpinsrb $0x2,0x2(%rdi,%rsi,2),%xmm3,%xmm7
|
||||
vpinsrb $0x2,0x2(%r8,%rcx,2),%xmm9,%xmm4
|
||||
vpinsrb $0x3,0x2(%rdx,%rdi,1),%xmm7,%xmm3
|
||||
vpinsrb $0x3,0x2(%rax,%r8,1),%xmm4,%xmm7
|
||||
vpmovzxbd %xmm1,%xmm1
|
||||
movzbl 0x6(%r8),%r11d
|
||||
vpsubd %xmm1,%xmm15,%xmm0
|
||||
vpmovzxbd %xmm13,%xmm13
|
||||
vpslld $0x10,%xmm0,%xmm2
|
||||
vpmovzxbd %xmm14,%xmm14
|
||||
vpsubd %xmm14,%xmm13,%xmm6
|
||||
vpaddd %xmm2,%xmm6,%xmm11
|
||||
vmovd %r11d,%xmm6
|
||||
vpinsrb $0x1,0x6(%rcx,%r8,1),%xmm6,%xmm2
|
||||
movzbl 0x6(%rdi),%r10d
|
||||
vpinsrb $0x2,0x6(%r8,%rcx,2),%xmm2,%xmm8
|
||||
vmovd %r10d,%xmm10
|
||||
vpinsrb $0x1,0x6(%rsi,%rdi,1),%xmm10,%xmm1
|
||||
vpinsrb $0x3,0x6(%rax,%r8,1),%xmm8,%xmm9
|
||||
vpinsrb $0x2,0x6(%rdi,%rsi,2),%xmm1,%xmm0
|
||||
movzbl 0x3(%rdi),%r9d
|
||||
movzbl 0x7(%rdi),%r11d
|
||||
vpmovzxbd %xmm3,%xmm3
|
||||
vpmovzxbd %xmm7,%xmm7
|
||||
vmovd %r9d,%xmm14
|
||||
vmovd %r11d,%xmm8
|
||||
vpsubd %xmm7,%xmm3,%xmm10
|
||||
vpinsrb $0x1,0x3(%rsi,%rdi,1),%xmm14,%xmm15
|
||||
vpinsrb $0x1,0x7(%rsi,%rdi,1),%xmm8,%xmm3
|
||||
vpinsrb $0x3,0x6(%rdx,%rdi,1),%xmm0,%xmm5
|
||||
vpinsrb $0x2,0x3(%rdi,%rsi,2),%xmm15,%xmm1
|
||||
vpinsrb $0x2,0x7(%rdi,%rsi,2),%xmm3,%xmm7
|
||||
vpaddd %xmm11,%xmm12,%xmm3
|
||||
vpmovzxbd %xmm5,%xmm5
|
||||
vpmovzxbd %xmm9,%xmm9
|
||||
vpsubd %xmm9,%xmm5,%xmm4
|
||||
vpslld $0x10,%xmm4,%xmm13
|
||||
vpinsrb $0x3,0x7(%rdx,%rdi,1),%xmm7,%xmm15
|
||||
vpaddd %xmm13,%xmm10,%xmm10
|
||||
vpinsrb $0x3,0x3(%rdx,%rdi,1),%xmm1,%xmm13
|
||||
movzbl 0x7(%r8),%edx
|
||||
movzbl 0x3(%r8),%r10d
|
||||
vpmovzxbd %xmm15,%xmm15
|
||||
vmovd %edx,%xmm5
|
||||
vpinsrb $0x1,0x7(%rcx,%r8,1),%xmm5,%xmm9
|
||||
vmovd %r10d,%xmm0
|
||||
vpinsrb $0x1,0x3(%rcx,%r8,1),%xmm0,%xmm6
|
||||
vpinsrb $0x2,0x7(%r8,%rcx,2),%xmm9,%xmm4
|
||||
vpinsrb $0x2,0x3(%r8,%rcx,2),%xmm6,%xmm2
|
||||
vpinsrb $0x3,0x7(%rax,%r8,1),%xmm4,%xmm1
|
||||
vpinsrb $0x3,0x3(%rax,%r8,1),%xmm2,%xmm14
|
||||
vpmovzxbd %xmm1,%xmm1
|
||||
vpmovzxbd %xmm13,%xmm13
|
||||
vpsubd %xmm1,%xmm15,%xmm0
|
||||
vpmovzxbd %xmm14,%xmm14
|
||||
vpslld $0x10,%xmm0,%xmm2
|
||||
vpsubd %xmm14,%xmm13,%xmm6
|
||||
vpsubd %xmm11,%xmm12,%xmm1
|
||||
vpaddd %xmm2,%xmm6,%xmm8
|
||||
vpaddd %xmm8,%xmm10,%xmm12
|
||||
vpsubd %xmm8,%xmm10,%xmm0
|
||||
vpaddd %xmm12,%xmm3,%xmm8
|
||||
vpaddd %xmm0,%xmm1,%xmm7
|
||||
vpsubd %xmm12,%xmm3,%xmm3
|
||||
vpsubd %xmm0,%xmm1,%xmm5
|
||||
vunpcklps %xmm7,%xmm8,%xmm6
|
||||
vunpcklps %xmm5,%xmm3,%xmm2
|
||||
vunpckhps %xmm7,%xmm8,%xmm9
|
||||
vunpckhps %xmm5,%xmm3,%xmm4
|
||||
vunpcklpd %xmm2,%xmm6,%xmm10
|
||||
vunpckhpd %xmm2,%xmm6,%xmm11
|
||||
vunpcklpd %xmm4,%xmm9,%xmm12
|
||||
vpaddd %xmm11,%xmm10,%xmm14
|
||||
vunpckhpd %xmm4,%xmm9,%xmm13
|
||||
vpsubd %xmm11,%xmm10,%xmm1
|
||||
vpaddd %xmm13,%xmm12,%xmm15
|
||||
vpsubd %xmm13,%xmm12,%xmm0
|
||||
vpaddd %xmm15,%xmm14,%xmm9
|
||||
vpaddd %xmm0,%xmm1,%xmm7
|
||||
vpsubd %xmm15,%xmm14,%xmm8
|
||||
vpsubd %xmm0,%xmm1,%xmm6
|
||||
vmovdqu 0x279d68(%rip),%xmm15
|
||||
vpsrld $0xf,%xmm9,%xmm2
|
||||
vpsrld $0xf,%xmm7,%xmm10
|
||||
vpand %xmm15,%xmm2,%xmm3
|
||||
vmovdqu 0x279d40(%rip),%xmm4
|
||||
vpand %xmm15,%xmm10,%xmm11
|
||||
vpsrld $0xf,%xmm8,%xmm12
|
||||
vpsrld $0xf,%xmm6,%xmm14
|
||||
vpmulld %xmm3,%xmm4,%xmm5
|
||||
vpand %xmm15,%xmm12,%xmm13
|
||||
vpmulld %xmm11,%xmm4,%xmm3
|
||||
vpand %xmm15,%xmm14,%xmm1
|
||||
vpmulld %xmm13,%xmm4,%xmm2
|
||||
vpaddd %xmm3,%xmm7,%xmm7
|
||||
vpmulld %xmm1,%xmm4,%xmm0
|
||||
vpaddd %xmm5,%xmm9,%xmm4
|
||||
vpxor %xmm5,%xmm4,%xmm5
|
||||
vpxor %xmm3,%xmm7,%xmm9
|
||||
vpaddd %xmm2,%xmm8,%xmm8
|
||||
vpaddd %xmm9,%xmm5,%xmm3
|
||||
vpxor %xmm2,%xmm8,%xmm2
|
||||
vpaddd %xmm0,%xmm6,%xmm6
|
||||
vpaddd %xmm2,%xmm3,%xmm4
|
||||
vpxor %xmm0,%xmm6,%xmm0
|
||||
vpaddd %xmm0,%xmm4,%xmm2
|
||||
vpxor %xmm1,%xmm1,%xmm1
|
||||
vpaddd %xmm2,%xmm1,%xmm1
|
||||
vpsrldq $0x8,%xmm1,%xmm3
|
||||
vpaddd %xmm3,%xmm1,%xmm5
|
||||
vpsrlq $0x20,%xmm5,%xmm6
|
||||
vpaddd %xmm6,%xmm5,%xmm7
|
||||
vmovd %xmm7,%ecx
|
||||
movzwl %cx,%eax
|
||||
shr $0x10,%ecx
|
||||
add %ecx,%eax
|
||||
shr %eax
|
||||
retq
|
||||
# OSACA-END
|
||||
@@ -102,7 +102,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
self.assertEqual(parsed_3.instruction, "mov")
|
||||
self.assertEqual(parsed_3.operands[0].register.name, "2")
|
||||
self.assertEqual(parsed_3.operands[0].register.prefix, "x")
|
||||
self.assertEqual(parsed_3.operands[1].immediate.value, "0x222")
|
||||
self.assertEqual(parsed_3.operands[1].immediate.value, int("0x222", 0))
|
||||
self.assertEqual(parsed_3.comment, "NOT IACA END")
|
||||
|
||||
self.assertEqual(parsed_4.instruction, "str")
|
||||
@@ -208,7 +208,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
{"prfop": {"type": ["PLD"], "target": ["L1"], "policy": ["KEEP"]}},
|
||||
{
|
||||
"memory": {
|
||||
"offset": {"value": "2048"},
|
||||
"offset": {"value": 2048},
|
||||
"base": {"prefix": "x", "name": "26"},
|
||||
"index": None,
|
||||
"scale": 1,
|
||||
@@ -228,7 +228,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
{"register": {"prefix": "x", "name": "30"}},
|
||||
{
|
||||
"memory": {
|
||||
"offset": {"value": "-16"},
|
||||
"offset": {"value": -16},
|
||||
"base": {"name": "sp", "prefix": "x"},
|
||||
"index": None,
|
||||
"scale": 1,
|
||||
@@ -253,7 +253,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
"base": {"prefix": "x", "name": "11"},
|
||||
"index": None,
|
||||
"scale": 1,
|
||||
"post_indexed": {"value": "64"},
|
||||
"post_indexed": {"value": 64},
|
||||
}
|
||||
},
|
||||
],
|
||||
@@ -270,7 +270,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
{"register": {"prefix": "p", "name": "0", "predication": "m"}},
|
||||
{"register": {"prefix": "z", "name": "29", "shape": "d"}},
|
||||
{"register": {"prefix": "z", "name": "21", "shape": "d"}},
|
||||
{"immediate": {"value": "90", "type": "int"}},
|
||||
{"immediate": {"value": 90, "type": "int"}},
|
||||
],
|
||||
"directive": None,
|
||||
"comment": None,
|
||||
@@ -326,32 +326,34 @@ class TestParserAArch64(unittest.TestCase):
|
||||
|
||||
def test_multiple_regs(self):
|
||||
instr_range = "PUSH {x5-x7}"
|
||||
reg_range = AttrDict(
|
||||
{
|
||||
"register": {
|
||||
"range": [{"prefix": "x", "name": "5"}, {"prefix": "x", "name": "7"}],
|
||||
"index": None,
|
||||
}
|
||||
}
|
||||
)
|
||||
instr_list = "POP {x5, x7, x9}"
|
||||
reg_list = AttrDict(
|
||||
{
|
||||
"register": {
|
||||
"list": [
|
||||
{"prefix": "x", "name": "5"},
|
||||
{"prefix": "x", "name": "7"},
|
||||
{"prefix": "x", "name": "9"},
|
||||
],
|
||||
"index": None,
|
||||
}
|
||||
}
|
||||
)
|
||||
instr_list = "POP {x5, x6, x7}"
|
||||
instr_range_with_index = "ld4 {v0.S - v3.S}[2]"
|
||||
instr_list_with_index = "ld4 {v0.S, v1.S, v2.S, v3.S}[2]"
|
||||
instr_range_single = "dummy { z1.d }"
|
||||
reg_list = [
|
||||
AttrDict({"register": {"prefix": "x", "name": "5"}}),
|
||||
AttrDict({"register": {"prefix": "x", "name": "6"}}),
|
||||
AttrDict({"register": {"prefix": "x", "name": "7"}}),
|
||||
]
|
||||
reg_list_idx = [
|
||||
AttrDict({"register": {"prefix": "v", "name": "0", "shape": "S", "index": 2}}),
|
||||
AttrDict({"register": {"prefix": "v", "name": "1", "shape": "S", "index": 2}}),
|
||||
AttrDict({"register": {"prefix": "v", "name": "2", "shape": "S", "index": 2}}),
|
||||
AttrDict({"register": {"prefix": "v", "name": "3", "shape": "S", "index": 2}}),
|
||||
]
|
||||
reg_list_single = [AttrDict({"register": {"prefix": "z", "name": "1", "shape": "d"}})]
|
||||
|
||||
prange = self.parser.parse_line(instr_range)
|
||||
plist = self.parser.parse_line(instr_list)
|
||||
p_idx_range = self.parser.parse_line(instr_range_with_index)
|
||||
p_idx_list = self.parser.parse_line(instr_list_with_index)
|
||||
p_single = self.parser.parse_line(instr_range_single)
|
||||
|
||||
self.assertEqual(prange.operands[0], reg_range)
|
||||
self.assertEqual(plist.operands[0], reg_list)
|
||||
self.assertEqual(prange.operands, reg_list)
|
||||
self.assertEqual(plist.operands, reg_list)
|
||||
self.assertEqual(p_idx_range.operands, reg_list_idx)
|
||||
self.assertEqual(p_idx_list.operands, reg_list_idx)
|
||||
self.assertEqual(p_single.operands, reg_list_single)
|
||||
|
||||
def test_reg_dependency(self):
|
||||
reg_1_1 = AttrDict({"prefix": "b", "name": "1"})
|
||||
|
||||
@@ -120,12 +120,12 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertIsNone(parsed_2.comment)
|
||||
|
||||
self.assertEqual(parsed_3.instruction, "movl")
|
||||
self.assertEqual(parsed_3.operands[0].immediate.value, "222")
|
||||
self.assertEqual(parsed_3.operands[0].immediate.value, 222)
|
||||
self.assertEqual(parsed_3.operands[1].register.name, "ebx")
|
||||
self.assertEqual(parsed_3.comment, "IACA END")
|
||||
|
||||
self.assertEqual(parsed_4.instruction, "vmovss")
|
||||
self.assertEqual(parsed_4.operands[1].memory.offset.value, "-4")
|
||||
self.assertEqual(parsed_4.operands[1].memory.offset.value, -4)
|
||||
self.assertEqual(parsed_4.operands[1].memory.base.name, "rsp")
|
||||
self.assertEqual(parsed_4.operands[1].memory.index.name, "rax")
|
||||
self.assertEqual(parsed_4.operands[1].memory.scale, 8)
|
||||
@@ -146,7 +146,7 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertEqual(parsed_6.operands[0].memory.scale, 8)
|
||||
self.assertEqual(parsed_6.operands[1].register.name, "rbx")
|
||||
|
||||
self.assertEqual(parsed_7.operands[0].immediate.value, "0x1")
|
||||
self.assertEqual(parsed_7.operands[0].immediate.value, 0x1)
|
||||
self.assertEqual(parsed_7.operands[1].register.name, "xmm0")
|
||||
self.assertEqual(parsed_7.operands[2].register.name, "ymm1")
|
||||
self.assertEqual(parsed_7.operands[3].register.name, "ymm1")
|
||||
@@ -189,7 +189,7 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
"operands": [
|
||||
{
|
||||
"memory": {
|
||||
"offset": {"value": "2"},
|
||||
"offset": {"value": 2},
|
||||
"base": {"name": "rax"},
|
||||
"index": {"name": "rax"},
|
||||
"scale": 1,
|
||||
@@ -240,7 +240,7 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
imd_decimal_1 = {"value": "79"}
|
||||
imd_hex_1 = {"value": "0x4f"}
|
||||
imd_decimal_2 = {"value": "8"}
|
||||
imd_hex_2 = {"value": "0x8"}
|
||||
imd_hex_2 = {"value": "8"}
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1)
|
||||
)
|
||||
|
||||
@@ -5,15 +5,14 @@ Unit tests for Semantic Analysis
|
||||
|
||||
import os
|
||||
import unittest
|
||||
import time
|
||||
from copy import deepcopy
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from osaca.osaca import get_unmatched_instruction_ratio
|
||||
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
||||
from osaca.semantics import (
|
||||
INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section, ISASemantics
|
||||
)
|
||||
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, ISASemantics,
|
||||
KernelDG, MachineModel, reduce_to_section)
|
||||
|
||||
|
||||
class TestSemanticTools(unittest.TestCase):
|
||||
@@ -30,17 +29,30 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.code_x86 = f.read()
|
||||
with open(cls._find_file("kernel_x86_memdep.s")) as f:
|
||||
cls.code_x86_memdep = f.read()
|
||||
with open(cls._find_file("kernel_x86_long_LCD.s")) as f:
|
||||
cls.code_x86_long_LCD = f.read()
|
||||
with open(cls._find_file("kernel_aarch64_memdep.s")) as f:
|
||||
cls.code_aarch64_memdep = f.read()
|
||||
with open(cls._find_file("kernel_aarch64.s")) as f:
|
||||
cls.code_AArch64 = f.read()
|
||||
with open(cls._find_file("kernel_aarch64_sve.s")) as f:
|
||||
cls.code_AArch64_SVE = f.read()
|
||||
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
|
||||
cls.kernel_x86_memdep = reduce_to_section(
|
||||
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86")
|
||||
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86"
|
||||
)
|
||||
cls.kernel_x86_long_LCD = reduce_to_section(
|
||||
cls.parser_x86.parse_file(cls.code_x86_long_LCD), "x86"
|
||||
)
|
||||
cls.kernel_AArch64 = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64")
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64"
|
||||
)
|
||||
cls.kernel_aarch64_memdep = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64")
|
||||
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64"
|
||||
)
|
||||
cls.kernel_aarch64_SVE = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64"
|
||||
)
|
||||
|
||||
# set up machine models
|
||||
cls.machine_model_csx = MachineModel(
|
||||
@@ -49,6 +61,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.machine_model_tx2 = MachineModel(
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml")
|
||||
)
|
||||
cls.machine_model_a64fx = MachineModel(
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml")
|
||||
)
|
||||
cls.semantics_x86 = ISASemantics("x86")
|
||||
cls.semantics_csx = ArchSemantics(
|
||||
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
|
||||
@@ -58,6 +73,10 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.machine_model_tx2,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
|
||||
)
|
||||
cls.semantics_a64fx = ArchSemantics(
|
||||
cls.machine_model_a64fx,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
|
||||
)
|
||||
cls.machine_model_zen = MachineModel(arch="zen1")
|
||||
|
||||
for i in range(len(cls.kernel_x86)):
|
||||
@@ -66,12 +85,18 @@ class TestSemanticTools(unittest.TestCase):
|
||||
for i in range(len(cls.kernel_x86_memdep)):
|
||||
cls.semantics_csx.assign_src_dst(cls.kernel_x86_memdep[i])
|
||||
cls.semantics_csx.assign_tp_lt(cls.kernel_x86_memdep[i])
|
||||
for i in range(len(cls.kernel_x86_long_LCD)):
|
||||
cls.semantics_csx.assign_src_dst(cls.kernel_x86_long_LCD[i])
|
||||
cls.semantics_csx.assign_tp_lt(cls.kernel_x86_long_LCD[i])
|
||||
for i in range(len(cls.kernel_AArch64)):
|
||||
cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i])
|
||||
cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i])
|
||||
for i in range(len(cls.kernel_aarch64_memdep)):
|
||||
cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i])
|
||||
cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i])
|
||||
for i in range(len(cls.kernel_aarch64_SVE)):
|
||||
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
|
||||
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
|
||||
|
||||
###########
|
||||
# Tests
|
||||
@@ -284,8 +309,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg.export_graph(filepath="/dev/null")
|
||||
|
||||
def test_memdependency_x86(self):
|
||||
dg = KernelDG(self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx,
|
||||
self.semantics_csx)
|
||||
dg = KernelDG(
|
||||
self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, self.semantics_csx
|
||||
)
|
||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {10, 12})
|
||||
@@ -295,8 +321,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg.export_graph(filepath="/dev/null")
|
||||
|
||||
def test_kernelDG_AArch64(self):
|
||||
dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2,
|
||||
self.semantics_tx2)
|
||||
dg = KernelDG(
|
||||
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2
|
||||
)
|
||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=4)), {9, 10})
|
||||
@@ -321,6 +348,15 @@ class TestSemanticTools(unittest.TestCase):
|
||||
# test dot creation
|
||||
dg.export_graph(filepath="/dev/null")
|
||||
|
||||
def test_kernelDG_SVE(self):
|
||||
KernelDG(
|
||||
self.kernel_aarch64_SVE,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_a64fx,
|
||||
self.semantics_a64fx,
|
||||
)
|
||||
# TODO check for correct analysis
|
||||
|
||||
def test_hidden_load(self):
|
||||
machine_model_hld = MachineModel(
|
||||
path_to_yaml=self._find_file("hidden_load_machine_model.yml")
|
||||
@@ -353,14 +389,20 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg.get_loopcarried_dependencies()
|
||||
|
||||
def test_loop_carried_dependency_aarch64(self):
|
||||
dg = KernelDG(self.kernel_aarch64_memdep, self.parser_AArch64, self.machine_model_tx2,
|
||||
self.semantics_tx2)
|
||||
dg = KernelDG(
|
||||
self.kernel_aarch64_memdep,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_tx2,
|
||||
self.semantics_tx2,
|
||||
)
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 2)
|
||||
# based on line 6
|
||||
self.assertEqual(lc_deps[6]["latency"], 28.0)
|
||||
self.assertEqual([(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
|
||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)])
|
||||
self.assertEqual(
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
|
||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
|
||||
)
|
||||
|
||||
def test_loop_carried_dependency_x86(self):
|
||||
lcd_id = 8
|
||||
@@ -375,7 +417,7 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1)
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id]["dependencies"][0][0],
|
||||
dg.dg.nodes(data=True)[lcd_id]["instruction_form"]
|
||||
dg.dg.nodes(data=True)[lcd_id]["instruction_form"],
|
||||
)
|
||||
# w/ flag dependencies: ID 9 w/ len=2
|
||||
# w/o flag dependencies: ID 5 w/ len=1
|
||||
@@ -389,6 +431,31 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
|
||||
)
|
||||
|
||||
def test_timeout_during_loop_carried_dependency(self):
|
||||
start_time = time.perf_counter()
|
||||
KernelDG(
|
||||
self.kernel_x86_long_LCD,
|
||||
self.parser_x86,
|
||||
self.machine_model_csx,
|
||||
self.semantics_x86,
|
||||
timeout=10
|
||||
)
|
||||
end_time = time.perf_counter()
|
||||
time_10 = end_time - start_time
|
||||
start_time = time.perf_counter()
|
||||
KernelDG(
|
||||
self.kernel_x86_long_LCD,
|
||||
self.parser_x86,
|
||||
self.machine_model_csx,
|
||||
self.semantics_x86,
|
||||
timeout=2
|
||||
)
|
||||
end_time = time.perf_counter()
|
||||
time_2 = end_time - start_time
|
||||
self.assertTrue(time_10 > 10)
|
||||
self.assertTrue(2 < time_2)
|
||||
self.assertTrue(time_2 < (time_10 - 7))
|
||||
|
||||
def test_is_read_is_written_x86(self):
|
||||
# independent from HW model
|
||||
dag = KernelDG(self.kernel_x86, self.parser_x86, None, None)
|
||||
|
||||
@@ -232,7 +232,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -284,8 +284,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -296,8 +298,672 @@
|
||||
"ZEN has 156 tests, compiled to 126 unique assembly representations.\n",
|
||||
"ZEN2 has 156 tests, compiled to 126 unique assembly representations.\n",
|
||||
"TX2 has 104 tests, compiled to 78 unique assembly representations.\n",
|
||||
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n"
|
||||
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n",
|
||||
"High-level iterations in assembly block: 16\n",
|
||||
"Measured: 1.1903856655856655\n",
|
||||
"IACA Predicted: 1.96875 TP: 1.875 LCD: None CP: None\n",
|
||||
"Ithemal Predicted: nan TP: None LCD: None CP: None\n",
|
||||
"LLVM-MCA Predicted: 2.240625 TP: 1.948125 LCD: 2.240625 CP: 3.8125\n",
|
||||
"OSACA Predicted: 1.875 TP: 1.875 LCD: 0.5 CP: 2.75\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<pre style=\"white-space: pre !important;\">Open Source Architecture Code Analyzer (OSACA) - 0.3.14\n",
|
||||
"Analyzed file: build/SKX/icc/O3/pi.marked.s\n",
|
||||
"Architecture: SKX\n",
|
||||
"Timestamp: 2021-04-15 12:15:40\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction\n",
|
||||
" * - Instruction micro-ops not bound to a port\n",
|
||||
" X - No throughput/latency information for this instruction in data file\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Combined Analysis Report\n",
|
||||
"------------------------\n",
|
||||
" Port pressure in cycles \n",
|
||||
" | 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |\n",
|
||||
"-------------------------------------------------------------------------------------------------\n",
|
||||
" 62 | | | | | | | | || | | # pointer_increment=128 fa3c665ee18e1e5f704c8a6026891c36\n",
|
||||
" 63 | | | | | | | | || | | ..B1.4: # Preds ..B1.4 ..B1.3\n",
|
||||
" 64 | | | | | | | | || | | # Execution count [5.00e+00]\n",
|
||||
" 65 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addl $32, %ecx #16.5\n",
|
||||
" 66 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm5, %ymm9, %ymm14 #17.9\n",
|
||||
" 67 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm9, %zmm8 #17.14\n",
|
||||
" 68 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm8, %zmm1, %zmm10 #17.18\n",
|
||||
" 69 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm2, %zmm11 #17.25\n",
|
||||
" 70 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm11, %zmm11 #18.38\n",
|
||||
" 71 | | | | | | | | || | | * vmovaps %zmm0, %zmm29 #18.38\n",
|
||||
" 72 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm11, %zmm13 #18.38\n",
|
||||
" 73 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11 #18.38\n",
|
||||
" 74 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm13, %k0 #18.38\n",
|
||||
" 75 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm11, %zmm11, %zmm12 #18.38\n",
|
||||
" 76 | 1.00 | | | | | | | || | | knotw %k0, %k1 #18.38\n",
|
||||
" 77 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm11, %zmm13{%k1} #18.38\n",
|
||||
" 78 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm12, %zmm13{%k1} #18.38\n",
|
||||
" 79 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38\n",
|
||||
" 80 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm14, %ymm20 #17.9\n",
|
||||
" 81 | 0.50 | | | | | 1.50 | | || 7.0 | | vcvtdq2pd %ymm14, %zmm15 #17.14\n",
|
||||
" 82 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddpd %zmm15, %zmm1, %zmm16 #17.18\n",
|
||||
" 83 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm16, %zmm2, %zmm17 #17.25\n",
|
||||
" 84 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm0, %zmm17, %zmm17 #18.38\n",
|
||||
" 85 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm17, %zmm19 #18.38\n",
|
||||
" 86 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17 #18.38\n",
|
||||
" 87 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm19, %k2 #18.38\n",
|
||||
" 88 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm17, %zmm18 #18.38\n",
|
||||
" 89 | 1.00 | | | | | | | || | | knotw %k2, %k3 #18.38\n",
|
||||
" 90 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm19, %zmm17, %zmm19{%k3} #18.38\n",
|
||||
" 91 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm19, %zmm18, %zmm19{%k3} #18.38\n",
|
||||
" 92 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38\n",
|
||||
" 93 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm20, %ymm26 #17.9\n",
|
||||
" 94 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm20, %zmm21 #17.14\n",
|
||||
" 95 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm21, %zmm1, %zmm22 #17.18\n",
|
||||
" 96 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm22, %zmm2, %zmm23 #17.25\n",
|
||||
" 97 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm23, %zmm23 #18.38\n",
|
||||
" 98 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm23, %zmm25 #18.38\n",
|
||||
" 99 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23 #18.38\n",
|
||||
" 100 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm25, %k4 #18.38\n",
|
||||
" 101 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm23, %zmm23, %zmm24 #18.38\n",
|
||||
" 102 | 1.00 | | | | | | | || | | knotw %k4, %k5 #18.38\n",
|
||||
" 103 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm23, %zmm25{%k5} #18.38\n",
|
||||
" 104 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm24, %zmm25{%k5} #18.38\n",
|
||||
" 105 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm25, %zmm6 #18.38\n",
|
||||
" 106 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm26, %zmm27 #17.14\n",
|
||||
" 107 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm26, %ymm9 #17.9\n",
|
||||
" 108 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm27, %zmm1, %zmm28 #17.18\n",
|
||||
" 109 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm28, %zmm2, %zmm8 #17.25\n",
|
||||
" 110 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm8, %zmm8, %zmm29 #18.38\n",
|
||||
" 111 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm29, %zmm31 #18.38\n",
|
||||
" 112 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29 #18.38\n",
|
||||
" 113 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm31, %k6 #18.38\n",
|
||||
" 114 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm29, %zmm29, %zmm30 #18.38\n",
|
||||
" 115 | 1.00 | | | | | | | || | | knotw %k6, %k7 #18.38\n",
|
||||
" 116 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm29, %zmm31{%k7} #18.38\n",
|
||||
" 117 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm30, %zmm31{%k7} #18.38\n",
|
||||
" 118 | 0.00 | | | | | 1.00 | | || 0.0 | 4.0 | vfmadd231pd %zmm4, %zmm31, %zmm3 #18.38\n",
|
||||
" 119 | 0.00 | 0.34 | | | | 0.00 | 0.66 | || | | cmpl %edx, %ecx #16.5\n",
|
||||
" 120 | 0.00 | | | | | | 1.00 | || | | jb ..B1.4 # Prob 82% #16.5\n",
|
||||
"\n",
|
||||
" 30.0 4.34 2.00 2.00 2.00 2.00 30.0 2.66 44 8.0 \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Loop-Carried Dependencies Analysis Report\n",
|
||||
"-----------------------------------------\n",
|
||||
" 92 | 8.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38| [92, 118]\n",
|
||||
" 79 | 8.0 | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38| [79, 105]\n",
|
||||
" 66 | 4.0 | vpaddd %ymm5, %ymm9, %ymm14 #17.9| [66, 80, 93, 107]\n",
|
||||
" 65 | 1.0 | addl $32, %ecx #16.5| [65]\n",
|
||||
"</pre>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<pre style=\"white-space: pre !important;\">Iterations: 100\n",
|
||||
"Instructions: 5600\n",
|
||||
"Total Cycles: 3585\n",
|
||||
"Total uOps: 7200\n",
|
||||
"\n",
|
||||
"Dispatch Width: 6\n",
|
||||
"uOps Per Cycle: 2.01\n",
|
||||
"IPC: 1.56\n",
|
||||
"Block RThroughput: 18.0\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Instruction Info:\n",
|
||||
"[1]: #uOps\n",
|
||||
"[2]: Latency\n",
|
||||
"[3]: RThroughput\n",
|
||||
"[4]: MayLoad\n",
|
||||
"[5]: MayStore\n",
|
||||
"[6]: HasSideEffects (U)\n",
|
||||
"\n",
|
||||
"[1] [2] [3] [4] [5] [6] Instructions:\n",
|
||||
" 1 1 0.25 addl\t$32, %ecx\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
" 1 1 0.33 vmovaps\t%zmm0, %zmm29\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm11, %zmm13\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
" 1 1 1.00 knotw\t%k0, %k1\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm17, %zmm19\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
" 1 1 1.00 knotw\t%k2, %k3\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm23, %zmm25\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
" 1 1 1.00 knotw\t%k4, %k5\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm29, %zmm31\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
" 1 1 1.00 knotw\t%k6, %k7\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
" 1 1 0.25 cmpl\t%edx, %ecx\n",
|
||||
" 1 1 0.50 jb\t..B1.4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Resources:\n",
|
||||
"[0] - SKXDivider\n",
|
||||
"[1] - SKXFPDivider\n",
|
||||
"[2] - SKXPort0\n",
|
||||
"[3] - SKXPort1\n",
|
||||
"[4] - SKXPort2\n",
|
||||
"[5] - SKXPort3\n",
|
||||
"[6] - SKXPort4\n",
|
||||
"[7] - SKXPort5\n",
|
||||
"[8] - SKXPort6\n",
|
||||
"[9] - SKXPort7\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Resource pressure per iteration:\n",
|
||||
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] \n",
|
||||
" - - 31.17 5.72 2.00 2.00 - 29.10 2.01 - \n",
|
||||
"\n",
|
||||
"Resource pressure by instruction:\n",
|
||||
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:\n",
|
||||
" - - - 0.80 - - - 0.19 0.01 - addl\t$32, %ecx\n",
|
||||
" - - 0.07 0.92 - - - 0.01 - - vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
" - - 0.42 - - - - 0.58 - - vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
" - - 0.51 - - - - 0.49 - - vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
" - - 0.45 - - - - 0.55 - - vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
" - - - 1.00 - - - - - - vmovaps\t%zmm0, %zmm29\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm11, %zmm13\n",
|
||||
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
" - - 0.49 - - - - 0.51 - - vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k0, %k1\n",
|
||||
" - - 0.44 - - - - 0.56 - - vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
" - - 0.70 - - - - 0.30 - - vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
" - - 0.48 - - - - 0.52 - - vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
" - - 0.42 - - - - 0.58 - - vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
" - - 0.32 - - - - 0.68 - - vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm17, %zmm19\n",
|
||||
" - - 0.32 - 1.00 - - 0.68 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k2, %k3\n",
|
||||
" - - 0.53 - - - - 0.47 - - vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
" - - 0.57 - - - - 0.43 - - vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
" - - 0.52 - - - - 0.48 - - vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm23, %zmm25\n",
|
||||
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
" - - 0.53 - - - - 0.47 - - vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k4, %k5\n",
|
||||
" - - 0.42 - - - - 0.58 - - vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
" - - 0.60 - - - - 0.40 - - vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
" - - 0.26 - - - - 0.74 - - vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
" - - 0.34 - - - - 0.66 - - vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm29, %zmm31\n",
|
||||
" - - 0.34 - 1.00 - - 0.66 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
" - - 0.52 - - - - 0.48 - - vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k6, %k7\n",
|
||||
" - - 0.47 - - - - 0.53 - - vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
" - - 0.66 - - - - 0.34 - - vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
" - - - - - - - - 1.00 - cmpl\t%edx, %ecx\n",
|
||||
" - - - - - - - - 1.00 - jb\t..B1.4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Timeline view:\n",
|
||||
" 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 \n",
|
||||
"Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678\n",
|
||||
"\n",
|
||||
"[0,0] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[0,1] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[0,2] D=eeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[0,3] D========eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[0,4] D============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[0,5] .D===============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[0,6] .DeE------------------R . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[0,7] .D===================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[0,8] . D======================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[0,9] . D======================eeeeE-------R . . . . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[0,10] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[0,11] . D==========================eE----------R . . . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[0,12] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[0,13] . D====================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[0,14] . D========================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[0,15] . DeE-------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[0,16] . DeeeeeeeE-------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[0,17] . D=======eeeeE---------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[0,18] . D==========eeeeE-----------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[0,19] . D==============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[0,20] . D==================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[0,21] . D=====================eeeeeeeeeeeE----------R. . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[0,22] . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[0,23] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[0,24] . D==========================eE---------------R. . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
|
||||
"[0,25] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[0,26] . .D===================================eeeeE--R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[0,27] . .D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[0,28] . .DeE------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[0,29] . .DeeeeeeeE------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[0,30] . .D=======eeeeE--------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[0,31] . . D==========eeeeE----------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[0,32] . . D==============eeeeE------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[0,33] . . D==================eeeeE--------------------R . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[0,34] . . D=====================eeeeeeeeeeeE---------R . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[0,35] . . D=====================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[0,36] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[0,37] . . D==========================eE--------------R . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
|
||||
"[0,38] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[0,39] . . D===================================eeeeE-R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[0,40] . . D=======================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[0,41] . . DeeeeeeeE------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[0,42] . . DeE------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[0,43] . . D=======eeeeE--------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[0,44] . . D=============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[0,45] . . D=================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[0,46] . . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[0,47] . . .D=========================eeeeeeeeeeeE-----R. . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[0,48] . . .D=========================eeeeE------------R. . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[0,49] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[0,50] . . .D==============================eE----------R. . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
|
||||
"[0,51] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[0,52] . . . D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[0,53] . . . D===========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[0,54] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
|
||||
"[0,55] . . . D=eE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . jb\t..B1.4\n",
|
||||
"[1,0] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[1,1] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[1,2] . . . D==eeeeeeeE-------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[1,3] . . . D===============eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[1,4] . . . D====================eeeeE----------------------R . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[1,5] . . . D=========================eeeeE-----------------R . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[1,6] . . . DeE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[1,7] . . . D============================eeeeE-------------R . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[1,8] . . . D================================eeeeeeeeeeeE--R . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[1,9] . . . D================================eeeeE---------R . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[1,10] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[1,11] . . . D====================================eE---------R . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[1,12] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[1,13] . . . D==============================================eeeeER . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[1,14] . . . D==================================================eeeeER . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[1,15] . . . DeE-----------------------------------------------------R . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[1,16] . . . .D===eeeeeeeE-------------------------------------------R . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[1,17] . . . .D==============eeeeE-----------------------------------R . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[1,18] . . . .D==================eeeeE-------------------------------R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[1,19] . . . .D======================eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[1,20] . . . . D================================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[1,21] . . . . D====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[1,22] . . . . D=====================================eeeeE-----------R . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[1,23] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[1,24] . . . . D========================================eE----------R . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
|
||||
"[1,25] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[1,26] . . . . D==================================================eeeeER. . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[1,27] . . . . D======================================================eeeeER . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[1,28] . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[1,29] . . . . D=================================eeeeeeeE-----------------R . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[1,30] . . . . D========================================eeeeE-------------R . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[1,31] . . . . D===========================================eeeeE---------R . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[1,32] . . . . .D==============================================eeeeE-----R . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[1,33] . . . . . D=================================================eeeeE-R . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[1,34] . . . . . D====================================================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[1,35] . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[1,36] . . . . . D=============================================================eeeeER . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[1,37] . . . . . D======================================================eE----------R . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
|
||||
"[1,38] . . . . . .D============================================================eeeeER . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[1,39] . . . . . . D===============================================================eeeeER . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[1,40] . . . . . . D==================================================================eeeeER . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[1,41] . . . . . . D============================eeeeeeeE-----------------------------------R . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[1,42] . . . . . . DeE--------------------------------------------------------------------R . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[1,43] . . . . . . D==================================eeeeE-------------------------------R . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[1,44] . . . . . . D=====================================eeeeE---------------------------R . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[1,45] . . . . . . D===========================================eeeeE---------------------R . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[1,46] . . . . . . D===============================================eeeeE-----------------R . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[1,47] . . . . . . .D==================================================eeeeeeeeeeeE------R . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[1,48] . . . . . . . D=================================================eeeeE-------------R . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[1,49] . . . . . . . D===========================================================eeeeE--R . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[1,50] . . . . . . . D=====================================================eE----------R . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
|
||||
"[1,51] . . . . . . . D==========================================================eeeeE-R . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[1,52] . . . . . . . D==============================================================eeeeER . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[1,53] . . . . . . . .D=================================================================eeeeER . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[1,54] . . . . . . . .DeE--------------------------------------------------------------------R . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
|
||||
"[1,55] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . jb\t..B1.4\n",
|
||||
"[2,0] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[2,1] . . . . . . . . D=eE------------------------------------------------------------------R . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[2,2] . . . . . . . . D======================eeeeeeeE--------------------------------------R . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[2,3] . . . . . . . . D==============================eeeeE---------------------------------R . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[2,4] . . . . . . . . D===================================eeeeE----------------------------R . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[2,5] . . . . . . . . D========================================eeeeE-----------------------R . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[2,6] . . . . . . . . DeE-----------------------------------------------------------------R . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[2,7] . . . . . . . . D===========================================eeeeE-------------------R . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[2,8] . . . . . . . . D================================================eeeeeeeeeeeE-------R . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[2,9] . . . . . . . . D================================================eeeeE-------------R . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[2,10] . . . . . . . . D==========================================================eeeeE---R . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[2,11] . . . . . . . . .D======================================================eE---------R . . . . . . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[2,12] . . . . . . . . .D=========================================================eeeeE---R . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[2,13] . . . . . . . . . D============================================================eeeeER . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[2,14] . . . . . . . . . D================================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[2,15] . . . . . . . . . DeE------------------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[2,16] . . . . . . . . . D==================eeeeeeeE-----------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[2,17] . . . . . . . . . D=========================eeeeE-------------------------------------R . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[2,18] . . . . . . . . . D=============================eeeeE--------------------------------R . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[2,19] . . . . . . . . . .D=================================eeeeE---------------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[2,20] . . . . . . . . . . D=====================================eeeeE----------------------R . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[2,21] . . . . . . . . . . D=========================================eeeeeeeeeeeE-----------R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[2,22] . . . . . . . . . . D=========================================eeeeE-----------------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[2,23] . . . . . . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[2,24] . . . . . . . . . . D===============================================eE-------------R . . . . . . . . . . . . knotw\t%k2, %k3\n",
|
||||
"[2,25] . . . . . . . . . . D=================================================eeeeE-------R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[2,26] . . . . . . . . . . . D===================================================eeeeE---R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[2,27] . . . . . . . . . . . D=======================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[2,28] . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[2,29] . . . . . . . . . . . D============eeeeeeeE--------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[2,30] . . . . . . . . . . . D====================eeeeE---------------------------------R . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[2,31] . . . . . . . . . . . D=========================eeeeE---------------------------R . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[2,32] . . . . . . . . . . . .D=============================eeeeE----------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[2,33] . . . . . . . . . . . . D==================================eeeeE----------------R . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[2,34] . . . . . . . . . . . . D=====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[2,35] . . . . . . . . . . . . D======================================eeeeE-----------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[2,36] . . . . . . . . . . . . D===============================================eeeeE-R . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[2,37] . . . . . . . . . . . . D========================================eE----------R . . . . . . . . . . . . knotw\t%k4, %k5\n",
|
||||
"[2,38] . . . . . . . . . . . . .D==============================================eeeeER . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[2,39] . . . . . . . . . . . . . D=================================================eeeeER . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[2,40] . . . . . . . . . . . . . D====================================================eeeeER . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[2,41] . . . . . . . . . . . . . D======eeeeeeeE------------------------------------------R . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[2,42] . . . . . . . . . . . . . DeE------------------------------------------------------R . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[2,43] . . . . . . . . . . . . . D===============eeeeE-----------------------------------R . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[2,44] . . . . . . . . . . . . . D========================eeeeE--------------------------R . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[2,45] . . . . . . . . . . . . . D============================eeeeE----------------------R . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[2,46] . . . . . . . . . . . . . .D======================================eeeeE-----------R . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[2,47] . . . . . . . . . . . . . . D=========================================eeeeeeeeeeeER . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[2,48] . . . . . . . . . . . . . . D=========================================eeeeE------R . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[2,49] . . . . . . . . . . . . . . D===================================================eeeeER . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[2,50] . . . . . . . . . . . . . . D============================================eE---------R . . . . . . . . . knotw\t%k6, %k7\n",
|
||||
"[2,51] . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[2,52] . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[2,53] . . . . . . . . . . . . . . .D========================================================eeeeER . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[2,54] . . . . . . . . . . . . . . . DeE----------------------------------------------------------R . . . . . . . . cmpl\t%edx, %ecx\n",
|
||||
"[2,55] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . jb\t..B1.4\n",
|
||||
"[3,0] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[3,1] . . . . . . . . . . . . . . . DeE--------------------------------------------------------R . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[3,2] . . . . . . . . . . . . . . . D==eeeeeeeE------------------------------------------------R . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[3,3] . . . . . . . . . . . . . . . D=========eeeeE--------------------------------------------R . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[3,4] . . . . . . . . . . . . . . . D================eeeeE-------------------------------------R . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[3,5] . . . . . . . . . . . . . . . D===================eeeeE---------------------------------R . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[3,6] . . . . . . . . . . . . . . . DeE-------------------------------------------------------R . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[3,7] . . . . . . . . . . . . . . . D===================================eeeeE-----------------R . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[3,8] . . . . . . . . . . . . . . . .D======================================eeeeeeeeeeeE------R . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[3,9] . . . . . . . . . . . . . . . .D=======================================eeeeE------------R . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[3,10] . . . . . . . . . . . . . . . .D=================================================eeeeE--R . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[3,11] . . . . . . . . . . . . . . . . D===========================================eE----------R . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[3,12] . . . . . . . . . . . . . . . . D===============================================eeeeE--R . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[3,13] . . . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[3,14] . . . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[3,15] . . . . . . . . . . . . . . . . DeE--------------------------------------------------------R. . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[3,16] . . . . . . . . . . . . . . . . .D===============================eeeeeeeE------------------R. . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[3,17] . . . . . . . . . . . . . . . . .D=======================================eeeeE-------------R. . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[3,18] . . . . . . . . . . . . . . . . .D===========================================eeeeE---------R. . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[3,19] . . . . . . . . . . . . . . . . . D==============================================eeeeE-----R. . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[3,20] . . . . . . . . . . . . . . . . . D==================================================eeeeE-R. . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[3,21] . . . . . . . . . . . . . . . . . D=====================================================eeeeeeeeeeeER. . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[3,22] . . . . . . . . . . . . . . . . . D=====================================================eeeeE------R. . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[3,23] . . . . . . . . . . . . . . . . . D==============================================================eeeeER . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[3,24] . . . . . . . . . . . . . . . . . .D=======================================================eE---------R . . . . knotw\t%k2, %k3\n",
|
||||
"[3,25] . . . . . . . . . . . . . . . . . . D============================================================eeeeER . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[3,26] . . . . . . . . . . . . . . . . . . D================================================================eeeeER . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[3,27] . . . . . . . . . . . . . . . . . . D===================================================================eeeeER . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[3,28] . . . . . . . . . . . . . . . . . . DeE----------------------------------------------------------------------R . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[3,29] . . . . . . . . . . . . . . . . . . D===========================eeeeeeeE------------------------------------R . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[3,30] . . . . . . . . . . . . . . . . . . D==================================eeeeE--------------------------------R . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[3,31] . . . . . . . . . . . . . . . . . . D======================================eeeeE----------------------------R . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[3,32] . . . . . . . . . . . . . . . . . . D=========================================eeeeE------------------------R . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[3,33] . . . . . . . . . . . . . . . . . . D=============================================eeeeE--------------------R . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[3,34] . . . . . . . . . . . . . . . . . . .D================================================eeeeeeeeeeeE---------R . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[3,35] . . . . . . . . . . . . . . . . . . .D=================================================eeeeE---------------R . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[3,36] . . . . . . . . . . . . . . . . . . . D==========================================================eeeeE-----R . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[3,37] . . . . . . . . . . . . . . . . . . . D====================================================eE-------------R . . knotw\t%k4, %k5\n",
|
||||
"[3,38] . . . . . . . . . . . . . . . . . . . D========================================================eeeeE-----R . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[3,39] . . . . . . . . . . . . . . . . . . . D============================================================eeeeE-R . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[3,40] . . . . . . . . . . . . . . . . . . . D===============================================================eeeeER. . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[3,41] . . . . . . . . . . . . . . . . . . . D======================eeeeeeeE--------------------------------------R. . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[3,42] . . . . . . . . . . . . . . . . . . . .DeE-----------------------------------------------------------------R. . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[3,43] . . . . . . . . . . . . . . . . . . . .D============================eeeeE----------------------------------R. . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[3,44] . . . . . . . . . . . . . . . . . . . . D===============================eeeeE------------------------------R. . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[3,45] . . . . . . . . . . . . . . . . . . . . D=====================================eeeeE------------------------R. . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[3,46] . . . . . . . . . . . . . . . . . . . . D=========================================eeeeE--------------------R. . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[3,47] . . . . . . . . . . . . . . . . . . . . D============================================eeeeeeeeeeeE---------R. . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[3,48] . . . . . . . . . . . . . . . . . . . . D===========================================eeeeE----------------R. . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[3,49] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE-----R. . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[3,50] . . . . . . . . . . . . . . . . . . . . D==============================================eE---------------R. . knotw\t%k6, %k7\n",
|
||||
"[3,51] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE----R. . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[3,52] . . . . . . . . . . . . . . . . . . . . .D=========================================================eeeeER. . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[3,53] . . . . . . . . . . . . . . . . . . . . . D============================================================eeeeER vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[3,54] . . . . . . . . . . . . . . . . . . . . . DeE--------------------------------------------------------------R cmpl\t%edx, %ecx\n",
|
||||
"[3,55] . . . . . . . . . . . . . . . . . . . . . DeE-------------------------------------------------------------R jb\t..B1.4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Average Wait times (based on the timeline view):\n",
|
||||
"[0]: Executions\n",
|
||||
"[1]: Average time spent waiting in a scheduler's queue\n",
|
||||
"[2]: Average time spent waiting in a scheduler's queue while ready\n",
|
||||
"[3]: Average time elapsed from WB until retire stage\n",
|
||||
"\n",
|
||||
" [0] [1] [2] [3]\n",
|
||||
"0. 4 1.0 1.0 42.5 addl\t$32, %ecx\n",
|
||||
"1. 4 1.3 1.3 42.0 vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"2. 4 7.8 7.8 30.8 vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"3. 4 16.5 1.8 26.0 vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"4. 4 21.8 1.3 21.8 vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"5. 4 25.8 0.5 18.3 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"6. 4 1.0 1.0 45.8 vmovaps\t%zmm0, %zmm29\n",
|
||||
"7. 4 32.3 3.0 12.3 vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"8. 4 36.0 0.3 3.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"9. 4 36.3 0.8 10.3 vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"10. 4 46.5 0.0 1.3 vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"11. 4 40.8 1.3 9.5 knotw\t%k0, %k1\n",
|
||||
"12. 4 45.8 0.0 1.3 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"13. 4 49.0 0.0 0.0 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"14. 4 52.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"15. 4 1.0 1.0 54.5 vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"16. 4 14.0 14.0 34.8 vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"17. 4 22.3 1.3 29.5 vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"18. 4 26.0 0.3 25.3 vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"19. 4 29.8 0.3 21.0 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"20. 4 35.3 2.0 15.0 vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"21. 4 38.8 0.0 6.5 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"22. 4 39.3 1.0 12.5 vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"23. 4 48.8 0.0 3.5 vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"24. 4 43.0 0.8 11.8 knotw\t%k2, %k3\n",
|
||||
"25. 4 47.8 0.0 3.5 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"26. 4 51.0 0.0 1.3 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"27. 4 54.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"28. 4 1.0 1.0 56.5 vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"29. 4 19.0 19.0 31.8 vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"30. 4 26.3 0.3 27.5 vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"31. 4 30.0 0.5 23.0 vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"32. 4 33.5 0.3 18.8 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"33. 4 37.5 0.5 14.3 vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"34. 4 40.5 0.0 5.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"35. 4 40.8 0.5 12.3 vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"36. 4 50.5 0.0 2.8 vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"37. 4 44.0 0.5 11.8 knotw\t%k4, %k5\n",
|
||||
"38. 4 49.5 0.3 2.5 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"39. 4 52.8 0.0 0.5 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"40. 4 56.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"41. 4 15.0 15.0 37.8 vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"42. 4 1.0 1.0 57.3 vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"43. 4 22.0 0.8 33.0 vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"44. 4 27.3 2.0 27.0 vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"45. 4 32.3 1.0 22.0 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"46. 4 38.0 2.0 16.0 vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"47. 4 41.0 0.0 5.0 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"48. 4 40.5 0.3 11.8 vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"49. 4 51.0 0.0 2.0 vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"50. 4 44.3 0.8 11.0 knotw\t%k6, %k7\n",
|
||||
"51. 4 50.5 0.5 1.5 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"52. 4 53.8 0.0 0.0 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"53. 4 57.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"54. 4 1.0 1.0 58.5 cmpl\t%edx, %ecx\n",
|
||||
"55. 4 1.3 0.0 57.5 jb\t..B1.4\n",
|
||||
" 4 32.5 1.6 18.4 <total>\n",
|
||||
"</pre>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<pre style=\"white-space: pre !important;\">Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-30;16:57:45\n",
|
||||
"Analyzed File - build/SKX/icc/O3/pi.marked.o\n",
|
||||
"Binary Format - 64Bit\n",
|
||||
"Architecture - SKX\n",
|
||||
"Analysis Type - Throughput\n",
|
||||
"\n",
|
||||
"Throughput Analysis Report\n",
|
||||
"--------------------------\n",
|
||||
"Block Throughput: 31.50 Cycles Throughput Bottleneck: Backend\n",
|
||||
"Loop Count: 103\n",
|
||||
"Port Binding In Cycles Per Iteration:\n",
|
||||
"--------------------------------------------------------------------------------------------------\n",
|
||||
"| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
|
||||
"--------------------------------------------------------------------------------------------------\n",
|
||||
"| Cycles | 30.0 0.0 | 4.0 | 2.0 2.0 | 2.0 2.0 | 0.0 | 30.0 | 1.0 | 0.0 |\n",
|
||||
"--------------------------------------------------------------------------------------------------\n",
|
||||
"\n",
|
||||
"DV - Divider pipe (on port 0)\n",
|
||||
"D - Data fetch pipe (on ports 2 and 3)\n",
|
||||
"F - Macro Fusion with the previous instruction occurred\n",
|
||||
"* - instruction micro-ops not bound to a port\n",
|
||||
"^ - Micro Fusion occurred\n",
|
||||
"# - ESP Tracking sync uop was issued\n",
|
||||
"@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected\n",
|
||||
"X - instruction not supported, was not accounted in Analysis\n",
|
||||
"\n",
|
||||
"| Num Of | Ports pressure in cycles | |\n",
|
||||
"| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
|
||||
"-----------------------------------------------------------------------------------------\n",
|
||||
"| 1 | | | | | | | 1.0 | | add ecx, 0x20\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm14, ymm9, ymm5\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm8, ymm9\n",
|
||||
"| 1 | | | | | | 1.0 | | | vaddpd zmm10, zmm1, zmm8\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm11, zmm2, zmm10\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm11, zmm11, zmm0\n",
|
||||
"| 1* | | | | | | | | | vmovaps zmm29, zmm0\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm13, zmm11\n",
|
||||
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm11, zmm13, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k0, zmm13, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm11, zmm11\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k1, k0\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm13{k1}, zmm11, zmm13\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm13{k1}, zmm12, zmm13\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm13, zmm4\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm20, ymm14, ymm5\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm15, ymm14\n",
|
||||
"| 1 | 1.0 | | | | | | | | vaddpd zmm16, zmm1, zmm15\n",
|
||||
"| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm2, zmm16\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm17, zmm17, zmm0\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm19, zmm17\n",
|
||||
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm17, zmm19, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k2, zmm19, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm17, zmm17\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k3, k2\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm17, zmm19\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm18, zmm19\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm19, zmm4\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm26, ymm20, ymm5\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm21, ymm20\n",
|
||||
"| 1 | | | | | | 1.0 | | | vaddpd zmm22, zmm1, zmm21\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm23, zmm2, zmm22\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm23, zmm23, zmm0\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm25, zmm23\n",
|
||||
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm23, zmm25, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k4, zmm25, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm24, zmm23, zmm23\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k5, k4\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm25{k5}, zmm23, zmm25\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm25{k5}, zmm24, zmm25\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm25, zmm4\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm27, ymm26\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm9, ymm26, ymm5\n",
|
||||
"| 1 | 1.0 | | | | | | | | vaddpd zmm28, zmm1, zmm27\n",
|
||||
"| 1 | | | | | | 1.0 | | | vmulpd zmm8, zmm2, zmm28\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm29, zmm8, zmm8\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm31, zmm29\n",
|
||||
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm29, zmm31, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k6, zmm31, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm30, zmm29, zmm29\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k7, k6\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm29, zmm31\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm30, zmm31\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm31, zmm4\n",
|
||||
"| 1* | | | | | | | | | cmp ecx, edx\n",
|
||||
"| 0*F | | | | | | | | | jb 0xfffffffffffffeb3\n",
|
||||
"Total Num Of Uops: 71\n",
|
||||
"Analysis Notes:\n",
|
||||
"Backend allocation was stalled due to unavailable allocation resources.\n",
|
||||
"</pre>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -307,7 +973,8 @@
|
||||
" for l in r['analyzed kernel']\n",
|
||||
" if l['instruction']]))\n",
|
||||
"for a in archs:\n",
|
||||
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')"
|
||||
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')\n",
|
||||
"get_info((\"SKX\", \"icc\", \"O3\", \"pi\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -343,7 +1010,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 25,
|
||||
"metadata": {
|
||||
"hideCode": false,
|
||||
"hidePrompt": false,
|
||||
|
||||
Reference in New Issue
Block a user