Compare commits

...

15 Commits

Author SHA1 Message Date
JanLJL
bce837dec9 version bump 2021-06-01 00:13:38 +02:00
JanLJL
090c24ade1 fixed parsing of reg ranges and lists 2021-06-01 00:10:05 +02:00
JanLJL
03a2a1da33 version bump 2021-05-10 12:56:35 +02:00
JanLJL
d59b100fa8 changed immediate type from str to int 2021-05-10 01:12:30 +02:00
JanLJL
5c741a8a2d version bump 2021-05-05 11:16:43 +02:00
JanLJL
2f4849f44e added tests for timeout in LCD analyis 2021-05-02 22:48:22 +02:00
JanLJL
f13a97e5b5 fixed bug in case of no uarch in CLI 2021-05-02 22:39:07 +02:00
JanLJL
66282b0eef fix #73 2021-05-02 22:22:30 +02:00
Julian Hammer
9ec7c161ab added missing testfile for sve instructions 2021-05-02 21:44:17 +02:00
Julian Hammer
8d8eaa8e4f addd LD2 and ST2 instructions to a64fx 2021-04-23 13:33:32 +02:00
Julian Hammer
88d5094bf1 Merge branch 'master' of github.com:RRZE-HPC/OSACA 2021-04-23 13:18:23 +02:00
Julian Hammer
1f32252f91 improved register range and list support on AArch64 2021-04-23 13:12:18 +02:00
JanLJL
1de644cd62 fixed incompatibilty to py3.6 2021-04-20 13:59:56 +02:00
JanLJL
3d1c6aae8d set min requirement to py3.6 2021-04-20 13:59:32 +02:00
JanLJL
dafec70e6e added wheel to pypi publishing 2021-04-19 11:33:29 +02:00
17 changed files with 1223 additions and 105 deletions

View File

@@ -31,11 +31,11 @@ jobs:
- uses: codecov/codecov-action@v1
- name: Build package
run: |
python setup.py build sdist
python setup.py build sdist bdist_wheel
- name: Publish to PyPI
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
with:
skip_existing: true
user: __token__
password: ${{ secrets.pypi_password }}
password: ${{ secrets.pypi_password }}

View File

@@ -1,6 +1,6 @@
"""Open Source Architecture Code Analyzer"""
name = "osaca"
__version__ = "0.4.1"
__version__ = "0.4.4"
# To trigger travis deployment to pypi, do the following:
# 1. Increment __version___

View File

@@ -1132,6 +1132,27 @@ instruction_forms:
throughput: 2.0
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
- name: ld2d
operands:
- class: register
prefix: 'z'
shape: 'd'
- class: register
prefix: 'z'
shape: 'd'
- class: register
prefix: p
predication: '*'
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 2.0
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
port_pressure: [[2, '56'], [4, ['5D', '6D']]]
- name: ldp
operands:
- class: register
@@ -1414,6 +1435,22 @@ instruction_forms:
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ld2
operands:
- class: register
prefix: v
- class: register
prefix: v
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 1.0
latency: 11.0 # 1*p56+2*p5D6D
port_pressure: [[1, '56'], [2, ['5D','6D']]]
- name: lsl
operands:
- class: register
@@ -1980,6 +2017,43 @@ instruction_forms:
throughput: 1.0
latency: 0 # 1*p5+1*p6+1*p0
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
- name: st2d
operands:
- class: register
prefix: 'z'
shape: 'd'
- class: register
prefix: 'z'
shape: 'd'
- class: register
prefix: p
predication: '*'
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 1*p5+1*p6+1*p0
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
- name: st2
operands:
- class: register
prefix: v
- class: register
prefix: v
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 1.0
latency: 11.0 # 1*p56+2*p5D6D
port_pressure: [[1, '5'], [1, ['6']], [1, '0']]
- name: sub
operands:
- class: register

View File

@@ -351,9 +351,9 @@ class Frontend(object):
lcd_text = (
"-------------------------------- WARNING: LCD analysis timed out "
"-------------------------------\n While searching for all dependency chains"
" the analysis timed out.\n Decrease the number of instructions or set the "
"timeout threshold with --lcd-timeout.\n See --help for more "
"information.\n" + dashed_line
" the analysis timed out and might be\n incomplete. Decrease the number of "
"instructions or set the timeout threshold\n with --lcd-timeout. See --help"
" for more information.\n" + dashed_line
)
warnings = "\n"
warnings += lcd_text if lcd_warning else ""

View File

@@ -183,7 +183,7 @@ def check_arguments(args, parser):
supported_import_files = ["ibench", "asmbench"]
# manually set CLX to CSX to support both abbreviations
if args.arch.upper() == "CLX":
if args.arch and args.arch.upper() == "CLX":
args.arch = "CSX"
if args.arch is None and (args.check_db or "import_data" in args):
parser.error(

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python3
from copy import deepcopy
import pyparsing as pp
from osaca.parser import AttrDict, BaseParser
@@ -257,7 +256,9 @@ class ParserAArch64(BaseParser):
# 2. Parse label
if result is None:
try:
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
result = self.process_operand(
self.label.parseString(line, parseAll=True).asDict()
)
result = AttrDict.convert_dict(result)
instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
if self.COMMENT_ID in result[self.LABEL_ID]:
@@ -292,7 +293,6 @@ class ParserAArch64(BaseParser):
try:
result = self.parse_instruction(line)
except (pp.ParseException, KeyError) as e:
raise e
raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
@@ -313,19 +313,24 @@ class ParserAArch64(BaseParser):
# Add operands to list
# Check first operand
if "operand1" in result:
operands.append(self.process_operand(result["operand1"]))
operand = self.process_operand(result["operand1"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check second operand
if "operand2" in result:
operands.append(self.process_operand(result["operand2"]))
operand = self.process_operand(result["operand2"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check third operand
if "operand3" in result:
operands.append(self.process_operand(result["operand3"]))
operand = self.process_operand(result["operand3"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check fourth operand
if "operand4" in result:
operands.append(self.process_operand(result["operand4"]))
operand = self.process_operand(result["operand4"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
# Check fifth operand
if "operand5" in result:
operands.append(self.process_operand(result["operand5"]))
operand = self.process_operand(result["operand5"])
operands.extend(operand) if isinstance(operand, list) else operands.append(operand)
return_dict = AttrDict(
{
@@ -347,8 +352,8 @@ class ParserAArch64(BaseParser):
if self.REGISTER_ID in operand and (
"list" in operand[self.REGISTER_ID] or "range" in operand[self.REGISTER_ID]
):
# TODO: discuss if ranges should be converted to lists
return self.process_register_list(operand[self.REGISTER_ID])
# resolve ranges and lists
return self.resolve_range_list(self.process_register_list(operand[self.REGISTER_ID]))
if self.REGISTER_ID in operand and operand[self.REGISTER_ID]["name"] == "sp":
return self.process_sp_register(operand[self.REGISTER_ID])
# add value attribute to floating point immediates without exponent
@@ -366,6 +371,8 @@ class ParserAArch64(BaseParser):
offset = memory_address.get("offset", None)
if isinstance(offset, list) and len(offset) == 1:
offset = offset[0]
if offset is not None and "value" in offset:
offset["value"] = int(offset["value"], 0)
base = memory_address.get("base", None)
index = memory_address.get("index", None)
scale = 1
@@ -382,7 +389,12 @@ class ParserAArch64(BaseParser):
if "pre_indexed" in memory_address:
new_dict["pre_indexed"] = True
if "post_indexed" in memory_address:
new_dict["post_indexed"] = memory_address["post_indexed"]
if "value" in memory_address["post_indexed"]:
new_dict["post_indexed"] = {"value": int(
memory_address["post_indexed"]["value"], 0
)}
else:
new_dict["post_indexed"] = memory_address["post_indexed"]
return AttrDict({self.MEMORY_ID: new_dict})
def process_sp_register(self, register):
@@ -391,6 +403,37 @@ class ParserAArch64(BaseParser):
reg["prefix"] = "x"
return AttrDict({self.REGISTER_ID: reg})
def resolve_range_list(self, operand):
"""
Resolve range or list register operand to list of registers.
Returns None if neither list nor range
"""
if 'register' in operand:
if 'list' in operand.register:
index = operand.register.get('index')
range_list = []
for reg in operand.register.list:
reg = deepcopy(reg)
if index is not None:
reg['index'] = int(index, 0)
range_list.append(AttrDict({self.REGISTER_ID: reg}))
return range_list
elif 'range' in operand.register:
base_register = operand.register.range[0]
index = operand.register.get('index')
range_list = []
start_name = base_register.name
end_name = operand.register.range[1].name
for name in range(int(start_name), int(end_name) + 1):
reg = deepcopy(base_register)
if index is not None:
reg['index'] = int(index, 0)
reg['name'] = str(name)
range_list.append(AttrDict({self.REGISTER_ID: reg}))
return range_list
# neither register list nor range, return unmodified
return operand
def process_register_list(self, register_list):
"""Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})"""
# Remove unnecessarily created dictionary entries during parsing
@@ -419,11 +462,13 @@ class ParserAArch64(BaseParser):
if "value" in immediate:
# normal integer value
immediate["type"] = "int"
# convert hex/bin immediates to dec
immediate["value"] = self.normalize_imd(immediate)
return AttrDict({self.IMMEDIATE_ID: immediate})
if "base_immediate" in immediate:
# arithmetic immediate, add calculated value as value
immediate["shift"] = immediate["shift"][0]
immediate["value"] = int(immediate["base_immediate"]["value"], 0) << int(
immediate["value"] = self.normalize_imd(immediate["base_immediate"]) << int(
immediate["shift"]["value"]
)
immediate["type"] = "int"
@@ -471,10 +516,11 @@ class ParserAArch64(BaseParser):
def normalize_imd(self, imd):
"""Normalize immediate to decimal based representation"""
if "value" in imd:
if imd["value"].lower().startswith("0x"):
# hex, return decimal
return int(imd["value"], 16)
return int(imd["value"], 10)
if isinstance(imd["value"], str):
# hex or bin, return decimal
return int(imd["value"], 0)
else:
return imd["value"]
elif "float" in imd:
return self.ieee_to_float(imd["float"])
elif "double" in imd:

View File

@@ -108,7 +108,8 @@ class ParserX86ATT(BaseParser):
)
)
memory_segmentation = (
self.register.setResultsName("base")
pp.Optional(pp.Suppress(pp.Literal("*")))
+ self.register.setResultsName("base")
+ pp.Literal(":")
+ segment_extension.setResultsName(self.SEGMENT_EXT_ID)
)
@@ -326,9 +327,14 @@ class ParserX86ATT(BaseParser):
offset = memory_address.get("offset", None)
base = memory_address.get("base", None)
index = memory_address.get("index", None)
scale = 1 if "scale" not in memory_address else int(memory_address["scale"])
scale = 1 if "scale" not in memory_address else int(memory_address["scale"], 0)
if isinstance(offset, str) and base is None and index is None:
offset = {"value": offset}
try:
offset = {"value": int(offset, 0)}
except ValueError:
offset = {"value": offset}
elif offset is not None and "value" in offset:
offset["value"] = int(offset["value"], 0)
new_dict = AttrDict({"offset": offset, "base": base, "index": index, "scale": scale})
# Add segmentation extension if existing
if self.SEGMENT_EXT_ID in memory_address:
@@ -346,7 +352,8 @@ class ParserX86ATT(BaseParser):
if "identifier" in immediate:
# actually an identifier, change declaration
return immediate
# otherwise nothing to do
# otherwise just make sure the immediate is a decimal
immediate["value"] = int(immediate["value"], 0)
return AttrDict({self.IMMEDIATE_ID: immediate})
def get_full_reg_name(self, register):
@@ -357,10 +364,11 @@ class ParserX86ATT(BaseParser):
def normalize_imd(self, imd):
"""Normalize immediate to decimal based representation"""
if "value" in imd:
if imd["value"].lower().startswith("0x"):
# hex, return decimal
return int(imd["value"], 16)
return int(imd["value"], 10)
if isinstance(imd["value"], str):
# return decimal
return int(imd["value"], 0)
else:
return imd["value"]
# identifier
return imd

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python3
from itertools import chain
from copy import deepcopy
from osaca import utils
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
@@ -122,6 +123,7 @@ class ISASemantics(object):
"pre_indexed": pre_indexed,
"post_indexed": post_indexed})
)
# store operand list in dict and reassign operand key/value pair
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
# assign LD/ST flags
@@ -130,6 +132,7 @@ class ISASemantics(object):
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
if self._has_store(instruction_form):
instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
def get_reg_changes(self, instruction_form, only_postindexed=False):
"""
@@ -160,16 +163,16 @@ class ISASemantics(object):
if only_postindexed:
for o in instruction_form.operands:
if 'post_indexed' in o.get('memory', {}):
base_name = o.memory.base.get('prefix', '')+o.memory.base.name
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
return {base_name: {
'name': o.memory.base.get('prefix', '')+o.memory.base.name,
'value': int(o.memory.post_indexed.value)
'name': o.memory.base.get('prefix', '') + o.memory.base.name,
'value': o.memory.post_indexed.value
}}
return {}
reg_operand_names = {} # e.g., {'rax': 'op1'}
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
for o in instruction_form.operands:
if 'pre_indexed' in o.get('memory', {}):
# Assuming no isa_data.operation
@@ -177,24 +180,24 @@ class ISASemantics(object):
raise ValueError(
"ISA information for pre-indexed instruction {!r} has operation set."
"This is currently not supprted.".format(instruction_form.line))
base_name = o.memory.base.get('prefix', '')+o.memory.base.name
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
reg_operand_names = {base_name: 'op1'}
operand_state = {'op1': {
'name': base_name,
'value': int(o.memory.offset.value)
'value': o.memory.offset.value
}}
if isa_data is not None and 'operation' in isa_data:
for i, o in enumerate(instruction_form.operands):
operand_name = "op{}".format(i+1)
operand_name = "op{}".format(i + 1)
if "register" in o:
o_reg_name = o["register"].get('prefix', '')+o["register"]["name"]
o_reg_name = o["register"].get('prefix', '') + o["register"]["name"]
reg_operand_names[o_reg_name] = operand_name
operand_state[operand_name] = {
'name': o_reg_name,
'value': 0}
elif "immediate" in o:
operand_state[operand_name] = {'value': int(o["immediate"]["value"])}
operand_state[operand_name] = {'value': o["immediate"]["value"]}
elif "memory" in o:
# TODO lea needs some thinking about
pass
@@ -209,7 +212,7 @@ class ISASemantics(object):
"""
Create operand dictionary containing src/dst operands out of the ISA data entry and
the oeprands of an instruction form
If breaks_pedendency_on_equal_operands is True (configuted per instruction in ISA db)
and all operands are equal, place operand into destination only.

View File

@@ -1,6 +1,8 @@
#!/usr/bin/env python3
import copy
import os
import signal
import time
from itertools import chain
from multiprocessing import Manager, Process, cpu_count
@@ -140,7 +142,9 @@ class KernelDG(nx.DiGraph):
# terminate running processes
for p in processes:
if p.is_alive():
p.kill()
# Python 3.6 does not support Process.kill().
# Can be changed to `p.kill()` after EoL (01/22) of Py3.6
os.kill(p.pid, signal.SIGKILL)
p.join()
all_paths = list(all_paths)
else:
@@ -378,9 +382,9 @@ class KernelDG(nx.DiGraph):
# determine absolute address change
addr_change = 0
if src.offset and "value" in src.offset:
addr_change += int(src.offset.value, 0)
addr_change += src.offset.value
if mem.offset:
addr_change -= int(mem.offset.value, 0)
addr_change -= mem.offset.value
if mem.base and src.base:
base_change = register_changes.get(
src.base.get('prefix', '') + src.base.name,

View File

@@ -91,7 +91,6 @@ setup(
# Specify the Python versions you support here. In particular, ensure
# that you indicate wheter you support Python2, Python 3 or both.
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
@@ -107,7 +106,7 @@ setup(
# requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=["networkx", "pyparsing>=2.3.1", "ruamel.yaml>=0.15.71"],
python_requires=">=3.5",
python_requires=">=3.6",
# List additional groups of dependencies here (e.g. development
# dependencies). You can install these using the following syntax,
# for example:

View File

@@ -183,14 +183,38 @@ class TestCLI(unittest.TestCase):
output = StringIO()
osaca.run(args, output_file=output)
# WARNING for length
self.assertTrue(output.getvalue().count("WARNING") == 1)
self.assertTrue(
output.getvalue().count(
"WARNING: You are analyzing a large amount of instruction forms"
)
== 1
)
# WARNING for arch
args = parser.parse_args(
["--lines", "100-199", "--ignore-unknown", self._find_test_file(kernel)]
)
output = StringIO()
osaca.run(args, output_file=output)
# WARNING for arch
self.assertTrue(output.getvalue().count("WARNING") == 1)
self.assertTrue(
output.getvalue().count("WARNING: No micro-architecture was specified") == 1
)
# WARNING for timeout
args = parser.parse_args(
["--ignore-unknown", "--lcd-timeout", "0", self._find_test_file(kernel)]
)
output = StringIO()
osaca.run(args, output_file=output)
self.assertTrue(
output.getvalue().count("WARNING: LCD analysis timed out") == 1
)
args = parser.parse_args(
["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)]
)
output = StringIO()
osaca.run(args, output_file=output)
self.assertTrue(
output.getvalue().count("WARNING: LCD analysis timed out") == 0
)
def test_lines_arg(self):
# Run tests with --lines option

View File

@@ -0,0 +1,32 @@
// OSACA-BEGIN
.L5:
add x10, x1, x11
add x6, x1, x8
ld2d {z0.d - z1.d}, p1/z, [x10]
ld2d {z2.d - z3.d}, p1/z, [x6]
mov z5.d, z1.d
fadd z20.d, z3.d, z3.d
mov z1.d, z0.d
add x6, x1, x7
fadd z2.d, z2.d, z2.d
ld2d {z6.d - z7.d}, p1/z, [x6]
fmul z4.d, z5.d, z20.d
add x10, x1, x12
mov z0.d, z7.d
ld2d {z16.d - z17.d}, p1/z, [x10]
mov z3.d, z4.d
fmls z3.d, p0/m, z0.d, z17.d
fmul z0.d, z0.d, z16.d
fmla z3.d, p0/m, z6.d, z16.d
fmla z0.d, p0/m, z6.d, z17.d
fmls z3.d, p0/m, z1.d, z2.d
fmls z0.d, p0/m, z1.d, z20.d
mov z18.d, z3.d
fmsb z5.d, p0/m, z2.d, z0.d
mov z19.d, z5.d
st2d {z18.d - z19.d}, p1, [x6]
add x5, x5, 8
add x1, x1, 128
whilelo p1.d, x5, x9
bne .L5
// OSACA-END

View File

@@ -0,0 +1,192 @@
# OSACA-BEGIN
push %r12
push %r13
push %r14
push %r15
push %rbp
mov %ecx,%r12d
mov %esi,%r14d
mov %r12d,%ecx
mov %r14d,%esi
mov %rdx,%r13
mov %rdi,%rbp
callq 0x4210d0
mov %rdx,%r8
movzbl (%rdi),%r9d
movslq %esi,%rsi
movslq %ecx,%rcx
movzbl (%r8),%r10d
vmovd %r9d,%xmm13
movzbl 0x4(%r8),%r9d
vpinsrb $0x1,(%rsi,%rdi,1),%xmm13,%xmm14
lea (%rsi,%rsi,2),%rdx
vmovd %r10d,%xmm1
vpinsrb $0x1,(%rcx,%r8,1),%xmm1,%xmm0
vmovd %r9d,%xmm7
vpinsrb $0x1,0x4(%rcx,%r8,1),%xmm7,%xmm5
vpinsrb $0x2,(%rdi,%rsi,2),%xmm14,%xmm15
vpinsrb $0x2,(%r8,%rcx,2),%xmm0,%xmm6
vpinsrb $0x2,0x4(%r8,%rcx,2),%xmm5,%xmm9
vpinsrb $0x3,(%rdx,%rdi,1),%xmm15,%xmm4
movzbl 0x4(%rdi),%r11d
lea (%rcx,%rcx,2),%rax
vpinsrb $0x3,(%rax,%r8,1),%xmm6,%xmm10
vpinsrb $0x3,0x4(%rax,%r8,1),%xmm9,%xmm11
vmovd %r11d,%xmm2
vpinsrb $0x1,0x4(%rsi,%rdi,1),%xmm2,%xmm8
vpinsrb $0x2,0x4(%rdi,%rsi,2),%xmm8,%xmm3
movzbl 0x1(%rdi),%r10d
movzbl 0x5(%rdi),%r9d
movzbl 0x1(%r8),%r11d
vmovd %r10d,%xmm1
movzbl 0x5(%r8),%r10d
vmovd %r9d,%xmm7
vpmovzxbd %xmm4,%xmm4
vmovd %r11d,%xmm2
vpmovzxbd %xmm10,%xmm10
vpinsrb $0x3,0x4(%rdx,%rdi,1),%xmm3,%xmm12
vpsubd %xmm10,%xmm4,%xmm14
vpinsrb $0x1,0x5(%rsi,%rdi,1),%xmm7,%xmm5
vmovd %r10d,%xmm4
vpinsrb $0x1,0x5(%rcx,%r8,1),%xmm4,%xmm10
vpinsrb $0x1,0x1(%rcx,%r8,1),%xmm2,%xmm8
vpinsrb $0x1,0x1(%rsi,%rdi,1),%xmm1,%xmm0
vpinsrb $0x2,0x5(%rdi,%rsi,2),%xmm5,%xmm9
vpinsrb $0x2,0x1(%r8,%rcx,2),%xmm8,%xmm3
vpinsrb $0x2,0x1(%rdi,%rsi,2),%xmm0,%xmm6
vpmovzxbd %xmm12,%xmm12
vpmovzxbd %xmm11,%xmm11
vpsubd %xmm11,%xmm12,%xmm13
vpinsrb $0x2,0x5(%r8,%rcx,2),%xmm10,%xmm11
vpslld $0x10,%xmm13,%xmm15
vpinsrb $0x3,0x1(%rdx,%rdi,1),%xmm6,%xmm13
vpaddd %xmm15,%xmm14,%xmm12
vpinsrb $0x3,0x5(%rdx,%rdi,1),%xmm9,%xmm15
vpinsrb $0x3,0x1(%rax,%r8,1),%xmm3,%xmm14
vpinsrb $0x3,0x5(%rax,%r8,1),%xmm11,%xmm1
movzbl 0x2(%rdi),%r11d
movzbl 0x2(%r8),%r9d
vpmovzxbd %xmm15,%xmm15
vmovd %r11d,%xmm8
vmovd %r9d,%xmm5
vpinsrb $0x1,0x2(%rsi,%rdi,1),%xmm8,%xmm3
vpinsrb $0x1,0x2(%rcx,%r8,1),%xmm5,%xmm9
vpinsrb $0x2,0x2(%rdi,%rsi,2),%xmm3,%xmm7
vpinsrb $0x2,0x2(%r8,%rcx,2),%xmm9,%xmm4
vpinsrb $0x3,0x2(%rdx,%rdi,1),%xmm7,%xmm3
vpinsrb $0x3,0x2(%rax,%r8,1),%xmm4,%xmm7
vpmovzxbd %xmm1,%xmm1
movzbl 0x6(%r8),%r11d
vpsubd %xmm1,%xmm15,%xmm0
vpmovzxbd %xmm13,%xmm13
vpslld $0x10,%xmm0,%xmm2
vpmovzxbd %xmm14,%xmm14
vpsubd %xmm14,%xmm13,%xmm6
vpaddd %xmm2,%xmm6,%xmm11
vmovd %r11d,%xmm6
vpinsrb $0x1,0x6(%rcx,%r8,1),%xmm6,%xmm2
movzbl 0x6(%rdi),%r10d
vpinsrb $0x2,0x6(%r8,%rcx,2),%xmm2,%xmm8
vmovd %r10d,%xmm10
vpinsrb $0x1,0x6(%rsi,%rdi,1),%xmm10,%xmm1
vpinsrb $0x3,0x6(%rax,%r8,1),%xmm8,%xmm9
vpinsrb $0x2,0x6(%rdi,%rsi,2),%xmm1,%xmm0
movzbl 0x3(%rdi),%r9d
movzbl 0x7(%rdi),%r11d
vpmovzxbd %xmm3,%xmm3
vpmovzxbd %xmm7,%xmm7
vmovd %r9d,%xmm14
vmovd %r11d,%xmm8
vpsubd %xmm7,%xmm3,%xmm10
vpinsrb $0x1,0x3(%rsi,%rdi,1),%xmm14,%xmm15
vpinsrb $0x1,0x7(%rsi,%rdi,1),%xmm8,%xmm3
vpinsrb $0x3,0x6(%rdx,%rdi,1),%xmm0,%xmm5
vpinsrb $0x2,0x3(%rdi,%rsi,2),%xmm15,%xmm1
vpinsrb $0x2,0x7(%rdi,%rsi,2),%xmm3,%xmm7
vpaddd %xmm11,%xmm12,%xmm3
vpmovzxbd %xmm5,%xmm5
vpmovzxbd %xmm9,%xmm9
vpsubd %xmm9,%xmm5,%xmm4
vpslld $0x10,%xmm4,%xmm13
vpinsrb $0x3,0x7(%rdx,%rdi,1),%xmm7,%xmm15
vpaddd %xmm13,%xmm10,%xmm10
vpinsrb $0x3,0x3(%rdx,%rdi,1),%xmm1,%xmm13
movzbl 0x7(%r8),%edx
movzbl 0x3(%r8),%r10d
vpmovzxbd %xmm15,%xmm15
vmovd %edx,%xmm5
vpinsrb $0x1,0x7(%rcx,%r8,1),%xmm5,%xmm9
vmovd %r10d,%xmm0
vpinsrb $0x1,0x3(%rcx,%r8,1),%xmm0,%xmm6
vpinsrb $0x2,0x7(%r8,%rcx,2),%xmm9,%xmm4
vpinsrb $0x2,0x3(%r8,%rcx,2),%xmm6,%xmm2
vpinsrb $0x3,0x7(%rax,%r8,1),%xmm4,%xmm1
vpinsrb $0x3,0x3(%rax,%r8,1),%xmm2,%xmm14
vpmovzxbd %xmm1,%xmm1
vpmovzxbd %xmm13,%xmm13
vpsubd %xmm1,%xmm15,%xmm0
vpmovzxbd %xmm14,%xmm14
vpslld $0x10,%xmm0,%xmm2
vpsubd %xmm14,%xmm13,%xmm6
vpsubd %xmm11,%xmm12,%xmm1
vpaddd %xmm2,%xmm6,%xmm8
vpaddd %xmm8,%xmm10,%xmm12
vpsubd %xmm8,%xmm10,%xmm0
vpaddd %xmm12,%xmm3,%xmm8
vpaddd %xmm0,%xmm1,%xmm7
vpsubd %xmm12,%xmm3,%xmm3
vpsubd %xmm0,%xmm1,%xmm5
vunpcklps %xmm7,%xmm8,%xmm6
vunpcklps %xmm5,%xmm3,%xmm2
vunpckhps %xmm7,%xmm8,%xmm9
vunpckhps %xmm5,%xmm3,%xmm4
vunpcklpd %xmm2,%xmm6,%xmm10
vunpckhpd %xmm2,%xmm6,%xmm11
vunpcklpd %xmm4,%xmm9,%xmm12
vpaddd %xmm11,%xmm10,%xmm14
vunpckhpd %xmm4,%xmm9,%xmm13
vpsubd %xmm11,%xmm10,%xmm1
vpaddd %xmm13,%xmm12,%xmm15
vpsubd %xmm13,%xmm12,%xmm0
vpaddd %xmm15,%xmm14,%xmm9
vpaddd %xmm0,%xmm1,%xmm7
vpsubd %xmm15,%xmm14,%xmm8
vpsubd %xmm0,%xmm1,%xmm6
vmovdqu 0x279d68(%rip),%xmm15
vpsrld $0xf,%xmm9,%xmm2
vpsrld $0xf,%xmm7,%xmm10
vpand %xmm15,%xmm2,%xmm3
vmovdqu 0x279d40(%rip),%xmm4
vpand %xmm15,%xmm10,%xmm11
vpsrld $0xf,%xmm8,%xmm12
vpsrld $0xf,%xmm6,%xmm14
vpmulld %xmm3,%xmm4,%xmm5
vpand %xmm15,%xmm12,%xmm13
vpmulld %xmm11,%xmm4,%xmm3
vpand %xmm15,%xmm14,%xmm1
vpmulld %xmm13,%xmm4,%xmm2
vpaddd %xmm3,%xmm7,%xmm7
vpmulld %xmm1,%xmm4,%xmm0
vpaddd %xmm5,%xmm9,%xmm4
vpxor %xmm5,%xmm4,%xmm5
vpxor %xmm3,%xmm7,%xmm9
vpaddd %xmm2,%xmm8,%xmm8
vpaddd %xmm9,%xmm5,%xmm3
vpxor %xmm2,%xmm8,%xmm2
vpaddd %xmm0,%xmm6,%xmm6
vpaddd %xmm2,%xmm3,%xmm4
vpxor %xmm0,%xmm6,%xmm0
vpaddd %xmm0,%xmm4,%xmm2
vpxor %xmm1,%xmm1,%xmm1
vpaddd %xmm2,%xmm1,%xmm1
vpsrldq $0x8,%xmm1,%xmm3
vpaddd %xmm3,%xmm1,%xmm5
vpsrlq $0x20,%xmm5,%xmm6
vpaddd %xmm6,%xmm5,%xmm7
vmovd %xmm7,%ecx
movzwl %cx,%eax
shr $0x10,%ecx
add %ecx,%eax
shr %eax
retq
# OSACA-END

View File

@@ -102,7 +102,7 @@ class TestParserAArch64(unittest.TestCase):
self.assertEqual(parsed_3.instruction, "mov")
self.assertEqual(parsed_3.operands[0].register.name, "2")
self.assertEqual(parsed_3.operands[0].register.prefix, "x")
self.assertEqual(parsed_3.operands[1].immediate.value, "0x222")
self.assertEqual(parsed_3.operands[1].immediate.value, int("0x222", 0))
self.assertEqual(parsed_3.comment, "NOT IACA END")
self.assertEqual(parsed_4.instruction, "str")
@@ -208,7 +208,7 @@ class TestParserAArch64(unittest.TestCase):
{"prfop": {"type": ["PLD"], "target": ["L1"], "policy": ["KEEP"]}},
{
"memory": {
"offset": {"value": "2048"},
"offset": {"value": 2048},
"base": {"prefix": "x", "name": "26"},
"index": None,
"scale": 1,
@@ -228,7 +228,7 @@ class TestParserAArch64(unittest.TestCase):
{"register": {"prefix": "x", "name": "30"}},
{
"memory": {
"offset": {"value": "-16"},
"offset": {"value": -16},
"base": {"name": "sp", "prefix": "x"},
"index": None,
"scale": 1,
@@ -253,7 +253,7 @@ class TestParserAArch64(unittest.TestCase):
"base": {"prefix": "x", "name": "11"},
"index": None,
"scale": 1,
"post_indexed": {"value": "64"},
"post_indexed": {"value": 64},
}
},
],
@@ -270,7 +270,7 @@ class TestParserAArch64(unittest.TestCase):
{"register": {"prefix": "p", "name": "0", "predication": "m"}},
{"register": {"prefix": "z", "name": "29", "shape": "d"}},
{"register": {"prefix": "z", "name": "21", "shape": "d"}},
{"immediate": {"value": "90", "type": "int"}},
{"immediate": {"value": 90, "type": "int"}},
],
"directive": None,
"comment": None,
@@ -326,32 +326,34 @@ class TestParserAArch64(unittest.TestCase):
def test_multiple_regs(self):
instr_range = "PUSH {x5-x7}"
reg_range = AttrDict(
{
"register": {
"range": [{"prefix": "x", "name": "5"}, {"prefix": "x", "name": "7"}],
"index": None,
}
}
)
instr_list = "POP {x5, x7, x9}"
reg_list = AttrDict(
{
"register": {
"list": [
{"prefix": "x", "name": "5"},
{"prefix": "x", "name": "7"},
{"prefix": "x", "name": "9"},
],
"index": None,
}
}
)
instr_list = "POP {x5, x6, x7}"
instr_range_with_index = "ld4 {v0.S - v3.S}[2]"
instr_list_with_index = "ld4 {v0.S, v1.S, v2.S, v3.S}[2]"
instr_range_single = "dummy { z1.d }"
reg_list = [
AttrDict({"register": {"prefix": "x", "name": "5"}}),
AttrDict({"register": {"prefix": "x", "name": "6"}}),
AttrDict({"register": {"prefix": "x", "name": "7"}}),
]
reg_list_idx = [
AttrDict({"register": {"prefix": "v", "name": "0", "shape": "S", "index": 2}}),
AttrDict({"register": {"prefix": "v", "name": "1", "shape": "S", "index": 2}}),
AttrDict({"register": {"prefix": "v", "name": "2", "shape": "S", "index": 2}}),
AttrDict({"register": {"prefix": "v", "name": "3", "shape": "S", "index": 2}}),
]
reg_list_single = [AttrDict({"register": {"prefix": "z", "name": "1", "shape": "d"}})]
prange = self.parser.parse_line(instr_range)
plist = self.parser.parse_line(instr_list)
p_idx_range = self.parser.parse_line(instr_range_with_index)
p_idx_list = self.parser.parse_line(instr_list_with_index)
p_single = self.parser.parse_line(instr_range_single)
self.assertEqual(prange.operands[0], reg_range)
self.assertEqual(plist.operands[0], reg_list)
self.assertEqual(prange.operands, reg_list)
self.assertEqual(plist.operands, reg_list)
self.assertEqual(p_idx_range.operands, reg_list_idx)
self.assertEqual(p_idx_list.operands, reg_list_idx)
self.assertEqual(p_single.operands, reg_list_single)
def test_reg_dependency(self):
reg_1_1 = AttrDict({"prefix": "b", "name": "1"})

View File

@@ -120,12 +120,12 @@ class TestParserX86ATT(unittest.TestCase):
self.assertIsNone(parsed_2.comment)
self.assertEqual(parsed_3.instruction, "movl")
self.assertEqual(parsed_3.operands[0].immediate.value, "222")
self.assertEqual(parsed_3.operands[0].immediate.value, 222)
self.assertEqual(parsed_3.operands[1].register.name, "ebx")
self.assertEqual(parsed_3.comment, "IACA END")
self.assertEqual(parsed_4.instruction, "vmovss")
self.assertEqual(parsed_4.operands[1].memory.offset.value, "-4")
self.assertEqual(parsed_4.operands[1].memory.offset.value, -4)
self.assertEqual(parsed_4.operands[1].memory.base.name, "rsp")
self.assertEqual(parsed_4.operands[1].memory.index.name, "rax")
self.assertEqual(parsed_4.operands[1].memory.scale, 8)
@@ -146,7 +146,7 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(parsed_6.operands[0].memory.scale, 8)
self.assertEqual(parsed_6.operands[1].register.name, "rbx")
self.assertEqual(parsed_7.operands[0].immediate.value, "0x1")
self.assertEqual(parsed_7.operands[0].immediate.value, 0x1)
self.assertEqual(parsed_7.operands[1].register.name, "xmm0")
self.assertEqual(parsed_7.operands[2].register.name, "ymm1")
self.assertEqual(parsed_7.operands[3].register.name, "ymm1")
@@ -189,7 +189,7 @@ class TestParserX86ATT(unittest.TestCase):
"operands": [
{
"memory": {
"offset": {"value": "2"},
"offset": {"value": 2},
"base": {"name": "rax"},
"index": {"name": "rax"},
"scale": 1,
@@ -240,7 +240,7 @@ class TestParserX86ATT(unittest.TestCase):
imd_decimal_1 = {"value": "79"}
imd_hex_1 = {"value": "0x4f"}
imd_decimal_2 = {"value": "8"}
imd_hex_2 = {"value": "0x8"}
imd_hex_2 = {"value": "8"}
self.assertEqual(
self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1)
)

View File

@@ -5,15 +5,14 @@ Unit tests for Semantic Analysis
import os
import unittest
import time
from copy import deepcopy
import networkx as nx
from osaca.osaca import get_unmatched_instruction_ratio
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from osaca.semantics import (
INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section, ISASemantics
)
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, ISASemantics,
KernelDG, MachineModel, reduce_to_section)
class TestSemanticTools(unittest.TestCase):
@@ -30,17 +29,30 @@ class TestSemanticTools(unittest.TestCase):
cls.code_x86 = f.read()
with open(cls._find_file("kernel_x86_memdep.s")) as f:
cls.code_x86_memdep = f.read()
with open(cls._find_file("kernel_x86_long_LCD.s")) as f:
cls.code_x86_long_LCD = f.read()
with open(cls._find_file("kernel_aarch64_memdep.s")) as f:
cls.code_aarch64_memdep = f.read()
with open(cls._find_file("kernel_aarch64.s")) as f:
cls.code_AArch64 = f.read()
with open(cls._find_file("kernel_aarch64_sve.s")) as f:
cls.code_AArch64_SVE = f.read()
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
cls.kernel_x86_memdep = reduce_to_section(
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86")
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86"
)
cls.kernel_x86_long_LCD = reduce_to_section(
cls.parser_x86.parse_file(cls.code_x86_long_LCD), "x86"
)
cls.kernel_AArch64 = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64")
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64"
)
cls.kernel_aarch64_memdep = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64")
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64"
)
cls.kernel_aarch64_SVE = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64"
)
# set up machine models
cls.machine_model_csx = MachineModel(
@@ -49,6 +61,9 @@ class TestSemanticTools(unittest.TestCase):
cls.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml")
)
cls.machine_model_a64fx = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml")
)
cls.semantics_x86 = ISASemantics("x86")
cls.semantics_csx = ArchSemantics(
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
@@ -58,6 +73,10 @@ class TestSemanticTools(unittest.TestCase):
cls.machine_model_tx2,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
)
cls.semantics_a64fx = ArchSemantics(
cls.machine_model_a64fx,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
)
cls.machine_model_zen = MachineModel(arch="zen1")
for i in range(len(cls.kernel_x86)):
@@ -66,12 +85,18 @@ class TestSemanticTools(unittest.TestCase):
for i in range(len(cls.kernel_x86_memdep)):
cls.semantics_csx.assign_src_dst(cls.kernel_x86_memdep[i])
cls.semantics_csx.assign_tp_lt(cls.kernel_x86_memdep[i])
for i in range(len(cls.kernel_x86_long_LCD)):
cls.semantics_csx.assign_src_dst(cls.kernel_x86_long_LCD[i])
cls.semantics_csx.assign_tp_lt(cls.kernel_x86_long_LCD[i])
for i in range(len(cls.kernel_AArch64)):
cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i])
cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i])
for i in range(len(cls.kernel_aarch64_memdep)):
cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i])
cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i])
for i in range(len(cls.kernel_aarch64_SVE)):
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
###########
# Tests
@@ -284,8 +309,9 @@ class TestSemanticTools(unittest.TestCase):
dg.export_graph(filepath="/dev/null")
def test_memdependency_x86(self):
dg = KernelDG(self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx,
self.semantics_csx)
dg = KernelDG(
self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, self.semantics_csx
)
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {10, 12})
@@ -295,8 +321,9 @@ class TestSemanticTools(unittest.TestCase):
dg.export_graph(filepath="/dev/null")
def test_kernelDG_AArch64(self):
dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2,
self.semantics_tx2)
dg = KernelDG(
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2
)
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=4)), {9, 10})
@@ -321,6 +348,15 @@ class TestSemanticTools(unittest.TestCase):
# test dot creation
dg.export_graph(filepath="/dev/null")
def test_kernelDG_SVE(self):
KernelDG(
self.kernel_aarch64_SVE,
self.parser_AArch64,
self.machine_model_a64fx,
self.semantics_a64fx,
)
# TODO check for correct analysis
def test_hidden_load(self):
machine_model_hld = MachineModel(
path_to_yaml=self._find_file("hidden_load_machine_model.yml")
@@ -353,14 +389,20 @@ class TestSemanticTools(unittest.TestCase):
dg.get_loopcarried_dependencies()
def test_loop_carried_dependency_aarch64(self):
dg = KernelDG(self.kernel_aarch64_memdep, self.parser_AArch64, self.machine_model_tx2,
self.semantics_tx2)
dg = KernelDG(
self.kernel_aarch64_memdep,
self.parser_AArch64,
self.machine_model_tx2,
self.semantics_tx2,
)
lc_deps = dg.get_loopcarried_dependencies()
self.assertEqual(len(lc_deps), 2)
# based on line 6
self.assertEqual(lc_deps[6]["latency"], 28.0)
self.assertEqual([(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)])
self.assertEqual(
[(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
)
def test_loop_carried_dependency_x86(self):
lcd_id = 8
@@ -375,7 +417,7 @@ class TestSemanticTools(unittest.TestCase):
self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1)
self.assertEqual(
lc_deps[lcd_id]["dependencies"][0][0],
dg.dg.nodes(data=True)[lcd_id]["instruction_form"]
dg.dg.nodes(data=True)[lcd_id]["instruction_form"],
)
# w/ flag dependencies: ID 9 w/ len=2
# w/o flag dependencies: ID 5 w/ len=1
@@ -389,6 +431,31 @@ class TestSemanticTools(unittest.TestCase):
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
)
def test_timeout_during_loop_carried_dependency(self):
start_time = time.perf_counter()
KernelDG(
self.kernel_x86_long_LCD,
self.parser_x86,
self.machine_model_csx,
self.semantics_x86,
timeout=10
)
end_time = time.perf_counter()
time_10 = end_time - start_time
start_time = time.perf_counter()
KernelDG(
self.kernel_x86_long_LCD,
self.parser_x86,
self.machine_model_csx,
self.semantics_x86,
timeout=2
)
end_time = time.perf_counter()
time_2 = end_time - start_time
self.assertTrue(time_10 > 10)
self.assertTrue(2 < time_2)
self.assertTrue(time_2 < (time_10 - 7))
def test_is_read_is_written_x86(self):
# independent form HW model
dag = KernelDG(self.kernel_x86, self.parser_x86, None, None)

View File

@@ -232,7 +232,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -284,8 +284,10 @@
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"execution_count": 27,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
@@ -296,8 +298,672 @@
"ZEN has 156 tests, compiled to 126 unique assembly representations.\n",
"ZEN2 has 156 tests, compiled to 126 unique assembly representations.\n",
"TX2 has 104 tests, compiled to 78 unique assembly representations.\n",
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n"
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n",
"High-level iterations in assembly block: 16\n",
"Measured: 1.1903856655856655\n",
"IACA Predicted: 1.96875 TP: 1.875 LCD: None CP: None\n",
"Ithemal Predicted: nan TP: None LCD: None CP: None\n",
"LLVM-MCA Predicted: 2.240625 TP: 1.948125 LCD: 2.240625 CP: 3.8125\n",
"OSACA Predicted: 1.875 TP: 1.875 LCD: 0.5 CP: 2.75\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space: pre !important;\">Open Source Architecture Code Analyzer (OSACA) - 0.3.14\n",
"Analyzed file: build/SKX/icc/O3/pi.marked.s\n",
"Architecture: SKX\n",
"Timestamp: 2021-04-15 12:15:40\n",
"\n",
"\n",
" P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction\n",
" * - Instruction micro-ops not bound to a port\n",
" X - No throughput/latency information for this instruction in data file\n",
"\n",
"\n",
"Combined Analysis Report\n",
"------------------------\n",
" Port pressure in cycles \n",
" | 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |\n",
"-------------------------------------------------------------------------------------------------\n",
" 62 | | | | | | | | || | | # pointer_increment=128 fa3c665ee18e1e5f704c8a6026891c36\n",
" 63 | | | | | | | | || | | ..B1.4: # Preds ..B1.4 ..B1.3\n",
" 64 | | | | | | | | || | | # Execution count [5.00e+00]\n",
" 65 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addl $32, %ecx #16.5\n",
" 66 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm5, %ymm9, %ymm14 #17.9\n",
" 67 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm9, %zmm8 #17.14\n",
" 68 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm8, %zmm1, %zmm10 #17.18\n",
" 69 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm2, %zmm11 #17.25\n",
" 70 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm11, %zmm11 #18.38\n",
" 71 | | | | | | | | || | | * vmovaps %zmm0, %zmm29 #18.38\n",
" 72 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm11, %zmm13 #18.38\n",
" 73 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11 #18.38\n",
" 74 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm13, %k0 #18.38\n",
" 75 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm11, %zmm11, %zmm12 #18.38\n",
" 76 | 1.00 | | | | | | | || | | knotw %k0, %k1 #18.38\n",
" 77 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm11, %zmm13{%k1} #18.38\n",
" 78 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm12, %zmm13{%k1} #18.38\n",
" 79 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38\n",
" 80 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm14, %ymm20 #17.9\n",
" 81 | 0.50 | | | | | 1.50 | | || 7.0 | | vcvtdq2pd %ymm14, %zmm15 #17.14\n",
" 82 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddpd %zmm15, %zmm1, %zmm16 #17.18\n",
" 83 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm16, %zmm2, %zmm17 #17.25\n",
" 84 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm0, %zmm17, %zmm17 #18.38\n",
" 85 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm17, %zmm19 #18.38\n",
" 86 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17 #18.38\n",
" 87 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm19, %k2 #18.38\n",
" 88 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm17, %zmm18 #18.38\n",
" 89 | 1.00 | | | | | | | || | | knotw %k2, %k3 #18.38\n",
" 90 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm19, %zmm17, %zmm19{%k3} #18.38\n",
" 91 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm19, %zmm18, %zmm19{%k3} #18.38\n",
" 92 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38\n",
" 93 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm20, %ymm26 #17.9\n",
" 94 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm20, %zmm21 #17.14\n",
" 95 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm21, %zmm1, %zmm22 #17.18\n",
" 96 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm22, %zmm2, %zmm23 #17.25\n",
" 97 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm23, %zmm23 #18.38\n",
" 98 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm23, %zmm25 #18.38\n",
" 99 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23 #18.38\n",
" 100 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm25, %k4 #18.38\n",
" 101 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm23, %zmm23, %zmm24 #18.38\n",
" 102 | 1.00 | | | | | | | || | | knotw %k4, %k5 #18.38\n",
" 103 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm23, %zmm25{%k5} #18.38\n",
" 104 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm24, %zmm25{%k5} #18.38\n",
" 105 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm25, %zmm6 #18.38\n",
" 106 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm26, %zmm27 #17.14\n",
" 107 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm26, %ymm9 #17.9\n",
" 108 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm27, %zmm1, %zmm28 #17.18\n",
" 109 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm28, %zmm2, %zmm8 #17.25\n",
" 110 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm8, %zmm8, %zmm29 #18.38\n",
" 111 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm29, %zmm31 #18.38\n",
" 112 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29 #18.38\n",
" 113 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm31, %k6 #18.38\n",
" 114 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm29, %zmm29, %zmm30 #18.38\n",
" 115 | 1.00 | | | | | | | || | | knotw %k6, %k7 #18.38\n",
" 116 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm29, %zmm31{%k7} #18.38\n",
" 117 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm30, %zmm31{%k7} #18.38\n",
" 118 | 0.00 | | | | | 1.00 | | || 0.0 | 4.0 | vfmadd231pd %zmm4, %zmm31, %zmm3 #18.38\n",
" 119 | 0.00 | 0.34 | | | | 0.00 | 0.66 | || | | cmpl %edx, %ecx #16.5\n",
" 120 | 0.00 | | | | | | 1.00 | || | | jb ..B1.4 # Prob 82% #16.5\n",
"\n",
" 30.0 4.34 2.00 2.00 2.00 2.00 30.0 2.66 44 8.0 \n",
"\n",
"\n",
"Loop-Carried Dependencies Analysis Report\n",
"-----------------------------------------\n",
" 92 | 8.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38| [92, 118]\n",
" 79 | 8.0 | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38| [79, 105]\n",
" 66 | 4.0 | vpaddd %ymm5, %ymm9, %ymm14 #17.9| [66, 80, 93, 107]\n",
" 65 | 1.0 | addl $32, %ecx #16.5| [65]\n",
"</pre>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space: pre !important;\">Iterations: 100\n",
"Instructions: 5600\n",
"Total Cycles: 3585\n",
"Total uOps: 7200\n",
"\n",
"Dispatch Width: 6\n",
"uOps Per Cycle: 2.01\n",
"IPC: 1.56\n",
"Block RThroughput: 18.0\n",
"\n",
"\n",
"Instruction Info:\n",
"[1]: #uOps\n",
"[2]: Latency\n",
"[3]: RThroughput\n",
"[4]: MayLoad\n",
"[5]: MayStore\n",
"[6]: HasSideEffects (U)\n",
"\n",
"[1] [2] [3] [4] [5] [6] Instructions:\n",
" 1 1 0.25 addl\t$32, %ecx\n",
" 1 1 0.33 vpaddd\t%ymm5, %ymm9, %ymm14\n",
" 2 7 1.00 vcvtdq2pd\t%ymm9, %zmm8\n",
" 1 4 0.50 vaddpd\t%zmm8, %zmm1, %zmm10\n",
" 1 4 0.50 vmulpd\t%zmm10, %zmm2, %zmm11\n",
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
" 1 1 0.33 vmovaps\t%zmm0, %zmm29\n",
" 3 4 2.00 vrcp14pd\t%zmm11, %zmm13\n",
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
" 1 4 1.00 vfpclasspd\t$30, %zmm13, %k0\n",
" 1 4 0.50 vmulpd\t%zmm11, %zmm11, %zmm12\n",
" 1 1 1.00 knotw\t%k0, %k1\n",
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
" 1 1 0.33 vpaddd\t%ymm5, %ymm14, %ymm20\n",
" 2 7 1.00 vcvtdq2pd\t%ymm14, %zmm15\n",
" 1 4 0.50 vaddpd\t%zmm15, %zmm1, %zmm16\n",
" 1 4 0.50 vmulpd\t%zmm16, %zmm2, %zmm17\n",
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
" 3 4 2.00 vrcp14pd\t%zmm17, %zmm19\n",
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
" 1 4 1.00 vfpclasspd\t$30, %zmm19, %k2\n",
" 1 4 0.50 vmulpd\t%zmm17, %zmm17, %zmm18\n",
" 1 1 1.00 knotw\t%k2, %k3\n",
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
" 1 1 0.33 vpaddd\t%ymm5, %ymm20, %ymm26\n",
" 2 7 1.00 vcvtdq2pd\t%ymm20, %zmm21\n",
" 1 4 0.50 vaddpd\t%zmm21, %zmm1, %zmm22\n",
" 1 4 0.50 vmulpd\t%zmm22, %zmm2, %zmm23\n",
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
" 3 4 2.00 vrcp14pd\t%zmm23, %zmm25\n",
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
" 1 4 1.00 vfpclasspd\t$30, %zmm25, %k4\n",
" 1 4 0.50 vmulpd\t%zmm23, %zmm23, %zmm24\n",
" 1 1 1.00 knotw\t%k4, %k5\n",
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
" 2 7 1.00 vcvtdq2pd\t%ymm26, %zmm27\n",
" 1 1 0.33 vpaddd\t%ymm5, %ymm26, %ymm9\n",
" 1 4 0.50 vaddpd\t%zmm27, %zmm1, %zmm28\n",
" 1 4 0.50 vmulpd\t%zmm28, %zmm2, %zmm8\n",
" 1 4 0.50 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
" 3 4 2.00 vrcp14pd\t%zmm29, %zmm31\n",
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
" 1 4 1.00 vfpclasspd\t$30, %zmm31, %k6\n",
" 1 4 0.50 vmulpd\t%zmm29, %zmm29, %zmm30\n",
" 1 1 1.00 knotw\t%k6, %k7\n",
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
" 1 1 0.25 cmpl\t%edx, %ecx\n",
" 1 1 0.50 jb\t..B1.4\n",
"\n",
"\n",
"Resources:\n",
"[0] - SKXDivider\n",
"[1] - SKXFPDivider\n",
"[2] - SKXPort0\n",
"[3] - SKXPort1\n",
"[4] - SKXPort2\n",
"[5] - SKXPort3\n",
"[6] - SKXPort4\n",
"[7] - SKXPort5\n",
"[8] - SKXPort6\n",
"[9] - SKXPort7\n",
"\n",
"\n",
"Resource pressure per iteration:\n",
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] \n",
" - - 31.17 5.72 2.00 2.00 - 29.10 2.01 - \n",
"\n",
"Resource pressure by instruction:\n",
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:\n",
" - - - 0.80 - - - 0.19 0.01 - addl\t$32, %ecx\n",
" - - 0.07 0.92 - - - 0.01 - - vpaddd\t%ymm5, %ymm9, %ymm14\n",
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm9, %zmm8\n",
" - - 0.42 - - - - 0.58 - - vaddpd\t%zmm8, %zmm1, %zmm10\n",
" - - 0.51 - - - - 0.49 - - vmulpd\t%zmm10, %zmm2, %zmm11\n",
" - - 0.45 - - - - 0.55 - - vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
" - - - 1.00 - - - - - - vmovaps\t%zmm0, %zmm29\n",
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm11, %zmm13\n",
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm13, %k0\n",
" - - 0.49 - - - - 0.51 - - vmulpd\t%zmm11, %zmm11, %zmm12\n",
" - - 1.00 - - - - - - - knotw\t%k0, %k1\n",
" - - 0.44 - - - - 0.56 - - vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
" - - 0.70 - - - - 0.30 - - vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm14, %ymm20\n",
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm14, %zmm15\n",
" - - 0.48 - - - - 0.52 - - vaddpd\t%zmm15, %zmm1, %zmm16\n",
" - - 0.42 - - - - 0.58 - - vmulpd\t%zmm16, %zmm2, %zmm17\n",
" - - 0.32 - - - - 0.68 - - vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm17, %zmm19\n",
" - - 0.32 - 1.00 - - 0.68 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm19, %k2\n",
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm17, %zmm17, %zmm18\n",
" - - 1.00 - - - - - - - knotw\t%k2, %k3\n",
" - - 0.53 - - - - 0.47 - - vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
" - - 0.57 - - - - 0.43 - - vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm20, %ymm26\n",
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm20, %zmm21\n",
" - - 0.52 - - - - 0.48 - - vaddpd\t%zmm21, %zmm1, %zmm22\n",
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm22, %zmm2, %zmm23\n",
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm23, %zmm25\n",
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm25, %k4\n",
" - - 0.53 - - - - 0.47 - - vmulpd\t%zmm23, %zmm23, %zmm24\n",
" - - 1.00 - - - - - - - knotw\t%k4, %k5\n",
" - - 0.42 - - - - 0.58 - - vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
" - - 0.60 - - - - 0.40 - - vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm26, %zmm27\n",
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm26, %ymm9\n",
" - - 0.26 - - - - 0.74 - - vaddpd\t%zmm27, %zmm1, %zmm28\n",
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm28, %zmm2, %zmm8\n",
" - - 0.34 - - - - 0.66 - - vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm29, %zmm31\n",
" - - 0.34 - 1.00 - - 0.66 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm31, %k6\n",
" - - 0.52 - - - - 0.48 - - vmulpd\t%zmm29, %zmm29, %zmm30\n",
" - - 1.00 - - - - - - - knotw\t%k6, %k7\n",
" - - 0.47 - - - - 0.53 - - vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
" - - 0.66 - - - - 0.34 - - vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
" - - - - - - - - 1.00 - cmpl\t%edx, %ecx\n",
" - - - - - - - - 1.00 - jb\t..B1.4\n",
"\n",
"\n",
"Timeline view:\n",
" 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 \n",
"Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678\n",
"\n",
"[0,0] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
"[0,1] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
"[0,2] D=eeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
"[0,3] D========eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
"[0,4] D============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
"[0,5] .D===============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"[0,6] .DeE------------------R . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
"[0,7] .D===================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
"[0,8] . D======================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"[0,9] . D======================eeeeE-------R . . . . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
"[0,10] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
"[0,11] . D==========================eE----------R . . . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
"[0,12] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"[0,13] . D====================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"[0,14] . D========================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"[0,15] . DeE-------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
"[0,16] . DeeeeeeeE-------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
"[0,17] . D=======eeeeE---------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
"[0,18] . D==========eeeeE-----------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
"[0,19] . D==============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"[0,20] . D==================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
"[0,21] . D=====================eeeeeeeeeeeE----------R. . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"[0,22] . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
"[0,23] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
"[0,24] . D==========================eE---------------R. . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
"[0,25] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"[0,26] . .D===================================eeeeE--R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"[0,27] . .D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"[0,28] . .DeE------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
"[0,29] . .DeeeeeeeE------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
"[0,30] . .D=======eeeeE--------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
"[0,31] . . D==========eeeeE----------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
"[0,32] . . D==============eeeeE------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"[0,33] . . D==================eeeeE--------------------R . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
"[0,34] . . D=====================eeeeeeeeeeeE---------R . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"[0,35] . . D=====================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
"[0,36] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
"[0,37] . . D==========================eE--------------R . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
"[0,38] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"[0,39] . . D===================================eeeeE-R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"[0,40] . . D=======================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"[0,41] . . DeeeeeeeE------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
"[0,42] . . DeE------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
"[0,43] . . D=======eeeeE--------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
"[0,44] . . D=============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
"[0,45] . . D=================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"[0,46] . . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
"[0,47] . . .D=========================eeeeeeeeeeeE-----R. . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"[0,48] . . .D=========================eeeeE------------R. . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
"[0,49] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
"[0,50] . . .D==============================eE----------R. . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
"[0,51] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"[0,52] . . . D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"[0,53] . . . D===========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"[0,54] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
"[0,55] . . . D=eE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . jb\t..B1.4\n",
"[1,0] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
"[1,1] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
"[1,2] . . . D==eeeeeeeE-------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
"[1,3] . . . D===============eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
"[1,4] . . . D====================eeeeE----------------------R . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
"[1,5] . . . D=========================eeeeE-----------------R . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"[1,6] . . . DeE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
"[1,7] . . . D============================eeeeE-------------R . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
"[1,8] . . . D================================eeeeeeeeeeeE--R . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"[1,9] . . . D================================eeeeE---------R . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
"[1,10] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
"[1,11] . . . D====================================eE---------R . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
"[1,12] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"[1,13] . . . D==============================================eeeeER . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"[1,14] . . . D==================================================eeeeER . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"[1,15] . . . DeE-----------------------------------------------------R . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
"[1,16] . . . .D===eeeeeeeE-------------------------------------------R . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
"[1,17] . . . .D==============eeeeE-----------------------------------R . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
"[1,18] . . . .D==================eeeeE-------------------------------R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
"[1,19] . . . .D======================eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"[1,20] . . . . D================================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
"[1,21] . . . . D====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"[1,22] . . . . D=====================================eeeeE-----------R . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
"[1,23] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
"[1,24] . . . . D========================================eE----------R . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
"[1,25] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"[1,26] . . . . D==================================================eeeeER. . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"[1,27] . . . . D======================================================eeeeER . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"[1,28] . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
"[1,29] . . . . D=================================eeeeeeeE-----------------R . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
"[1,30] . . . . D========================================eeeeE-------------R . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
"[1,31] . . . . D===========================================eeeeE---------R . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
"[1,32] . . . . .D==============================================eeeeE-----R . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"[1,33] . . . . . D=================================================eeeeE-R . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
"[1,34] . . . . . D====================================================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"[1,35] . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
"[1,36] . . . . . D=============================================================eeeeER . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
"[1,37] . . . . . D======================================================eE----------R . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
"[1,38] . . . . . .D============================================================eeeeER . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"[1,39] . . . . . . D===============================================================eeeeER . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"[1,40] . . . . . . D==================================================================eeeeER . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"[1,41] . . . . . . D============================eeeeeeeE-----------------------------------R . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
"[1,42] . . . . . . DeE--------------------------------------------------------------------R . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
"[1,43] . . . . . . D==================================eeeeE-------------------------------R . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
"[1,44] . . . . . . D=====================================eeeeE---------------------------R . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
"[1,45] . . . . . . D===========================================eeeeE---------------------R . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"[1,46] . . . . . . D===============================================eeeeE-----------------R . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
"[1,47] . . . . . . .D==================================================eeeeeeeeeeeE------R . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"[1,48] . . . . . . . D=================================================eeeeE-------------R . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
"[1,49] . . . . . . . D===========================================================eeeeE--R . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
"[1,50] . . . . . . . D=====================================================eE----------R . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
"[1,51] . . . . . . . D==========================================================eeeeE-R . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"[1,52] . . . . . . . D==============================================================eeeeER . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"[1,53] . . . . . . . .D=================================================================eeeeER . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"[1,54] . . . . . . . .DeE--------------------------------------------------------------------R . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
"[1,55] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . jb\t..B1.4\n",
"[2,0] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . addl\t$32, %ecx\n",
"[2,1] . . . . . . . . D=eE------------------------------------------------------------------R . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
"[2,2] . . . . . . . . D======================eeeeeeeE--------------------------------------R . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
"[2,3] . . . . . . . . D==============================eeeeE---------------------------------R . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
"[2,4] . . . . . . . . D===================================eeeeE----------------------------R . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
"[2,5] . . . . . . . . D========================================eeeeE-----------------------R . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"[2,6] . . . . . . . . DeE-----------------------------------------------------------------R . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
"[2,7] . . . . . . . . D===========================================eeeeE-------------------R . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
"[2,8] . . . . . . . . D================================================eeeeeeeeeeeE-------R . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"[2,9] . . . . . . . . D================================================eeeeE-------------R . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
"[2,10] . . . . . . . . D==========================================================eeeeE---R . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
"[2,11] . . . . . . . . .D======================================================eE---------R . . . . . . . . . . . . . knotw\t%k0, %k1\n",
"[2,12] . . . . . . . . .D=========================================================eeeeE---R . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"[2,13] . . . . . . . . . D============================================================eeeeER . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"[2,14] . . . . . . . . . D================================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"[2,15] . . . . . . . . . DeE------------------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
"[2,16] . . . . . . . . . D==================eeeeeeeE-----------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
"[2,17] . . . . . . . . . D=========================eeeeE-------------------------------------R . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
"[2,18] . . . . . . . . . D=============================eeeeE--------------------------------R . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
"[2,19] . . . . . . . . . .D=================================eeeeE---------------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"[2,20] . . . . . . . . . . D=====================================eeeeE----------------------R . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
"[2,21] . . . . . . . . . . D=========================================eeeeeeeeeeeE-----------R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"[2,22] . . . . . . . . . . D=========================================eeeeE-----------------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
"[2,23] . . . . . . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
"[2,24] . . . . . . . . . . D===============================================eE-------------R . . . . . . . . . . . . knotw\t%k2, %k3\n",
"[2,25] . . . . . . . . . . D=================================================eeeeE-------R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"[2,26] . . . . . . . . . . . D===================================================eeeeE---R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"[2,27] . . . . . . . . . . . D=======================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"[2,28] . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
"[2,29] . . . . . . . . . . . D============eeeeeeeE--------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
"[2,30] . . . . . . . . . . . D====================eeeeE---------------------------------R . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
"[2,31] . . . . . . . . . . . D=========================eeeeE---------------------------R . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
"[2,32] . . . . . . . . . . . .D=============================eeeeE----------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"[2,33] . . . . . . . . . . . . D==================================eeeeE----------------R . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
"[2,34] . . . . . . . . . . . . D=====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"[2,35] . . . . . . . . . . . . D======================================eeeeE-----------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
"[2,36] . . . . . . . . . . . . D===============================================eeeeE-R . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
"[2,37] . . . . . . . . . . . . D========================================eE----------R . . . . . . . . . . . . knotw\t%k4, %k5\n",
"[2,38] . . . . . . . . . . . . .D==============================================eeeeER . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"[2,39] . . . . . . . . . . . . . D=================================================eeeeER . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"[2,40] . . . . . . . . . . . . . D====================================================eeeeER . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"[2,41] . . . . . . . . . . . . . D======eeeeeeeE------------------------------------------R . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
"[2,42] . . . . . . . . . . . . . DeE------------------------------------------------------R . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
"[2,43] . . . . . . . . . . . . . D===============eeeeE-----------------------------------R . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
"[2,44] . . . . . . . . . . . . . D========================eeeeE--------------------------R . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
"[2,45] . . . . . . . . . . . . . D============================eeeeE----------------------R . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"[2,46] . . . . . . . . . . . . . .D======================================eeeeE-----------R . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
"[2,47] . . . . . . . . . . . . . . D=========================================eeeeeeeeeeeER . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"[2,48] . . . . . . . . . . . . . . D=========================================eeeeE------R . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
"[2,49] . . . . . . . . . . . . . . D===================================================eeeeER . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
"[2,50] . . . . . . . . . . . . . . D============================================eE---------R . . . . . . . . . knotw\t%k6, %k7\n",
"[2,51] . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"[2,52] . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"[2,53] . . . . . . . . . . . . . . .D========================================================eeeeER . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"[2,54] . . . . . . . . . . . . . . . DeE----------------------------------------------------------R . . . . . . . . cmpl\t%edx, %ecx\n",
"[2,55] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . jb\t..B1.4\n",
"[3,0] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . addl\t$32, %ecx\n",
"[3,1] . . . . . . . . . . . . . . . DeE--------------------------------------------------------R . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
"[3,2] . . . . . . . . . . . . . . . D==eeeeeeeE------------------------------------------------R . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
"[3,3] . . . . . . . . . . . . . . . D=========eeeeE--------------------------------------------R . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
"[3,4] . . . . . . . . . . . . . . . D================eeeeE-------------------------------------R . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
"[3,5] . . . . . . . . . . . . . . . D===================eeeeE---------------------------------R . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"[3,6] . . . . . . . . . . . . . . . DeE-------------------------------------------------------R . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
"[3,7] . . . . . . . . . . . . . . . D===================================eeeeE-----------------R . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
"[3,8] . . . . . . . . . . . . . . . .D======================================eeeeeeeeeeeE------R . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"[3,9] . . . . . . . . . . . . . . . .D=======================================eeeeE------------R . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
"[3,10] . . . . . . . . . . . . . . . .D=================================================eeeeE--R . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
"[3,11] . . . . . . . . . . . . . . . . D===========================================eE----------R . . . . . . . . knotw\t%k0, %k1\n",
"[3,12] . . . . . . . . . . . . . . . . D===============================================eeeeE--R . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"[3,13] . . . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"[3,14] . . . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"[3,15] . . . . . . . . . . . . . . . . DeE--------------------------------------------------------R. . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
"[3,16] . . . . . . . . . . . . . . . . .D===============================eeeeeeeE------------------R. . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
"[3,17] . . . . . . . . . . . . . . . . .D=======================================eeeeE-------------R. . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
"[3,18] . . . . . . . . . . . . . . . . .D===========================================eeeeE---------R. . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
"[3,19] . . . . . . . . . . . . . . . . . D==============================================eeeeE-----R. . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"[3,20] . . . . . . . . . . . . . . . . . D==================================================eeeeE-R. . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
"[3,21] . . . . . . . . . . . . . . . . . D=====================================================eeeeeeeeeeeER. . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"[3,22] . . . . . . . . . . . . . . . . . D=====================================================eeeeE------R. . . . . vfpclasspd\t$30, %zmm19, %k2\n",
"[3,23] . . . . . . . . . . . . . . . . . D==============================================================eeeeER . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
"[3,24] . . . . . . . . . . . . . . . . . .D=======================================================eE---------R . . . . knotw\t%k2, %k3\n",
"[3,25] . . . . . . . . . . . . . . . . . . D============================================================eeeeER . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"[3,26] . . . . . . . . . . . . . . . . . . D================================================================eeeeER . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"[3,27] . . . . . . . . . . . . . . . . . . D===================================================================eeeeER . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"[3,28] . . . . . . . . . . . . . . . . . . DeE----------------------------------------------------------------------R . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
"[3,29] . . . . . . . . . . . . . . . . . . D===========================eeeeeeeE------------------------------------R . . vcvtdq2pd\t%ymm20, %zmm21\n",
"[3,30] . . . . . . . . . . . . . . . . . . D==================================eeeeE--------------------------------R . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
"[3,31] . . . . . . . . . . . . . . . . . . D======================================eeeeE----------------------------R . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
"[3,32] . . . . . . . . . . . . . . . . . . D=========================================eeeeE------------------------R . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"[3,33] . . . . . . . . . . . . . . . . . . D=============================================eeeeE--------------------R . . vrcp14pd\t%zmm23, %zmm25\n",
"[3,34] . . . . . . . . . . . . . . . . . . .D================================================eeeeeeeeeeeE---------R . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"[3,35] . . . . . . . . . . . . . . . . . . .D=================================================eeeeE---------------R . . vfpclasspd\t$30, %zmm25, %k4\n",
"[3,36] . . . . . . . . . . . . . . . . . . . D==========================================================eeeeE-----R . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
"[3,37] . . . . . . . . . . . . . . . . . . . D====================================================eE-------------R . . knotw\t%k4, %k5\n",
"[3,38] . . . . . . . . . . . . . . . . . . . D========================================================eeeeE-----R . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"[3,39] . . . . . . . . . . . . . . . . . . . D============================================================eeeeE-R . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"[3,40] . . . . . . . . . . . . . . . . . . . D===============================================================eeeeER. . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"[3,41] . . . . . . . . . . . . . . . . . . . D======================eeeeeeeE--------------------------------------R. . vcvtdq2pd\t%ymm26, %zmm27\n",
"[3,42] . . . . . . . . . . . . . . . . . . . .DeE-----------------------------------------------------------------R. . vpaddd\t%ymm5, %ymm26, %ymm9\n",
"[3,43] . . . . . . . . . . . . . . . . . . . .D============================eeeeE----------------------------------R. . vaddpd\t%zmm27, %zmm1, %zmm28\n",
"[3,44] . . . . . . . . . . . . . . . . . . . . D===============================eeeeE------------------------------R. . vmulpd\t%zmm28, %zmm2, %zmm8\n",
"[3,45] . . . . . . . . . . . . . . . . . . . . D=====================================eeeeE------------------------R. . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"[3,46] . . . . . . . . . . . . . . . . . . . . D=========================================eeeeE--------------------R. . vrcp14pd\t%zmm29, %zmm31\n",
"[3,47] . . . . . . . . . . . . . . . . . . . . D============================================eeeeeeeeeeeE---------R. . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"[3,48] . . . . . . . . . . . . . . . . . . . . D===========================================eeeeE----------------R. . vfpclasspd\t$30, %zmm31, %k6\n",
"[3,49] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE-----R. . vmulpd\t%zmm29, %zmm29, %zmm30\n",
"[3,50] . . . . . . . . . . . . . . . . . . . . D==============================================eE---------------R. . knotw\t%k6, %k7\n",
"[3,51] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE----R. . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"[3,52] . . . . . . . . . . . . . . . . . . . . .D=========================================================eeeeER. . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"[3,53] . . . . . . . . . . . . . . . . . . . . . D============================================================eeeeER vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"[3,54] . . . . . . . . . . . . . . . . . . . . . DeE--------------------------------------------------------------R cmpl\t%edx, %ecx\n",
"[3,55] . . . . . . . . . . . . . . . . . . . . . DeE-------------------------------------------------------------R jb\t..B1.4\n",
"\n",
"\n",
"Average Wait times (based on the timeline view):\n",
"[0]: Executions\n",
"[1]: Average time spent waiting in a scheduler's queue\n",
"[2]: Average time spent waiting in a scheduler's queue while ready\n",
"[3]: Average time elapsed from WB until retire stage\n",
"\n",
" [0] [1] [2] [3]\n",
"0. 4 1.0 1.0 42.5 addl\t$32, %ecx\n",
"1. 4 1.3 1.3 42.0 vpaddd\t%ymm5, %ymm9, %ymm14\n",
"2. 4 7.8 7.8 30.8 vcvtdq2pd\t%ymm9, %zmm8\n",
"3. 4 16.5 1.8 26.0 vaddpd\t%zmm8, %zmm1, %zmm10\n",
"4. 4 21.8 1.3 21.8 vmulpd\t%zmm10, %zmm2, %zmm11\n",
"5. 4 25.8 0.5 18.3 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"6. 4 1.0 1.0 45.8 vmovaps\t%zmm0, %zmm29\n",
"7. 4 32.3 3.0 12.3 vrcp14pd\t%zmm11, %zmm13\n",
"8. 4 36.0 0.3 3.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"9. 4 36.3 0.8 10.3 vfpclasspd\t$30, %zmm13, %k0\n",
"10. 4 46.5 0.0 1.3 vmulpd\t%zmm11, %zmm11, %zmm12\n",
"11. 4 40.8 1.3 9.5 knotw\t%k0, %k1\n",
"12. 4 45.8 0.0 1.3 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"13. 4 49.0 0.0 0.0 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"14. 4 52.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"15. 4 1.0 1.0 54.5 vpaddd\t%ymm5, %ymm14, %ymm20\n",
"16. 4 14.0 14.0 34.8 vcvtdq2pd\t%ymm14, %zmm15\n",
"17. 4 22.3 1.3 29.5 vaddpd\t%zmm15, %zmm1, %zmm16\n",
"18. 4 26.0 0.3 25.3 vmulpd\t%zmm16, %zmm2, %zmm17\n",
"19. 4 29.8 0.3 21.0 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"20. 4 35.3 2.0 15.0 vrcp14pd\t%zmm17, %zmm19\n",
"21. 4 38.8 0.0 6.5 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"22. 4 39.3 1.0 12.5 vfpclasspd\t$30, %zmm19, %k2\n",
"23. 4 48.8 0.0 3.5 vmulpd\t%zmm17, %zmm17, %zmm18\n",
"24. 4 43.0 0.8 11.8 knotw\t%k2, %k3\n",
"25. 4 47.8 0.0 3.5 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"26. 4 51.0 0.0 1.3 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"27. 4 54.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"28. 4 1.0 1.0 56.5 vpaddd\t%ymm5, %ymm20, %ymm26\n",
"29. 4 19.0 19.0 31.8 vcvtdq2pd\t%ymm20, %zmm21\n",
"30. 4 26.3 0.3 27.5 vaddpd\t%zmm21, %zmm1, %zmm22\n",
"31. 4 30.0 0.5 23.0 vmulpd\t%zmm22, %zmm2, %zmm23\n",
"32. 4 33.5 0.3 18.8 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"33. 4 37.5 0.5 14.3 vrcp14pd\t%zmm23, %zmm25\n",
"34. 4 40.5 0.0 5.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"35. 4 40.8 0.5 12.3 vfpclasspd\t$30, %zmm25, %k4\n",
"36. 4 50.5 0.0 2.8 vmulpd\t%zmm23, %zmm23, %zmm24\n",
"37. 4 44.0 0.5 11.8 knotw\t%k4, %k5\n",
"38. 4 49.5 0.3 2.5 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"39. 4 52.8 0.0 0.5 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"40. 4 56.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"41. 4 15.0 15.0 37.8 vcvtdq2pd\t%ymm26, %zmm27\n",
"42. 4 1.0 1.0 57.3 vpaddd\t%ymm5, %ymm26, %ymm9\n",
"43. 4 22.0 0.8 33.0 vaddpd\t%zmm27, %zmm1, %zmm28\n",
"44. 4 27.3 2.0 27.0 vmulpd\t%zmm28, %zmm2, %zmm8\n",
"45. 4 32.3 1.0 22.0 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"46. 4 38.0 2.0 16.0 vrcp14pd\t%zmm29, %zmm31\n",
"47. 4 41.0 0.0 5.0 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"48. 4 40.5 0.3 11.8 vfpclasspd\t$30, %zmm31, %k6\n",
"49. 4 51.0 0.0 2.0 vmulpd\t%zmm29, %zmm29, %zmm30\n",
"50. 4 44.3 0.8 11.0 knotw\t%k6, %k7\n",
"51. 4 50.5 0.5 1.5 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"52. 4 53.8 0.0 0.0 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"53. 4 57.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"54. 4 1.0 1.0 58.5 cmpl\t%edx, %ecx\n",
"55. 4 1.3 0.0 57.5 jb\t..B1.4\n",
" 4 32.5 1.6 18.4 <total>\n",
"</pre>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space: pre !important;\">Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-30;16:57:45\n",
"Analyzed File - build/SKX/icc/O3/pi.marked.o\n",
"Binary Format - 64Bit\n",
"Architecture - SKX\n",
"Analysis Type - Throughput\n",
"\n",
"Throughput Analysis Report\n",
"--------------------------\n",
"Block Throughput: 31.50 Cycles Throughput Bottleneck: Backend\n",
"Loop Count: 103\n",
"Port Binding In Cycles Per Iteration:\n",
"--------------------------------------------------------------------------------------------------\n",
"| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
"--------------------------------------------------------------------------------------------------\n",
"| Cycles | 30.0 0.0 | 4.0 | 2.0 2.0 | 2.0 2.0 | 0.0 | 30.0 | 1.0 | 0.0 |\n",
"--------------------------------------------------------------------------------------------------\n",
"\n",
"DV - Divider pipe (on port 0)\n",
"D - Data fetch pipe (on ports 2 and 3)\n",
"F - Macro Fusion with the previous instruction occurred\n",
"* - instruction micro-ops not bound to a port\n",
"^ - Micro Fusion occurred\n",
"# - ESP Tracking sync uop was issued\n",
"@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected\n",
"X - instruction not supported, was not accounted in Analysis\n",
"\n",
"| Num Of | Ports pressure in cycles | |\n",
"| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
"-----------------------------------------------------------------------------------------\n",
"| 1 | | | | | | | 1.0 | | add ecx, 0x20\n",
"| 1 | | 1.0 | | | | | | | vpaddd ymm14, ymm9, ymm5\n",
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm8, ymm9\n",
"| 1 | | | | | | 1.0 | | | vaddpd zmm10, zmm1, zmm8\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm11, zmm2, zmm10\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm11, zmm11, zmm0\n",
"| 1* | | | | | | | | | vmovaps zmm29, zmm0\n",
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm13, zmm11\n",
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm11, zmm13, qword ptr [rip]{1to8}\n",
"| 1 | | | | | | 1.0 | | | vfpclasspd k0, zmm13, 0x1e\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm11, zmm11\n",
"| 1 | 1.0 | | | | | | | | knotw k1, k0\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm13{k1}, zmm11, zmm13\n",
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm13{k1}, zmm12, zmm13\n",
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm13, zmm4\n",
"| 1 | | 1.0 | | | | | | | vpaddd ymm20, ymm14, ymm5\n",
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm15, ymm14\n",
"| 1 | 1.0 | | | | | | | | vaddpd zmm16, zmm1, zmm15\n",
"| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm2, zmm16\n",
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm17, zmm17, zmm0\n",
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm19, zmm17\n",
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm17, zmm19, qword ptr [rip]{1to8}\n",
"| 1 | | | | | | 1.0 | | | vfpclasspd k2, zmm19, 0x1e\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm17, zmm17\n",
"| 1 | 1.0 | | | | | | | | knotw k3, k2\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm17, zmm19\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm18, zmm19\n",
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm19, zmm4\n",
"| 1 | | 1.0 | | | | | | | vpaddd ymm26, ymm20, ymm5\n",
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm21, ymm20\n",
"| 1 | | | | | | 1.0 | | | vaddpd zmm22, zmm1, zmm21\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm23, zmm2, zmm22\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm23, zmm23, zmm0\n",
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm25, zmm23\n",
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm23, zmm25, qword ptr [rip]{1to8}\n",
"| 1 | | | | | | 1.0 | | | vfpclasspd k4, zmm25, 0x1e\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm24, zmm23, zmm23\n",
"| 1 | 1.0 | | | | | | | | knotw k5, k4\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm25{k5}, zmm23, zmm25\n",
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm25{k5}, zmm24, zmm25\n",
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm25, zmm4\n",
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm27, ymm26\n",
"| 1 | | 1.0 | | | | | | | vpaddd ymm9, ymm26, ymm5\n",
"| 1 | 1.0 | | | | | | | | vaddpd zmm28, zmm1, zmm27\n",
"| 1 | | | | | | 1.0 | | | vmulpd zmm8, zmm2, zmm28\n",
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm29, zmm8, zmm8\n",
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm31, zmm29\n",
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm29, zmm31, qword ptr [rip]{1to8}\n",
"| 1 | | | | | | 1.0 | | | vfpclasspd k6, zmm31, 0x1e\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm30, zmm29, zmm29\n",
"| 1 | 1.0 | | | | | | | | knotw k7, k6\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm29, zmm31\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm30, zmm31\n",
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm31, zmm4\n",
"| 1* | | | | | | | | | cmp ecx, edx\n",
"| 0*F | | | | | | | | | jb 0xfffffffffffffeb3\n",
"Total Num Of Uops: 71\n",
"Analysis Notes:\n",
"Backend allocation was stalled due to unavailable allocation resources.\n",
"</pre>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -307,7 +973,8 @@
" for l in r['analyzed kernel']\n",
" if l['instruction']]))\n",
"for a in archs:\n",
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')"
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')\n",
"get_info((\"SKX\", \"icc\", \"O3\", \"pi\"))"
]
},
{
@@ -343,7 +1010,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 25,
"metadata": {
"hideCode": false,
"hidePrompt": false,