improved register range and list support on AArch64

This commit is contained in:
Julian Hammer
2021-04-23 13:08:26 +02:00
parent 0c295dc847
commit 1f32252f91
5 changed files with 786 additions and 26 deletions

View File

@@ -1132,6 +1132,27 @@ instruction_forms:
throughput: 2.0
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
- name: ld2d
operands:
- class: register
prefix: 'z'
shape: 'd'
- class: register
prefix: 'z'
shape: 'd'
- class: register
prefix: p
predication: '*'
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 2.0
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
port_pressure: [[2, '56'], [4, ['5D', '6D']]]
- name: ldp
operands:
- class: register
@@ -1980,6 +2001,27 @@ instruction_forms:
throughput: 1.0
latency: 0 # 1*p5+1*p6+1*p0
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
- name: st2d
operands:
- class: register
prefix: 'z'
shape: 'd'
- class: register
prefix: 'z'
shape: 'd'
- class: register
prefix: p
predication: '*'
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 1*p5+1*p6+1*p0
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
- name: sub
operands:
- class: register

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python3
from copy import deepcopy
import pyparsing as pp
from osaca.parser import AttrDict, BaseParser
@@ -240,7 +239,7 @@ class ParserAArch64(BaseParser):
# 1. Parse comment
try:
result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())
result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())[0]
result = AttrDict.convert_dict(result)
instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID])
except pp.ParseException:
@@ -249,7 +248,7 @@ class ParserAArch64(BaseParser):
try:
result = self.process_operand(
self.llvm_markers.parseString(line, parseAll=True).asDict()
)
)[0]
result = AttrDict.convert_dict(result)
instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID])
except pp.ParseException:
@@ -257,7 +256,7 @@ class ParserAArch64(BaseParser):
# 2. Parse label
if result is None:
try:
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())[0]
result = AttrDict.convert_dict(result)
instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
if self.COMMENT_ID in result[self.LABEL_ID]:
@@ -272,7 +271,7 @@ class ParserAArch64(BaseParser):
try:
result = self.process_operand(
self.directive.parseString(line, parseAll=True).asDict()
)
)[0]
result = AttrDict.convert_dict(result)
instruction_form[self.DIRECTIVE_ID] = AttrDict(
{
@@ -292,7 +291,6 @@ class ParserAArch64(BaseParser):
try:
result = self.parse_instruction(line)
except (pp.ParseException, KeyError) as e:
raise e
raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
@@ -313,19 +311,19 @@ class ParserAArch64(BaseParser):
# Add operands to list
# Check first operand
if "operand1" in result:
operands.append(self.process_operand(result["operand1"]))
operands += self.process_operand(result["operand1"])
# Check second operand
if "operand2" in result:
operands.append(self.process_operand(result["operand2"]))
operands += self.process_operand(result["operand2"])
# Check third operand
if "operand3" in result:
operands.append(self.process_operand(result["operand3"]))
operands += self.process_operand(result["operand3"])
# Check fourth operand
if "operand4" in result:
operands.append(self.process_operand(result["operand4"]))
operands += self.process_operand(result["operand4"])
# Check fifth operand
if "operand5" in result:
operands.append(self.process_operand(result["operand5"]))
operands += self.process_operand(result["operand5"])
return_dict = AttrDict(
{
@@ -342,23 +340,23 @@ class ParserAArch64(BaseParser):
"""Post-process operand"""
# structure memory addresses
if self.MEMORY_ID in operand:
return self.process_memory_address(operand[self.MEMORY_ID])
return [self.process_memory_address(operand[self.MEMORY_ID])]
# structure register lists
if self.REGISTER_ID in operand and (
"list" in operand[self.REGISTER_ID] or "range" in operand[self.REGISTER_ID]
):
# TODO: discuss if ranges should be converted to lists
return self.process_register_list(operand[self.REGISTER_ID])
# resolve ranges and lists
return self.resolve_range_list(self.process_register_list(operand[self.REGISTER_ID]))
if self.REGISTER_ID in operand and operand[self.REGISTER_ID]["name"] == "sp":
return self.process_sp_register(operand[self.REGISTER_ID])
return [self.process_sp_register(operand[self.REGISTER_ID])]
# add value attribute to floating point immediates without exponent
if self.IMMEDIATE_ID in operand:
return self.process_immediate(operand[self.IMMEDIATE_ID])
return [self.process_immediate(operand[self.IMMEDIATE_ID])]
if self.LABEL_ID in operand:
return self.process_label(operand[self.LABEL_ID])
return [self.process_label(operand[self.LABEL_ID])]
if self.IDENTIFIER_ID in operand:
return self.process_identifier(operand[self.IDENTIFIER_ID])
return operand
return [self.process_identifier(operand[self.IDENTIFIER_ID])]
return [operand]
def process_memory_address(self, memory_address):
"""Post-process memory address operand"""
@@ -391,6 +389,36 @@ class ParserAArch64(BaseParser):
reg["prefix"] = "x"
return AttrDict({self.REGISTER_ID: reg})
def resolve_range_list(self, operand):
    """
    Resolve a register list or register range operand to a flat list of registers.

    :param operand: operand mapping; it is only resolved when it has a ``register`` entry
        that contains either a ``list`` or a ``range`` key
    :returns: list with one ``AttrDict({self.REGISTER_ID: reg})`` per register, or ``None``
        if the operand is neither a register list nor a register range
    """
    if "register" not in operand:
        return None
    register = operand.register
    # Optional element index that applies to every register of the list/range.
    index = register.get("index")
    if "list" in register:
        # Copy each member so the parsed operand is not mutated below.
        members = [deepcopy(reg) for reg in register.list]
    elif "range" in register:
        first = register.range[0]
        last = register.range[1]
        members = []
        # Register names inside a range are numeric; enumerate them inclusively and
        # clone the first register so every entry keeps its remaining attributes.
        for number in range(int(first.name), int(last.name) + 1):
            reg = deepcopy(first)
            reg["name"] = str(number)
            members.append(reg)
    else:
        return None
    resolved = []
    for reg in members:
        if index is not None:
            # Use the index fetched above. The previous range branch stored
            # `operand.register.range.index`, i.e. an attribute of the range
            # container rather than the parsed index value.
            reg["index"] = index
        resolved.append(AttrDict({self.REGISTER_ID: reg}))
    return resolved
def process_register_list(self, register_list):
"""Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})"""
# Remove unnecessarily created dictionary entries during parsing

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python3
from itertools import chain
from copy import deepcopy
from osaca import utils
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
@@ -122,6 +123,7 @@ class ISASemantics(object):
"pre_indexed": pre_indexed,
"post_indexed": post_indexed})
)
# store operand list in dict and reassign operand key/value pair
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
# assign LD/ST flags
@@ -130,6 +132,7 @@ class ISASemantics(object):
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
if self._has_store(instruction_form):
instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
def get_reg_changes(self, instruction_form, only_postindexed=False):
"""

View File

@@ -34,6 +34,8 @@ class TestSemanticTools(unittest.TestCase):
cls.code_aarch64_memdep = f.read()
with open(cls._find_file("kernel_aarch64.s")) as f:
cls.code_AArch64 = f.read()
with open(cls._find_file("kernel_aarch64_sve.s")) as f:
cls.code_AArch64_SVE = f.read()
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
cls.kernel_x86_memdep = reduce_to_section(
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86")
@@ -41,6 +43,8 @@ class TestSemanticTools(unittest.TestCase):
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64")
cls.kernel_aarch64_memdep = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64")
cls.kernel_aarch64_SVE = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64")
# set up machine models
cls.machine_model_csx = MachineModel(
@@ -49,6 +53,9 @@ class TestSemanticTools(unittest.TestCase):
cls.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml")
)
cls.machine_model_a64fx = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml")
)
cls.semantics_x86 = ISASemantics("x86")
cls.semantics_csx = ArchSemantics(
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
@@ -58,6 +65,10 @@ class TestSemanticTools(unittest.TestCase):
cls.machine_model_tx2,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
)
cls.semantics_a64fx = ArchSemantics(
cls.machine_model_a64fx,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
)
cls.machine_model_zen = MachineModel(arch="zen1")
for i in range(len(cls.kernel_x86)):
@@ -72,6 +83,9 @@ class TestSemanticTools(unittest.TestCase):
for i in range(len(cls.kernel_aarch64_memdep)):
cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i])
cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i])
for i in range(len(cls.kernel_aarch64_SVE)):
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
###########
# Tests
@@ -320,6 +334,11 @@ class TestSemanticTools(unittest.TestCase):
dg.get_dependent_instruction_forms()
# test dot creation
dg.export_graph(filepath="/dev/null")
def test_kernelDG_SVE(self):
    """Smoke test: build a KernelDG for the SVE kernel with the A64FX model and semantics."""
    KernelDG(
        self.kernel_aarch64_SVE,
        self.parser_AArch64,
        self.machine_model_a64fx,
        self.semantics_a64fx,
    )
    # TODO check for correct analysis
def test_hidden_load(self):
machine_model_hld = MachineModel(
@@ -421,6 +440,7 @@ class TestSemanticTools(unittest.TestCase):
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1))
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2))
self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm))
def test_is_read_is_written_AArch64(self):
# independent from HW model

View File

@@ -232,7 +232,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -284,8 +284,10 @@
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"execution_count": 27,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
@@ -296,8 +298,672 @@
"ZEN has 156 tests, compiled to 126 unique assembly representations.\n",
"ZEN2 has 156 tests, compiled to 126 unique assembly representations.\n",
"TX2 has 104 tests, compiled to 78 unique assembly representations.\n",
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n"
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n",
"High-level iterations in assembly block: 16\n",
"Measured: 1.1903856655856655\n",
"IACA Predicted: 1.96875 TP: 1.875 LCD: None CP: None\n",
"Ithemal Predicted: nan TP: None LCD: None CP: None\n",
"LLVM-MCA Predicted: 2.240625 TP: 1.948125 LCD: 2.240625 CP: 3.8125\n",
"OSACA Predicted: 1.875 TP: 1.875 LCD: 0.5 CP: 2.75\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space: pre !important;\">Open Source Architecture Code Analyzer (OSACA) - 0.3.14\n",
"Analyzed file: build/SKX/icc/O3/pi.marked.s\n",
"Architecture: SKX\n",
"Timestamp: 2021-04-15 12:15:40\n",
"\n",
"\n",
" P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction\n",
" * - Instruction micro-ops not bound to a port\n",
" X - No throughput/latency information for this instruction in data file\n",
"\n",
"\n",
"Combined Analysis Report\n",
"------------------------\n",
" Port pressure in cycles \n",
" | 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |\n",
"-------------------------------------------------------------------------------------------------\n",
" 62 | | | | | | | | || | | # pointer_increment=128 fa3c665ee18e1e5f704c8a6026891c36\n",
" 63 | | | | | | | | || | | ..B1.4: # Preds ..B1.4 ..B1.3\n",
" 64 | | | | | | | | || | | # Execution count [5.00e+00]\n",
" 65 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addl $32, %ecx #16.5\n",
" 66 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm5, %ymm9, %ymm14 #17.9\n",
" 67 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm9, %zmm8 #17.14\n",
" 68 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm8, %zmm1, %zmm10 #17.18\n",
" 69 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm2, %zmm11 #17.25\n",
" 70 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm11, %zmm11 #18.38\n",
" 71 | | | | | | | | || | | * vmovaps %zmm0, %zmm29 #18.38\n",
" 72 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm11, %zmm13 #18.38\n",
" 73 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11 #18.38\n",
" 74 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm13, %k0 #18.38\n",
" 75 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm11, %zmm11, %zmm12 #18.38\n",
" 76 | 1.00 | | | | | | | || | | knotw %k0, %k1 #18.38\n",
" 77 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm11, %zmm13{%k1} #18.38\n",
" 78 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm12, %zmm13{%k1} #18.38\n",
" 79 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38\n",
" 80 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm14, %ymm20 #17.9\n",
" 81 | 0.50 | | | | | 1.50 | | || 7.0 | | vcvtdq2pd %ymm14, %zmm15 #17.14\n",
" 82 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddpd %zmm15, %zmm1, %zmm16 #17.18\n",
" 83 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm16, %zmm2, %zmm17 #17.25\n",
" 84 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm0, %zmm17, %zmm17 #18.38\n",
" 85 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm17, %zmm19 #18.38\n",
" 86 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17 #18.38\n",
" 87 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm19, %k2 #18.38\n",
" 88 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm17, %zmm18 #18.38\n",
" 89 | 1.00 | | | | | | | || | | knotw %k2, %k3 #18.38\n",
" 90 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm19, %zmm17, %zmm19{%k3} #18.38\n",
" 91 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm19, %zmm18, %zmm19{%k3} #18.38\n",
" 92 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38\n",
" 93 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm20, %ymm26 #17.9\n",
" 94 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm20, %zmm21 #17.14\n",
" 95 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm21, %zmm1, %zmm22 #17.18\n",
" 96 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm22, %zmm2, %zmm23 #17.25\n",
" 97 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm23, %zmm23 #18.38\n",
" 98 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm23, %zmm25 #18.38\n",
" 99 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23 #18.38\n",
" 100 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm25, %k4 #18.38\n",
" 101 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm23, %zmm23, %zmm24 #18.38\n",
" 102 | 1.00 | | | | | | | || | | knotw %k4, %k5 #18.38\n",
" 103 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm23, %zmm25{%k5} #18.38\n",
" 104 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm24, %zmm25{%k5} #18.38\n",
" 105 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm25, %zmm6 #18.38\n",
" 106 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm26, %zmm27 #17.14\n",
" 107 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm26, %ymm9 #17.9\n",
" 108 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm27, %zmm1, %zmm28 #17.18\n",
" 109 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm28, %zmm2, %zmm8 #17.25\n",
" 110 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm8, %zmm8, %zmm29 #18.38\n",
" 111 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm29, %zmm31 #18.38\n",
" 112 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29 #18.38\n",
" 113 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm31, %k6 #18.38\n",
" 114 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm29, %zmm29, %zmm30 #18.38\n",
" 115 | 1.00 | | | | | | | || | | knotw %k6, %k7 #18.38\n",
" 116 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm29, %zmm31{%k7} #18.38\n",
" 117 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm30, %zmm31{%k7} #18.38\n",
" 118 | 0.00 | | | | | 1.00 | | || 0.0 | 4.0 | vfmadd231pd %zmm4, %zmm31, %zmm3 #18.38\n",
" 119 | 0.00 | 0.34 | | | | 0.00 | 0.66 | || | | cmpl %edx, %ecx #16.5\n",
" 120 | 0.00 | | | | | | 1.00 | || | | jb ..B1.4 # Prob 82% #16.5\n",
"\n",
" 30.0 4.34 2.00 2.00 2.00 2.00 30.0 2.66 44 8.0 \n",
"\n",
"\n",
"Loop-Carried Dependencies Analysis Report\n",
"-----------------------------------------\n",
" 92 | 8.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38| [92, 118]\n",
" 79 | 8.0 | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38| [79, 105]\n",
" 66 | 4.0 | vpaddd %ymm5, %ymm9, %ymm14 #17.9| [66, 80, 93, 107]\n",
" 65 | 1.0 | addl $32, %ecx #16.5| [65]\n",
"</pre>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space: pre !important;\">Iterations: 100\n",
"Instructions: 5600\n",
"Total Cycles: 3585\n",
"Total uOps: 7200\n",
"\n",
"Dispatch Width: 6\n",
"uOps Per Cycle: 2.01\n",
"IPC: 1.56\n",
"Block RThroughput: 18.0\n",
"\n",
"\n",
"Instruction Info:\n",
"[1]: #uOps\n",
"[2]: Latency\n",
"[3]: RThroughput\n",
"[4]: MayLoad\n",
"[5]: MayStore\n",
"[6]: HasSideEffects (U)\n",
"\n",
"[1] [2] [3] [4] [5] [6] Instructions:\n",
" 1 1 0.25 addl\t$32, %ecx\n",
" 1 1 0.33 vpaddd\t%ymm5, %ymm9, %ymm14\n",
" 2 7 1.00 vcvtdq2pd\t%ymm9, %zmm8\n",
" 1 4 0.50 vaddpd\t%zmm8, %zmm1, %zmm10\n",
" 1 4 0.50 vmulpd\t%zmm10, %zmm2, %zmm11\n",
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
" 1 1 0.33 vmovaps\t%zmm0, %zmm29\n",
" 3 4 2.00 vrcp14pd\t%zmm11, %zmm13\n",
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
" 1 4 1.00 vfpclasspd\t$30, %zmm13, %k0\n",
" 1 4 0.50 vmulpd\t%zmm11, %zmm11, %zmm12\n",
" 1 1 1.00 knotw\t%k0, %k1\n",
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
" 1 1 0.33 vpaddd\t%ymm5, %ymm14, %ymm20\n",
" 2 7 1.00 vcvtdq2pd\t%ymm14, %zmm15\n",
" 1 4 0.50 vaddpd\t%zmm15, %zmm1, %zmm16\n",
" 1 4 0.50 vmulpd\t%zmm16, %zmm2, %zmm17\n",
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
" 3 4 2.00 vrcp14pd\t%zmm17, %zmm19\n",
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
" 1 4 1.00 vfpclasspd\t$30, %zmm19, %k2\n",
" 1 4 0.50 vmulpd\t%zmm17, %zmm17, %zmm18\n",
" 1 1 1.00 knotw\t%k2, %k3\n",
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
" 1 1 0.33 vpaddd\t%ymm5, %ymm20, %ymm26\n",
" 2 7 1.00 vcvtdq2pd\t%ymm20, %zmm21\n",
" 1 4 0.50 vaddpd\t%zmm21, %zmm1, %zmm22\n",
" 1 4 0.50 vmulpd\t%zmm22, %zmm2, %zmm23\n",
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
" 3 4 2.00 vrcp14pd\t%zmm23, %zmm25\n",
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
" 1 4 1.00 vfpclasspd\t$30, %zmm25, %k4\n",
" 1 4 0.50 vmulpd\t%zmm23, %zmm23, %zmm24\n",
" 1 1 1.00 knotw\t%k4, %k5\n",
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
" 2 7 1.00 vcvtdq2pd\t%ymm26, %zmm27\n",
" 1 1 0.33 vpaddd\t%ymm5, %ymm26, %ymm9\n",
" 1 4 0.50 vaddpd\t%zmm27, %zmm1, %zmm28\n",
" 1 4 0.50 vmulpd\t%zmm28, %zmm2, %zmm8\n",
" 1 4 0.50 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
" 3 4 2.00 vrcp14pd\t%zmm29, %zmm31\n",
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
" 1 4 1.00 vfpclasspd\t$30, %zmm31, %k6\n",
" 1 4 0.50 vmulpd\t%zmm29, %zmm29, %zmm30\n",
" 1 1 1.00 knotw\t%k6, %k7\n",
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
" 1 1 0.25 cmpl\t%edx, %ecx\n",
" 1 1 0.50 jb\t..B1.4\n",
"\n",
"\n",
"Resources:\n",
"[0] - SKXDivider\n",
"[1] - SKXFPDivider\n",
"[2] - SKXPort0\n",
"[3] - SKXPort1\n",
"[4] - SKXPort2\n",
"[5] - SKXPort3\n",
"[6] - SKXPort4\n",
"[7] - SKXPort5\n",
"[8] - SKXPort6\n",
"[9] - SKXPort7\n",
"\n",
"\n",
"Resource pressure per iteration:\n",
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] \n",
" - - 31.17 5.72 2.00 2.00 - 29.10 2.01 - \n",
"\n",
"Resource pressure by instruction:\n",
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:\n",
" - - - 0.80 - - - 0.19 0.01 - addl\t$32, %ecx\n",
" - - 0.07 0.92 - - - 0.01 - - vpaddd\t%ymm5, %ymm9, %ymm14\n",
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm9, %zmm8\n",
" - - 0.42 - - - - 0.58 - - vaddpd\t%zmm8, %zmm1, %zmm10\n",
" - - 0.51 - - - - 0.49 - - vmulpd\t%zmm10, %zmm2, %zmm11\n",
" - - 0.45 - - - - 0.55 - - vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
" - - - 1.00 - - - - - - vmovaps\t%zmm0, %zmm29\n",
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm11, %zmm13\n",
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm13, %k0\n",
" - - 0.49 - - - - 0.51 - - vmulpd\t%zmm11, %zmm11, %zmm12\n",
" - - 1.00 - - - - - - - knotw\t%k0, %k1\n",
" - - 0.44 - - - - 0.56 - - vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
" - - 0.70 - - - - 0.30 - - vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm14, %ymm20\n",
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm14, %zmm15\n",
" - - 0.48 - - - - 0.52 - - vaddpd\t%zmm15, %zmm1, %zmm16\n",
" - - 0.42 - - - - 0.58 - - vmulpd\t%zmm16, %zmm2, %zmm17\n",
" - - 0.32 - - - - 0.68 - - vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm17, %zmm19\n",
" - - 0.32 - 1.00 - - 0.68 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm19, %k2\n",
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm17, %zmm17, %zmm18\n",
" - - 1.00 - - - - - - - knotw\t%k2, %k3\n",
" - - 0.53 - - - - 0.47 - - vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
" - - 0.57 - - - - 0.43 - - vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm20, %ymm26\n",
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm20, %zmm21\n",
" - - 0.52 - - - - 0.48 - - vaddpd\t%zmm21, %zmm1, %zmm22\n",
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm22, %zmm2, %zmm23\n",
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm23, %zmm25\n",
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm25, %k4\n",
" - - 0.53 - - - - 0.47 - - vmulpd\t%zmm23, %zmm23, %zmm24\n",
" - - 1.00 - - - - - - - knotw\t%k4, %k5\n",
" - - 0.42 - - - - 0.58 - - vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
" - - 0.60 - - - - 0.40 - - vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm26, %zmm27\n",
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm26, %ymm9\n",
" - - 0.26 - - - - 0.74 - - vaddpd\t%zmm27, %zmm1, %zmm28\n",
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm28, %zmm2, %zmm8\n",
" - - 0.34 - - - - 0.66 - - vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm29, %zmm31\n",
" - - 0.34 - 1.00 - - 0.66 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm31, %k6\n",
" - - 0.52 - - - - 0.48 - - vmulpd\t%zmm29, %zmm29, %zmm30\n",
" - - 1.00 - - - - - - - knotw\t%k6, %k7\n",
" - - 0.47 - - - - 0.53 - - vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
" - - 0.66 - - - - 0.34 - - vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
" - - - - - - - - 1.00 - cmpl\t%edx, %ecx\n",
" - - - - - - - - 1.00 - jb\t..B1.4\n",
"\n",
"\n",
"Timeline view:\n",
" 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 \n",
"Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678\n",
"\n",
"[0,0] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
"[0,1] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
"[0,2] D=eeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
"[0,3] D========eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
"[0,4] D============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
"[0,5] .D===============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"[0,6] .DeE------------------R . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
"[0,7] .D===================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
"[0,8] . D======================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"[0,9] . D======================eeeeE-------R . . . . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
"[0,10] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
"[0,11] . D==========================eE----------R . . . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
"[0,12] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"[0,13] . D====================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"[0,14] . D========================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"[0,15] . DeE-------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
"[0,16] . DeeeeeeeE-------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
"[0,17] . D=======eeeeE---------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
"[0,18] . D==========eeeeE-----------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
"[0,19] . D==============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"[0,20] . D==================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
"[0,21] . D=====================eeeeeeeeeeeE----------R. . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"[0,22] . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
"[0,23] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
"[0,24] . D==========================eE---------------R. . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
"[0,25] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"[0,26] . .D===================================eeeeE--R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"[0,27] . .D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"[0,28] . .DeE------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
"[0,29] . .DeeeeeeeE------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
"[0,30] . .D=======eeeeE--------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
"[0,31] . . D==========eeeeE----------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
"[0,32] . . D==============eeeeE------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"[0,33] . . D==================eeeeE--------------------R . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
"[0,34] . . D=====================eeeeeeeeeeeE---------R . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"[0,35] . . D=====================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
"[0,36] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
"[0,37] . . D==========================eE--------------R . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
"[0,38] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"[0,39] . . D===================================eeeeE-R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"[0,40] . . D=======================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"[0,41] . . DeeeeeeeE------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
"[0,42] . . DeE------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
"[0,43] . . D=======eeeeE--------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
"[0,44] . . D=============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
"[0,45] . . D=================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"[0,46] . . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
"[0,47] . . .D=========================eeeeeeeeeeeE-----R. . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"[0,48] . . .D=========================eeeeE------------R. . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
"[0,49] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
"[0,50] . . .D==============================eE----------R. . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
"[0,51] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"[0,52] . . . D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"[0,53] . . . D===========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"[0,54] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
"[0,55] . . . D=eE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . jb\t..B1.4\n",
"[1,0] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
"[1,1] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
"[1,2] . . . D==eeeeeeeE-------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
"[1,3] . . . D===============eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
"[1,4] . . . D====================eeeeE----------------------R . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
"[1,5] . . . D=========================eeeeE-----------------R . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"[1,6] . . . DeE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
"[1,7] . . . D============================eeeeE-------------R . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
"[1,8] . . . D================================eeeeeeeeeeeE--R . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"[1,9] . . . D================================eeeeE---------R . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
"[1,10] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
"[1,11] . . . D====================================eE---------R . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
"[1,12] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"[1,13] . . . D==============================================eeeeER . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"[1,14] . . . D==================================================eeeeER . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"[1,15] . . . DeE-----------------------------------------------------R . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
"[1,16] . . . .D===eeeeeeeE-------------------------------------------R . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
"[1,17] . . . .D==============eeeeE-----------------------------------R . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
"[1,18] . . . .D==================eeeeE-------------------------------R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
"[1,19] . . . .D======================eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"[1,20] . . . . D================================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
"[1,21] . . . . D====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"[1,22] . . . . D=====================================eeeeE-----------R . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
"[1,23] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
"[1,24] . . . . D========================================eE----------R . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
"[1,25] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"[1,26] . . . . D==================================================eeeeER. . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"[1,27] . . . . D======================================================eeeeER . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"[1,28] . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
"[1,29] . . . . D=================================eeeeeeeE-----------------R . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
"[1,30] . . . . D========================================eeeeE-------------R . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
"[1,31] . . . . D===========================================eeeeE---------R . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
"[1,32] . . . . .D==============================================eeeeE-----R . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"[1,33] . . . . . D=================================================eeeeE-R . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
"[1,34] . . . . . D====================================================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"[1,35] . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
"[1,36] . . . . . D=============================================================eeeeER . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
"[1,37] . . . . . D======================================================eE----------R . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
"[1,38] . . . . . .D============================================================eeeeER . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"[1,39] . . . . . . D===============================================================eeeeER . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"[1,40] . . . . . . D==================================================================eeeeER . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"[1,41] . . . . . . D============================eeeeeeeE-----------------------------------R . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
"[1,42] . . . . . . DeE--------------------------------------------------------------------R . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
"[1,43] . . . . . . D==================================eeeeE-------------------------------R . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
"[1,44] . . . . . . D=====================================eeeeE---------------------------R . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
"[1,45] . . . . . . D===========================================eeeeE---------------------R . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"[1,46] . . . . . . D===============================================eeeeE-----------------R . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
"[1,47] . . . . . . .D==================================================eeeeeeeeeeeE------R . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"[1,48] . . . . . . . D=================================================eeeeE-------------R . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
"[1,49] . . . . . . . D===========================================================eeeeE--R . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
"[1,50] . . . . . . . D=====================================================eE----------R . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
"[1,51] . . . . . . . D==========================================================eeeeE-R . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"[1,52] . . . . . . . D==============================================================eeeeER . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"[1,53] . . . . . . . .D=================================================================eeeeER . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"[1,54] . . . . . . . .DeE--------------------------------------------------------------------R . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
"[1,55] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . jb\t..B1.4\n",
"[2,0] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . addl\t$32, %ecx\n",
"[2,1] . . . . . . . . D=eE------------------------------------------------------------------R . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
"[2,2] . . . . . . . . D======================eeeeeeeE--------------------------------------R . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
"[2,3] . . . . . . . . D==============================eeeeE---------------------------------R . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
"[2,4] . . . . . . . . D===================================eeeeE----------------------------R . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
"[2,5] . . . . . . . . D========================================eeeeE-----------------------R . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"[2,6] . . . . . . . . DeE-----------------------------------------------------------------R . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
"[2,7] . . . . . . . . D===========================================eeeeE-------------------R . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
"[2,8] . . . . . . . . D================================================eeeeeeeeeeeE-------R . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"[2,9] . . . . . . . . D================================================eeeeE-------------R . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
"[2,10] . . . . . . . . D==========================================================eeeeE---R . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
"[2,11] . . . . . . . . .D======================================================eE---------R . . . . . . . . . . . . . knotw\t%k0, %k1\n",
"[2,12] . . . . . . . . .D=========================================================eeeeE---R . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"[2,13] . . . . . . . . . D============================================================eeeeER . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"[2,14] . . . . . . . . . D================================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"[2,15] . . . . . . . . . DeE------------------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
"[2,16] . . . . . . . . . D==================eeeeeeeE-----------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
"[2,17] . . . . . . . . . D=========================eeeeE-------------------------------------R . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
"[2,18] . . . . . . . . . D=============================eeeeE--------------------------------R . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
"[2,19] . . . . . . . . . .D=================================eeeeE---------------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"[2,20] . . . . . . . . . . D=====================================eeeeE----------------------R . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
"[2,21] . . . . . . . . . . D=========================================eeeeeeeeeeeE-----------R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"[2,22] . . . . . . . . . . D=========================================eeeeE-----------------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
"[2,23] . . . . . . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
"[2,24] . . . . . . . . . . D===============================================eE-------------R . . . . . . . . . . . . knotw\t%k2, %k3\n",
"[2,25] . . . . . . . . . . D=================================================eeeeE-------R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"[2,26] . . . . . . . . . . . D===================================================eeeeE---R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"[2,27] . . . . . . . . . . . D=======================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"[2,28] . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
"[2,29] . . . . . . . . . . . D============eeeeeeeE--------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
"[2,30] . . . . . . . . . . . D====================eeeeE---------------------------------R . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
"[2,31] . . . . . . . . . . . D=========================eeeeE---------------------------R . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
"[2,32] . . . . . . . . . . . .D=============================eeeeE----------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"[2,33] . . . . . . . . . . . . D==================================eeeeE----------------R . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
"[2,34] . . . . . . . . . . . . D=====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"[2,35] . . . . . . . . . . . . D======================================eeeeE-----------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
"[2,36] . . . . . . . . . . . . D===============================================eeeeE-R . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
"[2,37] . . . . . . . . . . . . D========================================eE----------R . . . . . . . . . . . . knotw\t%k4, %k5\n",
"[2,38] . . . . . . . . . . . . .D==============================================eeeeER . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"[2,39] . . . . . . . . . . . . . D=================================================eeeeER . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"[2,40] . . . . . . . . . . . . . D====================================================eeeeER . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"[2,41] . . . . . . . . . . . . . D======eeeeeeeE------------------------------------------R . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
"[2,42] . . . . . . . . . . . . . DeE------------------------------------------------------R . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
"[2,43] . . . . . . . . . . . . . D===============eeeeE-----------------------------------R . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
"[2,44] . . . . . . . . . . . . . D========================eeeeE--------------------------R . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
"[2,45] . . . . . . . . . . . . . D============================eeeeE----------------------R . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"[2,46] . . . . . . . . . . . . . .D======================================eeeeE-----------R . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
"[2,47] . . . . . . . . . . . . . . D=========================================eeeeeeeeeeeER . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"[2,48] . . . . . . . . . . . . . . D=========================================eeeeE------R . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
"[2,49] . . . . . . . . . . . . . . D===================================================eeeeER . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
"[2,50] . . . . . . . . . . . . . . D============================================eE---------R . . . . . . . . . knotw\t%k6, %k7\n",
"[2,51] . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"[2,52] . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"[2,53] . . . . . . . . . . . . . . .D========================================================eeeeER . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"[2,54] . . . . . . . . . . . . . . . DeE----------------------------------------------------------R . . . . . . . . cmpl\t%edx, %ecx\n",
"[2,55] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . jb\t..B1.4\n",
"[3,0] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . addl\t$32, %ecx\n",
"[3,1] . . . . . . . . . . . . . . . DeE--------------------------------------------------------R . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
"[3,2] . . . . . . . . . . . . . . . D==eeeeeeeE------------------------------------------------R . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
"[3,3] . . . . . . . . . . . . . . . D=========eeeeE--------------------------------------------R . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
"[3,4] . . . . . . . . . . . . . . . D================eeeeE-------------------------------------R . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
"[3,5] . . . . . . . . . . . . . . . D===================eeeeE---------------------------------R . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"[3,6] . . . . . . . . . . . . . . . DeE-------------------------------------------------------R . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
"[3,7] . . . . . . . . . . . . . . . D===================================eeeeE-----------------R . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
"[3,8] . . . . . . . . . . . . . . . .D======================================eeeeeeeeeeeE------R . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"[3,9] . . . . . . . . . . . . . . . .D=======================================eeeeE------------R . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
"[3,10] . . . . . . . . . . . . . . . .D=================================================eeeeE--R . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
"[3,11] . . . . . . . . . . . . . . . . D===========================================eE----------R . . . . . . . . knotw\t%k0, %k1\n",
"[3,12] . . . . . . . . . . . . . . . . D===============================================eeeeE--R . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"[3,13] . . . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"[3,14] . . . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"[3,15] . . . . . . . . . . . . . . . . DeE--------------------------------------------------------R. . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
"[3,16] . . . . . . . . . . . . . . . . .D===============================eeeeeeeE------------------R. . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
"[3,17] . . . . . . . . . . . . . . . . .D=======================================eeeeE-------------R. . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
"[3,18] . . . . . . . . . . . . . . . . .D===========================================eeeeE---------R. . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
"[3,19] . . . . . . . . . . . . . . . . . D==============================================eeeeE-----R. . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"[3,20] . . . . . . . . . . . . . . . . . D==================================================eeeeE-R. . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
"[3,21] . . . . . . . . . . . . . . . . . D=====================================================eeeeeeeeeeeER. . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"[3,22] . . . . . . . . . . . . . . . . . D=====================================================eeeeE------R. . . . . vfpclasspd\t$30, %zmm19, %k2\n",
"[3,23] . . . . . . . . . . . . . . . . . D==============================================================eeeeER . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
"[3,24] . . . . . . . . . . . . . . . . . .D=======================================================eE---------R . . . . knotw\t%k2, %k3\n",
"[3,25] . . . . . . . . . . . . . . . . . . D============================================================eeeeER . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"[3,26] . . . . . . . . . . . . . . . . . . D================================================================eeeeER . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"[3,27] . . . . . . . . . . . . . . . . . . D===================================================================eeeeER . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"[3,28] . . . . . . . . . . . . . . . . . . DeE----------------------------------------------------------------------R . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
"[3,29] . . . . . . . . . . . . . . . . . . D===========================eeeeeeeE------------------------------------R . . vcvtdq2pd\t%ymm20, %zmm21\n",
"[3,30] . . . . . . . . . . . . . . . . . . D==================================eeeeE--------------------------------R . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
"[3,31] . . . . . . . . . . . . . . . . . . D======================================eeeeE----------------------------R . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
"[3,32] . . . . . . . . . . . . . . . . . . D=========================================eeeeE------------------------R . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"[3,33] . . . . . . . . . . . . . . . . . . D=============================================eeeeE--------------------R . . vrcp14pd\t%zmm23, %zmm25\n",
"[3,34] . . . . . . . . . . . . . . . . . . .D================================================eeeeeeeeeeeE---------R . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"[3,35] . . . . . . . . . . . . . . . . . . .D=================================================eeeeE---------------R . . vfpclasspd\t$30, %zmm25, %k4\n",
"[3,36] . . . . . . . . . . . . . . . . . . . D==========================================================eeeeE-----R . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
"[3,37] . . . . . . . . . . . . . . . . . . . D====================================================eE-------------R . . knotw\t%k4, %k5\n",
"[3,38] . . . . . . . . . . . . . . . . . . . D========================================================eeeeE-----R . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"[3,39] . . . . . . . . . . . . . . . . . . . D============================================================eeeeE-R . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"[3,40] . . . . . . . . . . . . . . . . . . . D===============================================================eeeeER. . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"[3,41] . . . . . . . . . . . . . . . . . . . D======================eeeeeeeE--------------------------------------R. . vcvtdq2pd\t%ymm26, %zmm27\n",
"[3,42] . . . . . . . . . . . . . . . . . . . .DeE-----------------------------------------------------------------R. . vpaddd\t%ymm5, %ymm26, %ymm9\n",
"[3,43] . . . . . . . . . . . . . . . . . . . .D============================eeeeE----------------------------------R. . vaddpd\t%zmm27, %zmm1, %zmm28\n",
"[3,44] . . . . . . . . . . . . . . . . . . . . D===============================eeeeE------------------------------R. . vmulpd\t%zmm28, %zmm2, %zmm8\n",
"[3,45] . . . . . . . . . . . . . . . . . . . . D=====================================eeeeE------------------------R. . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"[3,46] . . . . . . . . . . . . . . . . . . . . D=========================================eeeeE--------------------R. . vrcp14pd\t%zmm29, %zmm31\n",
"[3,47] . . . . . . . . . . . . . . . . . . . . D============================================eeeeeeeeeeeE---------R. . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"[3,48] . . . . . . . . . . . . . . . . . . . . D===========================================eeeeE----------------R. . vfpclasspd\t$30, %zmm31, %k6\n",
"[3,49] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE-----R. . vmulpd\t%zmm29, %zmm29, %zmm30\n",
"[3,50] . . . . . . . . . . . . . . . . . . . . D==============================================eE---------------R. . knotw\t%k6, %k7\n",
"[3,51] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE----R. . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"[3,52] . . . . . . . . . . . . . . . . . . . . .D=========================================================eeeeER. . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"[3,53] . . . . . . . . . . . . . . . . . . . . . D============================================================eeeeER vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"[3,54] . . . . . . . . . . . . . . . . . . . . . DeE--------------------------------------------------------------R cmpl\t%edx, %ecx\n",
"[3,55] . . . . . . . . . . . . . . . . . . . . . DeE-------------------------------------------------------------R jb\t..B1.4\n",
"\n",
"\n",
"Average Wait times (based on the timeline view):\n",
"[0]: Executions\n",
"[1]: Average time spent waiting in a scheduler's queue\n",
"[2]: Average time spent waiting in a scheduler's queue while ready\n",
"[3]: Average time elapsed from WB until retire stage\n",
"\n",
" [0] [1] [2] [3]\n",
"0. 4 1.0 1.0 42.5 addl\t$32, %ecx\n",
"1. 4 1.3 1.3 42.0 vpaddd\t%ymm5, %ymm9, %ymm14\n",
"2. 4 7.8 7.8 30.8 vcvtdq2pd\t%ymm9, %zmm8\n",
"3. 4 16.5 1.8 26.0 vaddpd\t%zmm8, %zmm1, %zmm10\n",
"4. 4 21.8 1.3 21.8 vmulpd\t%zmm10, %zmm2, %zmm11\n",
"5. 4 25.8 0.5 18.3 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
"6. 4 1.0 1.0 45.8 vmovaps\t%zmm0, %zmm29\n",
"7. 4 32.3 3.0 12.3 vrcp14pd\t%zmm11, %zmm13\n",
"8. 4 36.0 0.3 3.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
"9. 4 36.3 0.8 10.3 vfpclasspd\t$30, %zmm13, %k0\n",
"10. 4 46.5 0.0 1.3 vmulpd\t%zmm11, %zmm11, %zmm12\n",
"11. 4 40.8 1.3 9.5 knotw\t%k0, %k1\n",
"12. 4 45.8 0.0 1.3 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
"13. 4 49.0 0.0 0.0 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
"14. 4 52.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
"15. 4 1.0 1.0 54.5 vpaddd\t%ymm5, %ymm14, %ymm20\n",
"16. 4 14.0 14.0 34.8 vcvtdq2pd\t%ymm14, %zmm15\n",
"17. 4 22.3 1.3 29.5 vaddpd\t%zmm15, %zmm1, %zmm16\n",
"18. 4 26.0 0.3 25.3 vmulpd\t%zmm16, %zmm2, %zmm17\n",
"19. 4 29.8 0.3 21.0 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
"20. 4 35.3 2.0 15.0 vrcp14pd\t%zmm17, %zmm19\n",
"21. 4 38.8 0.0 6.5 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
"22. 4 39.3 1.0 12.5 vfpclasspd\t$30, %zmm19, %k2\n",
"23. 4 48.8 0.0 3.5 vmulpd\t%zmm17, %zmm17, %zmm18\n",
"24. 4 43.0 0.8 11.8 knotw\t%k2, %k3\n",
"25. 4 47.8 0.0 3.5 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
"26. 4 51.0 0.0 1.3 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
"27. 4 54.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
"28. 4 1.0 1.0 56.5 vpaddd\t%ymm5, %ymm20, %ymm26\n",
"29. 4 19.0 19.0 31.8 vcvtdq2pd\t%ymm20, %zmm21\n",
"30. 4 26.3 0.3 27.5 vaddpd\t%zmm21, %zmm1, %zmm22\n",
"31. 4 30.0 0.5 23.0 vmulpd\t%zmm22, %zmm2, %zmm23\n",
"32. 4 33.5 0.3 18.8 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
"33. 4 37.5 0.5 14.3 vrcp14pd\t%zmm23, %zmm25\n",
"34. 4 40.5 0.0 5.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
"35. 4 40.8 0.5 12.3 vfpclasspd\t$30, %zmm25, %k4\n",
"36. 4 50.5 0.0 2.8 vmulpd\t%zmm23, %zmm23, %zmm24\n",
"37. 4 44.0 0.5 11.8 knotw\t%k4, %k5\n",
"38. 4 49.5 0.3 2.5 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
"39. 4 52.8 0.0 0.5 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
"40. 4 56.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
"41. 4 15.0 15.0 37.8 vcvtdq2pd\t%ymm26, %zmm27\n",
"42. 4 1.0 1.0 57.3 vpaddd\t%ymm5, %ymm26, %ymm9\n",
"43. 4 22.0 0.8 33.0 vaddpd\t%zmm27, %zmm1, %zmm28\n",
"44. 4 27.3 2.0 27.0 vmulpd\t%zmm28, %zmm2, %zmm8\n",
"45. 4 32.3 1.0 22.0 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
"46. 4 38.0 2.0 16.0 vrcp14pd\t%zmm29, %zmm31\n",
"47. 4 41.0 0.0 5.0 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
"48. 4 40.5 0.3 11.8 vfpclasspd\t$30, %zmm31, %k6\n",
"49. 4 51.0 0.0 2.0 vmulpd\t%zmm29, %zmm29, %zmm30\n",
"50. 4 44.3 0.8 11.0 knotw\t%k6, %k7\n",
"51. 4 50.5 0.5 1.5 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
"52. 4 53.8 0.0 0.0 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
"53. 4 57.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
"54. 4 1.0 1.0 58.5 cmpl\t%edx, %ecx\n",
"55. 4 1.3 0.0 57.5 jb\t..B1.4\n",
" 4 32.5 1.6 18.4 <total>\n",
"</pre>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space: pre !important;\">Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-30;16:57:45\n",
"Analyzed File - build/SKX/icc/O3/pi.marked.o\n",
"Binary Format - 64Bit\n",
"Architecture - SKX\n",
"Analysis Type - Throughput\n",
"\n",
"Throughput Analysis Report\n",
"--------------------------\n",
"Block Throughput: 31.50 Cycles Throughput Bottleneck: Backend\n",
"Loop Count: 103\n",
"Port Binding In Cycles Per Iteration:\n",
"--------------------------------------------------------------------------------------------------\n",
"| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
"--------------------------------------------------------------------------------------------------\n",
"| Cycles | 30.0 0.0 | 4.0 | 2.0 2.0 | 2.0 2.0 | 0.0 | 30.0 | 1.0 | 0.0 |\n",
"--------------------------------------------------------------------------------------------------\n",
"\n",
"DV - Divider pipe (on port 0)\n",
"D - Data fetch pipe (on ports 2 and 3)\n",
"F - Macro Fusion with the previous instruction occurred\n",
"* - instruction micro-ops not bound to a port\n",
"^ - Micro Fusion occurred\n",
"# - ESP Tracking sync uop was issued\n",
"@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected\n",
"X - instruction not supported, was not accounted in Analysis\n",
"\n",
"| Num Of | Ports pressure in cycles | |\n",
"| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
"-----------------------------------------------------------------------------------------\n",
"| 1 | | | | | | | 1.0 | | add ecx, 0x20\n",
"| 1 | | 1.0 | | | | | | | vpaddd ymm14, ymm9, ymm5\n",
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm8, ymm9\n",
"| 1 | | | | | | 1.0 | | | vaddpd zmm10, zmm1, zmm8\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm11, zmm2, zmm10\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm11, zmm11, zmm0\n",
"| 1* | | | | | | | | | vmovaps zmm29, zmm0\n",
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm13, zmm11\n",
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm11, zmm13, qword ptr [rip]{1to8}\n",
"| 1 | | | | | | 1.0 | | | vfpclasspd k0, zmm13, 0x1e\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm11, zmm11\n",
"| 1 | 1.0 | | | | | | | | knotw k1, k0\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm13{k1}, zmm11, zmm13\n",
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm13{k1}, zmm12, zmm13\n",
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm13, zmm4\n",
"| 1 | | 1.0 | | | | | | | vpaddd ymm20, ymm14, ymm5\n",
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm15, ymm14\n",
"| 1 | 1.0 | | | | | | | | vaddpd zmm16, zmm1, zmm15\n",
"| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm2, zmm16\n",
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm17, zmm17, zmm0\n",
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm19, zmm17\n",
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm17, zmm19, qword ptr [rip]{1to8}\n",
"| 1 | | | | | | 1.0 | | | vfpclasspd k2, zmm19, 0x1e\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm17, zmm17\n",
"| 1 | 1.0 | | | | | | | | knotw k3, k2\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm17, zmm19\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm18, zmm19\n",
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm19, zmm4\n",
"| 1 | | 1.0 | | | | | | | vpaddd ymm26, ymm20, ymm5\n",
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm21, ymm20\n",
"| 1 | | | | | | 1.0 | | | vaddpd zmm22, zmm1, zmm21\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm23, zmm2, zmm22\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm23, zmm23, zmm0\n",
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm25, zmm23\n",
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm23, zmm25, qword ptr [rip]{1to8}\n",
"| 1 | | | | | | 1.0 | | | vfpclasspd k4, zmm25, 0x1e\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm24, zmm23, zmm23\n",
"| 1 | 1.0 | | | | | | | | knotw k5, k4\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm25{k5}, zmm23, zmm25\n",
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm25{k5}, zmm24, zmm25\n",
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm25, zmm4\n",
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm27, ymm26\n",
"| 1 | | 1.0 | | | | | | | vpaddd ymm9, ymm26, ymm5\n",
"| 1 | 1.0 | | | | | | | | vaddpd zmm28, zmm1, zmm27\n",
"| 1 | | | | | | 1.0 | | | vmulpd zmm8, zmm2, zmm28\n",
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm29, zmm8, zmm8\n",
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm31, zmm29\n",
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm29, zmm31, qword ptr [rip]{1to8}\n",
"| 1 | | | | | | 1.0 | | | vfpclasspd k6, zmm31, 0x1e\n",
"| 1 | 1.0 | | | | | | | | vmulpd zmm30, zmm29, zmm29\n",
"| 1 | 1.0 | | | | | | | | knotw k7, k6\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm29, zmm31\n",
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm30, zmm31\n",
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm31, zmm4\n",
"| 1* | | | | | | | | | cmp ecx, edx\n",
"| 0*F | | | | | | | | | jb 0xfffffffffffffeb3\n",
"Total Num Of Uops: 71\n",
"Analysis Notes:\n",
"Backend allocation was stalled due to unavailable allocation resources.\n",
"</pre>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -307,7 +973,8 @@
" for l in r['analyzed kernel']\n",
" if l['instruction']]))\n",
"for a in archs:\n",
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')"
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')\n",
"get_info((\"SKX\", \"icc\", \"O3\", \"pi\"))"
]
},
{
@@ -343,7 +1010,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 25,
"metadata": {
"hideCode": false,
"hidePrompt": false,