mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 09:00:05 +01:00
improved register range and list support on AArch64
This commit is contained in:
@@ -1132,6 +1132,27 @@ instruction_forms:
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[1, '0'],[1, '3'],[4, '56'], [4, ['5D', '6D']]] # not sure if we also have 4 data accesses
|
||||
- name: ld2d
|
||||
operands:
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 11.0 # 1*p0+1*p3+4*p56+1*p5D6D
|
||||
port_pressure: [[2, '56'], [4, ['5D', '6D']]]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
@@ -1980,6 +2001,27 @@ instruction_forms:
|
||||
throughput: 1.0
|
||||
latency: 0 # 1*p5+1*p6+1*p0
|
||||
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
|
||||
- name: st2d
|
||||
operands:
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: 'z'
|
||||
shape: 'd'
|
||||
- class: register
|
||||
prefix: p
|
||||
predication: '*'
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 0 # 1*p5+1*p6+1*p0
|
||||
port_pressure: [[1, '5'], [1, '6'], [1, '0']]
|
||||
- name: sub
|
||||
operands:
|
||||
- class: register
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
from copy import deepcopy
|
||||
import pyparsing as pp
|
||||
|
||||
from osaca.parser import AttrDict, BaseParser
|
||||
@@ -240,7 +239,7 @@ class ParserAArch64(BaseParser):
|
||||
|
||||
# 1. Parse comment
|
||||
try:
|
||||
result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())
|
||||
result = self.process_operand(self.comment.parseString(line, parseAll=True).asDict())[0]
|
||||
result = AttrDict.convert_dict(result)
|
||||
instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID])
|
||||
except pp.ParseException:
|
||||
@@ -249,7 +248,7 @@ class ParserAArch64(BaseParser):
|
||||
try:
|
||||
result = self.process_operand(
|
||||
self.llvm_markers.parseString(line, parseAll=True).asDict()
|
||||
)
|
||||
)[0]
|
||||
result = AttrDict.convert_dict(result)
|
||||
instruction_form[self.COMMENT_ID] = " ".join(result[self.COMMENT_ID])
|
||||
except pp.ParseException:
|
||||
@@ -257,7 +256,7 @@ class ParserAArch64(BaseParser):
|
||||
# 2. Parse label
|
||||
if result is None:
|
||||
try:
|
||||
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
|
||||
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())[0]
|
||||
result = AttrDict.convert_dict(result)
|
||||
instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
|
||||
if self.COMMENT_ID in result[self.LABEL_ID]:
|
||||
@@ -272,7 +271,7 @@ class ParserAArch64(BaseParser):
|
||||
try:
|
||||
result = self.process_operand(
|
||||
self.directive.parseString(line, parseAll=True).asDict()
|
||||
)
|
||||
)[0]
|
||||
result = AttrDict.convert_dict(result)
|
||||
instruction_form[self.DIRECTIVE_ID] = AttrDict(
|
||||
{
|
||||
@@ -292,7 +291,6 @@ class ParserAArch64(BaseParser):
|
||||
try:
|
||||
result = self.parse_instruction(line)
|
||||
except (pp.ParseException, KeyError) as e:
|
||||
raise e
|
||||
raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e
|
||||
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
|
||||
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
|
||||
@@ -313,19 +311,19 @@ class ParserAArch64(BaseParser):
|
||||
# Add operands to list
|
||||
# Check first operand
|
||||
if "operand1" in result:
|
||||
operands.append(self.process_operand(result["operand1"]))
|
||||
operands += self.process_operand(result["operand1"])
|
||||
# Check second operand
|
||||
if "operand2" in result:
|
||||
operands.append(self.process_operand(result["operand2"]))
|
||||
operands += self.process_operand(result["operand2"])
|
||||
# Check third operand
|
||||
if "operand3" in result:
|
||||
operands.append(self.process_operand(result["operand3"]))
|
||||
operands += self.process_operand(result["operand3"])
|
||||
# Check fourth operand
|
||||
if "operand4" in result:
|
||||
operands.append(self.process_operand(result["operand4"]))
|
||||
operands += self.process_operand(result["operand4"])
|
||||
# Check fifth operand
|
||||
if "operand5" in result:
|
||||
operands.append(self.process_operand(result["operand5"]))
|
||||
operands += self.process_operand(result["operand5"])
|
||||
|
||||
return_dict = AttrDict(
|
||||
{
|
||||
@@ -342,23 +340,23 @@ class ParserAArch64(BaseParser):
|
||||
"""Post-process operand"""
|
||||
# structure memory addresses
|
||||
if self.MEMORY_ID in operand:
|
||||
return self.process_memory_address(operand[self.MEMORY_ID])
|
||||
return [self.process_memory_address(operand[self.MEMORY_ID])]
|
||||
# structure register lists
|
||||
if self.REGISTER_ID in operand and (
|
||||
"list" in operand[self.REGISTER_ID] or "range" in operand[self.REGISTER_ID]
|
||||
):
|
||||
# TODO: discuss if ranges should be converted to lists
|
||||
return self.process_register_list(operand[self.REGISTER_ID])
|
||||
# resolve ranges and lists
|
||||
return self.resolve_range_list(self.process_register_list(operand[self.REGISTER_ID]))
|
||||
if self.REGISTER_ID in operand and operand[self.REGISTER_ID]["name"] == "sp":
|
||||
return self.process_sp_register(operand[self.REGISTER_ID])
|
||||
return [self.process_sp_register(operand[self.REGISTER_ID])]
|
||||
# add value attribute to floating point immediates without exponent
|
||||
if self.IMMEDIATE_ID in operand:
|
||||
return self.process_immediate(operand[self.IMMEDIATE_ID])
|
||||
return [self.process_immediate(operand[self.IMMEDIATE_ID])]
|
||||
if self.LABEL_ID in operand:
|
||||
return self.process_label(operand[self.LABEL_ID])
|
||||
return [self.process_label(operand[self.LABEL_ID])]
|
||||
if self.IDENTIFIER_ID in operand:
|
||||
return self.process_identifier(operand[self.IDENTIFIER_ID])
|
||||
return operand
|
||||
return [self.process_identifier(operand[self.IDENTIFIER_ID])]
|
||||
return [operand]
|
||||
|
||||
def process_memory_address(self, memory_address):
|
||||
"""Post-process memory address operand"""
|
||||
@@ -391,6 +389,36 @@ class ParserAArch64(BaseParser):
|
||||
reg["prefix"] = "x"
|
||||
return AttrDict({self.REGISTER_ID: reg})
|
||||
|
||||
def resolve_range_list(self, operand):
    """
    Resolve range or list register operand to list of registers.

    Every register of the list/range is copied, so the parsed operand itself is left
    untouched. If the operand carries an ``index`` entry next to its ``list``/``range``
    entry, that index is attached to each resolved register.

    :param operand: operand which may hold a ``list`` or ``range`` entry under its
                    ``register`` key
    :returns: list of operands of the form ``{REGISTER_ID: register}``, one per register;
              None if neither list nor range
    """
    if "register" not in operand:
        return None
    if "list" in operand.register:
        index = operand.register.get("index")
        registers = []
        for entry in operand.register.list:
            reg = deepcopy(entry)
            if index is not None:
                reg["index"] = index
            registers.append(AttrDict({self.REGISTER_ID: reg}))
        return registers
    if "range" in operand.register:
        # a range is given by its first and last register; both bounds are inclusive
        base_register = operand.register.range[0]
        index = operand.register.get("index")
        first = int(base_register.name)
        last = int(operand.register.range[1].name)
        registers = []
        for number in range(first, last + 1):
            # all registers of the range share the attributes of the first one
            reg = deepcopy(base_register)
            if index is not None:
                # use the index value fetched above; `range.index` would be the
                # sequence's search method rather than the parsed index
                reg["index"] = index
            reg["name"] = str(number)
            registers.append(AttrDict({self.REGISTER_ID: reg}))
        return registers
    return None
|
||||
|
||||
def process_register_list(self, register_list):
|
||||
"""Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})"""
|
||||
# Remove unnecessarily created dictionary entries during parsing
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
from itertools import chain
|
||||
from copy import deepcopy
|
||||
|
||||
from osaca import utils
|
||||
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
||||
@@ -122,6 +123,7 @@ class ISASemantics(object):
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed})
|
||||
)
|
||||
|
||||
# store operand list in dict and reassign operand key/value pair
|
||||
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
|
||||
# assign LD/ST flags
|
||||
@@ -130,6 +132,7 @@ class ISASemantics(object):
|
||||
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
|
||||
if self._has_store(instruction_form):
|
||||
instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
|
||||
|
||||
|
||||
def get_reg_changes(self, instruction_form, only_postindexed=False):
|
||||
"""
|
||||
|
||||
@@ -34,6 +34,8 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.code_aarch64_memdep = f.read()
|
||||
with open(cls._find_file("kernel_aarch64.s")) as f:
|
||||
cls.code_AArch64 = f.read()
|
||||
with open(cls._find_file("kernel_aarch64_sve.s")) as f:
|
||||
cls.code_AArch64_SVE = f.read()
|
||||
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
|
||||
cls.kernel_x86_memdep = reduce_to_section(
|
||||
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86")
|
||||
@@ -41,6 +43,8 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64")
|
||||
cls.kernel_aarch64_memdep = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64")
|
||||
cls.kernel_aarch64_SVE = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64")
|
||||
|
||||
# set up machine models
|
||||
cls.machine_model_csx = MachineModel(
|
||||
@@ -49,6 +53,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.machine_model_tx2 = MachineModel(
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml")
|
||||
)
|
||||
cls.machine_model_a64fx = MachineModel(
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "a64fx.yml")
|
||||
)
|
||||
cls.semantics_x86 = ISASemantics("x86")
|
||||
cls.semantics_csx = ArchSemantics(
|
||||
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
|
||||
@@ -58,6 +65,10 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.machine_model_tx2,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
|
||||
)
|
||||
cls.semantics_a64fx = ArchSemantics(
|
||||
cls.machine_model_a64fx,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
|
||||
)
|
||||
cls.machine_model_zen = MachineModel(arch="zen1")
|
||||
|
||||
for i in range(len(cls.kernel_x86)):
|
||||
@@ -72,6 +83,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
for i in range(len(cls.kernel_aarch64_memdep)):
|
||||
cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i])
|
||||
cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i])
|
||||
for i in range(len(cls.kernel_aarch64_SVE)):
|
||||
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
|
||||
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
|
||||
|
||||
###########
|
||||
# Tests
|
||||
@@ -320,6 +334,11 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg.get_dependent_instruction_forms()
|
||||
# test dot creation
|
||||
dg.export_graph(filepath="/dev/null")
|
||||
|
||||
def test_kernelDG_SVE(self):
|
||||
dg = KernelDG(self.kernel_aarch64_SVE, self.parser_AArch64, self.machine_model_a64fx,
|
||||
self.semantics_a64fx)
|
||||
# TODO check for correct analysis
|
||||
|
||||
def test_hidden_load(self):
|
||||
machine_model_hld = MachineModel(
|
||||
@@ -421,6 +440,7 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1))
|
||||
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2))
|
||||
self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm))
|
||||
|
||||
|
||||
def test_is_read_is_written_AArch64(self):
|
||||
# independent form HW model
|
||||
|
||||
@@ -232,7 +232,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -284,8 +284,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -296,8 +298,672 @@
|
||||
"ZEN has 156 tests, compiled to 126 unique assembly representations.\n",
|
||||
"ZEN2 has 156 tests, compiled to 126 unique assembly representations.\n",
|
||||
"TX2 has 104 tests, compiled to 78 unique assembly representations.\n",
|
||||
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n"
|
||||
"A64FX has 104 tests, compiled to 81 unique assembly representations.\n",
|
||||
"High-level iterations in assembly block: 16\n",
|
||||
"Measured: 1.1903856655856655\n",
|
||||
"IACA Predicted: 1.96875 TP: 1.875 LCD: None CP: None\n",
|
||||
"Ithemal Predicted: nan TP: None LCD: None CP: None\n",
|
||||
"LLVM-MCA Predicted: 2.240625 TP: 1.948125 LCD: 2.240625 CP: 3.8125\n",
|
||||
"OSACA Predicted: 1.875 TP: 1.875 LCD: 0.5 CP: 2.75\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<pre style=\"white-space: pre !important;\">Open Source Architecture Code Analyzer (OSACA) - 0.3.14\n",
|
||||
"Analyzed file: build/SKX/icc/O3/pi.marked.s\n",
|
||||
"Architecture: SKX\n",
|
||||
"Timestamp: 2021-04-15 12:15:40\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction\n",
|
||||
" * - Instruction micro-ops not bound to a port\n",
|
||||
" X - No throughput/latency information for this instruction in data file\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Combined Analysis Report\n",
|
||||
"------------------------\n",
|
||||
" Port pressure in cycles \n",
|
||||
" | 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |\n",
|
||||
"-------------------------------------------------------------------------------------------------\n",
|
||||
" 62 | | | | | | | | || | | # pointer_increment=128 fa3c665ee18e1e5f704c8a6026891c36\n",
|
||||
" 63 | | | | | | | | || | | ..B1.4: # Preds ..B1.4 ..B1.3\n",
|
||||
" 64 | | | | | | | | || | | # Execution count [5.00e+00]\n",
|
||||
" 65 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addl $32, %ecx #16.5\n",
|
||||
" 66 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm5, %ymm9, %ymm14 #17.9\n",
|
||||
" 67 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm9, %zmm8 #17.14\n",
|
||||
" 68 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm8, %zmm1, %zmm10 #17.18\n",
|
||||
" 69 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm2, %zmm11 #17.25\n",
|
||||
" 70 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm11, %zmm11 #18.38\n",
|
||||
" 71 | | | | | | | | || | | * vmovaps %zmm0, %zmm29 #18.38\n",
|
||||
" 72 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm11, %zmm13 #18.38\n",
|
||||
" 73 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11 #18.38\n",
|
||||
" 74 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm13, %k0 #18.38\n",
|
||||
" 75 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm11, %zmm11, %zmm12 #18.38\n",
|
||||
" 76 | 1.00 | | | | | | | || | | knotw %k0, %k1 #18.38\n",
|
||||
" 77 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm11, %zmm13{%k1} #18.38\n",
|
||||
" 78 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm13, %zmm12, %zmm13{%k1} #18.38\n",
|
||||
" 79 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38\n",
|
||||
" 80 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm14, %ymm20 #17.9\n",
|
||||
" 81 | 0.50 | | | | | 1.50 | | || 7.0 | | vcvtdq2pd %ymm14, %zmm15 #17.14\n",
|
||||
" 82 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddpd %zmm15, %zmm1, %zmm16 #17.18\n",
|
||||
" 83 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm16, %zmm2, %zmm17 #17.25\n",
|
||||
" 84 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm0, %zmm17, %zmm17 #18.38\n",
|
||||
" 85 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm17, %zmm19 #18.38\n",
|
||||
" 86 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17 #18.38\n",
|
||||
" 87 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm19, %k2 #18.38\n",
|
||||
" 88 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm17, %zmm18 #18.38\n",
|
||||
" 89 | 1.00 | | | | | | | || | | knotw %k2, %k3 #18.38\n",
|
||||
" 90 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm19, %zmm17, %zmm19{%k3} #18.38\n",
|
||||
" 91 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm19, %zmm18, %zmm19{%k3} #18.38\n",
|
||||
" 92 | 0.50 | | | | | 0.50 | | || 4.0 | 4.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38\n",
|
||||
" 93 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm20, %ymm26 #17.9\n",
|
||||
" 94 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm20, %zmm21 #17.14\n",
|
||||
" 95 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm21, %zmm1, %zmm22 #17.18\n",
|
||||
" 96 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm22, %zmm2, %zmm23 #17.25\n",
|
||||
" 97 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm0, %zmm23, %zmm23 #18.38\n",
|
||||
" 98 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm23, %zmm25 #18.38\n",
|
||||
" 99 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23 #18.38\n",
|
||||
" 100 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm25, %k4 #18.38\n",
|
||||
" 101 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm23, %zmm23, %zmm24 #18.38\n",
|
||||
" 102 | 1.00 | | | | | | | || | | knotw %k4, %k5 #18.38\n",
|
||||
" 103 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm23, %zmm25{%k5} #18.38\n",
|
||||
" 104 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm25, %zmm24, %zmm25{%k5} #18.38\n",
|
||||
" 105 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm4, %zmm25, %zmm6 #18.38\n",
|
||||
" 106 | 0.50 | | | | | 1.50 | | || | | vcvtdq2pd %ymm26, %zmm27 #17.14\n",
|
||||
" 107 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm5, %ymm26, %ymm9 #17.9\n",
|
||||
" 108 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm27, %zmm1, %zmm28 #17.18\n",
|
||||
" 109 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm28, %zmm2, %zmm8 #17.25\n",
|
||||
" 110 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm8, %zmm8, %zmm29 #18.38\n",
|
||||
" 111 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm29, %zmm31 #18.38\n",
|
||||
" 112 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vfnmadd213pd .L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29 #18.38\n",
|
||||
" 113 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm31, %k6 #18.38\n",
|
||||
" 114 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm29, %zmm29, %zmm30 #18.38\n",
|
||||
" 115 | 1.00 | | | | | | | || | | knotw %k6, %k7 #18.38\n",
|
||||
" 116 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm29, %zmm31{%k7} #18.38\n",
|
||||
" 117 | 0.00 | | | | | 1.00 | | || | | vfmadd213pd %zmm31, %zmm30, %zmm31{%k7} #18.38\n",
|
||||
" 118 | 0.00 | | | | | 1.00 | | || 0.0 | 4.0 | vfmadd231pd %zmm4, %zmm31, %zmm3 #18.38\n",
|
||||
" 119 | 0.00 | 0.34 | | | | 0.00 | 0.66 | || | | cmpl %edx, %ecx #16.5\n",
|
||||
" 120 | 0.00 | | | | | | 1.00 | || | | jb ..B1.4 # Prob 82% #16.5\n",
|
||||
"\n",
|
||||
" 30.0 4.34 2.00 2.00 2.00 2.00 30.0 2.66 44 8.0 \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Loop-Carried Dependencies Analysis Report\n",
|
||||
"-----------------------------------------\n",
|
||||
" 92 | 8.0 | vfmadd231pd %zmm4, %zmm19, %zmm3 #18.38| [92, 118]\n",
|
||||
" 79 | 8.0 | vfmadd231pd %zmm4, %zmm13, %zmm6 #18.38| [79, 105]\n",
|
||||
" 66 | 4.0 | vpaddd %ymm5, %ymm9, %ymm14 #17.9| [66, 80, 93, 107]\n",
|
||||
" 65 | 1.0 | addl $32, %ecx #16.5| [65]\n",
|
||||
"</pre>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<pre style=\"white-space: pre !important;\">Iterations: 100\n",
|
||||
"Instructions: 5600\n",
|
||||
"Total Cycles: 3585\n",
|
||||
"Total uOps: 7200\n",
|
||||
"\n",
|
||||
"Dispatch Width: 6\n",
|
||||
"uOps Per Cycle: 2.01\n",
|
||||
"IPC: 1.56\n",
|
||||
"Block RThroughput: 18.0\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Instruction Info:\n",
|
||||
"[1]: #uOps\n",
|
||||
"[2]: Latency\n",
|
||||
"[3]: RThroughput\n",
|
||||
"[4]: MayLoad\n",
|
||||
"[5]: MayStore\n",
|
||||
"[6]: HasSideEffects (U)\n",
|
||||
"\n",
|
||||
"[1] [2] [3] [4] [5] [6] Instructions:\n",
|
||||
" 1 1 0.25 addl\t$32, %ecx\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
" 1 1 0.33 vmovaps\t%zmm0, %zmm29\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm11, %zmm13\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
" 1 1 1.00 knotw\t%k0, %k1\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm17, %zmm19\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
" 1 1 1.00 knotw\t%k2, %k3\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm23, %zmm25\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
" 1 1 1.00 knotw\t%k4, %k5\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
" 2 7 1.00 vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
" 1 1 0.33 vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
" 1 4 0.50 vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
" 3 4 2.00 vrcp14pd\t%zmm29, %zmm31\n",
|
||||
" 2 11 0.50 * vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
" 1 4 1.00 vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
" 1 4 0.50 vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
" 1 1 1.00 knotw\t%k6, %k7\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
" 1 4 0.50 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
" 1 4 0.50 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
" 1 1 0.25 cmpl\t%edx, %ecx\n",
|
||||
" 1 1 0.50 jb\t..B1.4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Resources:\n",
|
||||
"[0] - SKXDivider\n",
|
||||
"[1] - SKXFPDivider\n",
|
||||
"[2] - SKXPort0\n",
|
||||
"[3] - SKXPort1\n",
|
||||
"[4] - SKXPort2\n",
|
||||
"[5] - SKXPort3\n",
|
||||
"[6] - SKXPort4\n",
|
||||
"[7] - SKXPort5\n",
|
||||
"[8] - SKXPort6\n",
|
||||
"[9] - SKXPort7\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Resource pressure per iteration:\n",
|
||||
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] \n",
|
||||
" - - 31.17 5.72 2.00 2.00 - 29.10 2.01 - \n",
|
||||
"\n",
|
||||
"Resource pressure by instruction:\n",
|
||||
"[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:\n",
|
||||
" - - - 0.80 - - - 0.19 0.01 - addl\t$32, %ecx\n",
|
||||
" - - 0.07 0.92 - - - 0.01 - - vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
" - - 0.42 - - - - 0.58 - - vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
" - - 0.51 - - - - 0.49 - - vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
" - - 0.45 - - - - 0.55 - - vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
" - - - 1.00 - - - - - - vmovaps\t%zmm0, %zmm29\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm11, %zmm13\n",
|
||||
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
" - - 0.49 - - - - 0.51 - - vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k0, %k1\n",
|
||||
" - - 0.44 - - - - 0.56 - - vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
" - - 0.70 - - - - 0.30 - - vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
" - - 0.48 - - - - 0.52 - - vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
" - - 0.42 - - - - 0.58 - - vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
" - - 0.32 - - - - 0.68 - - vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm17, %zmm19\n",
|
||||
" - - 0.32 - 1.00 - - 0.68 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k2, %k3\n",
|
||||
" - - 0.53 - - - - 0.47 - - vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
" - - 0.57 - - - - 0.43 - - vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
" - - 0.52 - - - - 0.48 - - vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm23, %zmm25\n",
|
||||
" - - 0.40 - - 1.00 - 0.60 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
" - - 0.53 - - - - 0.47 - - vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k4, %k5\n",
|
||||
" - - 0.42 - - - - 0.58 - - vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
" - - 0.54 - - - - 0.46 - - vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
" - - 0.60 - - - - 0.40 - - vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
" - - 1.00 - - - - 1.00 - - vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
" - - - 1.00 - - - - - - vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
" - - 0.26 - - - - 0.74 - - vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
" - - 0.47 - - - - 0.53 - - vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
" - - 0.34 - - - - 0.66 - - vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
" - - 2.00 - - - - 1.00 - - vrcp14pd\t%zmm29, %zmm31\n",
|
||||
" - - 0.34 - 1.00 - - 0.66 - - vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
" - - - - - - - 1.00 - - vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
" - - 0.52 - - - - 0.48 - - vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
" - - 1.00 - - - - - - - knotw\t%k6, %k7\n",
|
||||
" - - 0.47 - - - - 0.53 - - vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
" - - 0.48 - - - - 0.52 - - vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
" - - 0.66 - - - - 0.34 - - vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
" - - - - - - - - 1.00 - cmpl\t%edx, %ecx\n",
|
||||
" - - - - - - - - 1.00 - jb\t..B1.4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Timeline view:\n",
|
||||
" 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 \n",
|
||||
"Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678\n",
|
||||
"\n",
|
||||
"[0,0] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[0,1] DeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[0,2] D=eeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[0,3] D========eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[0,4] D============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[0,5] .D===============eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[0,6] .DeE------------------R . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[0,7] .D===================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[0,8] . D======================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[0,9] . D======================eeeeE-------R . . . . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[0,10] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[0,11] . D==========================eE----------R . . . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[0,12] . D=================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[0,13] . D====================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[0,14] . D========================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[0,15] . DeE-------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[0,16] . DeeeeeeeE-------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[0,17] . D=======eeeeE---------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[0,18] . D==========eeeeE-----------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[0,19] . D==============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[0,20] . D==================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[0,21] . D=====================eeeeeeeeeeeE----------R. . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[0,22] . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[0,23] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[0,24] . D==========================eE---------------R. . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
|
||||
"[0,25] . D================================eeeeE------R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[0,26] . .D===================================eeeeE--R. . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[0,27] . .D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[0,28] . .DeE------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[0,29] . .DeeeeeeeE------------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[0,30] . .D=======eeeeE--------------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[0,31] . . D==========eeeeE----------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[0,32] . . D==============eeeeE------------------------R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[0,33] . . D==================eeeeE--------------------R . . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[0,34] . . D=====================eeeeeeeeeeeE---------R . . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[0,35] . . D=====================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[0,36] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[0,37] . . D==========================eE--------------R . . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
|
||||
"[0,38] . . D================================eeeeE-----R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[0,39] . . D===================================eeeeE-R . . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[0,40] . . D=======================================eeeeER. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[0,41] . . DeeeeeeeE------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[0,42] . . DeE------------------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[0,43] . . D=======eeeeE--------------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[0,44] . . D=============eeeeE-------------------------R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[0,45] . . D=================eeeeE---------------------R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[0,46] . . D======================eeeeE----------------R. . . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[0,47] . . .D=========================eeeeeeeeeeeE-----R. . . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[0,48] . . .D=========================eeeeE------------R. . . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[0,49] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[0,50] . . .D==============================eE----------R. . . . . . . . . . . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
|
||||
"[0,51] . . .D====================================eeeeE-R. . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[0,52] . . . D=======================================eeeeER . . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[0,53] . . . D===========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[0,54] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
|
||||
"[0,55] . . . D=eE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . jb\t..B1.4\n",
|
||||
"[1,0] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[1,1] . . . DeE----------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[1,2] . . . D==eeeeeeeE-------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[1,3] . . . D===============eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[1,4] . . . D====================eeeeE----------------------R . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[1,5] . . . D=========================eeeeE-----------------R . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[1,6] . . . DeE---------------------------------------------R . . . . . . . . . . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[1,7] . . . D============================eeeeE-------------R . . . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[1,8] . . . D================================eeeeeeeeeeeE--R . . . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[1,9] . . . D================================eeeeE---------R . . . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[1,10] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[1,11] . . . D====================================eE---------R . . . . . . . . . . . . . . . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[1,12] . . . D==========================================eeeeER . . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[1,13] . . . D==============================================eeeeER . . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[1,14] . . . D==================================================eeeeER . . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[1,15] . . . DeE-----------------------------------------------------R . . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[1,16] . . . .D===eeeeeeeE-------------------------------------------R . . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[1,17] . . . .D==============eeeeE-----------------------------------R . . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[1,18] . . . .D==================eeeeE-------------------------------R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[1,19] . . . .D======================eeeeE---------------------------R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[1,20] . . . . D================================eeeeE----------------R . . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[1,21] . . . . D====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[1,22] . . . . D=====================================eeeeE-----------R . . . . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[1,23] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[1,24] . . . . D========================================eE----------R . . . . . . . . . . . . . . . . . . . . knotw\t%k2, %k3\n",
|
||||
"[1,25] . . . . D==============================================eeeeE-R . . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[1,26] . . . . D==================================================eeeeER. . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[1,27] . . . . D======================================================eeeeER . . . . . . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[1,28] . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[1,29] . . . . D=================================eeeeeeeE-----------------R . . . . . . . . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[1,30] . . . . D========================================eeeeE-------------R . . . . . . . . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[1,31] . . . . D===========================================eeeeE---------R . . . . . . . . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[1,32] . . . . .D==============================================eeeeE-----R . . . . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[1,33] . . . . . D=================================================eeeeE-R . . . . . . . . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[1,34] . . . . . D====================================================eeeeeeeeeeeER . . . . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[1,35] . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[1,36] . . . . . D=============================================================eeeeER . . . . . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[1,37] . . . . . D======================================================eE----------R . . . . . . . . . . . . . . . . knotw\t%k4, %k5\n",
|
||||
"[1,38] . . . . . .D============================================================eeeeER . . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[1,39] . . . . . . D===============================================================eeeeER . . . . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[1,40] . . . . . . D==================================================================eeeeER . . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[1,41] . . . . . . D============================eeeeeeeE-----------------------------------R . . . . . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[1,42] . . . . . . DeE--------------------------------------------------------------------R . . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[1,43] . . . . . . D==================================eeeeE-------------------------------R . . . . . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[1,44] . . . . . . D=====================================eeeeE---------------------------R . . . . . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[1,45] . . . . . . D===========================================eeeeE---------------------R . . . . . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[1,46] . . . . . . D===============================================eeeeE-----------------R . . . . . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[1,47] . . . . . . .D==================================================eeeeeeeeeeeE------R . . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[1,48] . . . . . . . D=================================================eeeeE-------------R . . . . . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[1,49] . . . . . . . D===========================================================eeeeE--R . . . . . . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[1,50] . . . . . . . D=====================================================eE----------R . . . . . . . . . . . . . . knotw\t%k6, %k7\n",
|
||||
"[1,51] . . . . . . . D==========================================================eeeeE-R . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[1,52] . . . . . . . D==============================================================eeeeER . . . . . . . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[1,53] . . . . . . . .D=================================================================eeeeER . . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[1,54] . . . . . . . .DeE--------------------------------------------------------------------R . . . . . . . . . . . . . cmpl\t%edx, %ecx\n",
|
||||
"[1,55] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . jb\t..B1.4\n",
|
||||
"[2,0] . . . . . . . . DeE-------------------------------------------------------------------R . . . . . . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[2,1] . . . . . . . . D=eE------------------------------------------------------------------R . . . . . . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[2,2] . . . . . . . . D======================eeeeeeeE--------------------------------------R . . . . . . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[2,3] . . . . . . . . D==============================eeeeE---------------------------------R . . . . . . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[2,4] . . . . . . . . D===================================eeeeE----------------------------R . . . . . . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[2,5] . . . . . . . . D========================================eeeeE-----------------------R . . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[2,6] . . . . . . . . DeE-----------------------------------------------------------------R . . . . . . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[2,7] . . . . . . . . D===========================================eeeeE-------------------R . . . . . . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[2,8] . . . . . . . . D================================================eeeeeeeeeeeE-------R . . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[2,9] . . . . . . . . D================================================eeeeE-------------R . . . . . . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[2,10] . . . . . . . . D==========================================================eeeeE---R . . . . . . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[2,11] . . . . . . . . .D======================================================eE---------R . . . . . . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[2,12] . . . . . . . . .D=========================================================eeeeE---R . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[2,13] . . . . . . . . . D============================================================eeeeER . . . . . . . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[2,14] . . . . . . . . . D================================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[2,15] . . . . . . . . . DeE------------------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[2,16] . . . . . . . . . D==================eeeeeeeE-----------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[2,17] . . . . . . . . . D=========================eeeeE-------------------------------------R . . . . . . . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[2,18] . . . . . . . . . D=============================eeeeE--------------------------------R . . . . . . . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[2,19] . . . . . . . . . .D=================================eeeeE---------------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[2,20] . . . . . . . . . . D=====================================eeeeE----------------------R . . . . . . . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[2,21] . . . . . . . . . . D=========================================eeeeeeeeeeeE-----------R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[2,22] . . . . . . . . . . D=========================================eeeeE-----------------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[2,23] . . . . . . . . . . D===================================================eeeeE-------R . . . . . . . . . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[2,24] . . . . . . . . . . D===============================================eE-------------R . . . . . . . . . . . . knotw\t%k2, %k3\n",
|
||||
"[2,25] . . . . . . . . . . D=================================================eeeeE-------R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[2,26] . . . . . . . . . . . D===================================================eeeeE---R . . . . . . . . . . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[2,27] . . . . . . . . . . . D=======================================================eeeeER . . . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[2,28] . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . . . . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[2,29] . . . . . . . . . . . D============eeeeeeeE--------------------------------------R . . . . . . . . . . . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[2,30] . . . . . . . . . . . D====================eeeeE---------------------------------R . . . . . . . . . . . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[2,31] . . . . . . . . . . . D=========================eeeeE---------------------------R . . . . . . . . . . . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[2,32] . . . . . . . . . . . .D=============================eeeeE----------------------R . . . . . . . . . . . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[2,33] . . . . . . . . . . . . D==================================eeeeE----------------R . . . . . . . . . . . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[2,34] . . . . . . . . . . . . D=====================================eeeeeeeeeeeE-----R . . . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[2,35] . . . . . . . . . . . . D======================================eeeeE-----------R . . . . . . . . . . . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[2,36] . . . . . . . . . . . . D===============================================eeeeE-R . . . . . . . . . . . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[2,37] . . . . . . . . . . . . D========================================eE----------R . . . . . . . . . . . . knotw\t%k4, %k5\n",
|
||||
"[2,38] . . . . . . . . . . . . .D==============================================eeeeER . . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[2,39] . . . . . . . . . . . . . D=================================================eeeeER . . . . . . . . . . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[2,40] . . . . . . . . . . . . . D====================================================eeeeER . . . . . . . . . . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[2,41] . . . . . . . . . . . . . D======eeeeeeeE------------------------------------------R . . . . . . . . . . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[2,42] . . . . . . . . . . . . . DeE------------------------------------------------------R . . . . . . . . . . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[2,43] . . . . . . . . . . . . . D===============eeeeE-----------------------------------R . . . . . . . . . . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[2,44] . . . . . . . . . . . . . D========================eeeeE--------------------------R . . . . . . . . . . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[2,45] . . . . . . . . . . . . . D============================eeeeE----------------------R . . . . . . . . . . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[2,46] . . . . . . . . . . . . . .D======================================eeeeE-----------R . . . . . . . . . . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[2,47] . . . . . . . . . . . . . . D=========================================eeeeeeeeeeeER . . . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[2,48] . . . . . . . . . . . . . . D=========================================eeeeE------R . . . . . . . . . . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[2,49] . . . . . . . . . . . . . . D===================================================eeeeER . . . . . . . . . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[2,50] . . . . . . . . . . . . . . D============================================eE---------R . . . . . . . . . knotw\t%k6, %k7\n",
|
||||
"[2,51] . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . . . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[2,52] . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . . . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[2,53] . . . . . . . . . . . . . . .D========================================================eeeeER . . . . . . . . vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[2,54] . . . . . . . . . . . . . . . DeE----------------------------------------------------------R . . . . . . . . cmpl\t%edx, %ecx\n",
|
||||
"[2,55] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . jb\t..B1.4\n",
|
||||
"[3,0] . . . . . . . . . . . . . . . DeE---------------------------------------------------------R . . . . . . . . addl\t$32, %ecx\n",
|
||||
"[3,1] . . . . . . . . . . . . . . . DeE--------------------------------------------------------R . . . . . . . . vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"[3,2] . . . . . . . . . . . . . . . D==eeeeeeeE------------------------------------------------R . . . . . . . . vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"[3,3] . . . . . . . . . . . . . . . D=========eeeeE--------------------------------------------R . . . . . . . . vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"[3,4] . . . . . . . . . . . . . . . D================eeeeE-------------------------------------R . . . . . . . . vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"[3,5] . . . . . . . . . . . . . . . D===================eeeeE---------------------------------R . . . . . . . . vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"[3,6] . . . . . . . . . . . . . . . DeE-------------------------------------------------------R . . . . . . . . vmovaps\t%zmm0, %zmm29\n",
|
||||
"[3,7] . . . . . . . . . . . . . . . D===================================eeeeE-----------------R . . . . . . . . vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"[3,8] . . . . . . . . . . . . . . . .D======================================eeeeeeeeeeeE------R . . . . . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"[3,9] . . . . . . . . . . . . . . . .D=======================================eeeeE------------R . . . . . . . . vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"[3,10] . . . . . . . . . . . . . . . .D=================================================eeeeE--R . . . . . . . . vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"[3,11] . . . . . . . . . . . . . . . . D===========================================eE----------R . . . . . . . . knotw\t%k0, %k1\n",
|
||||
"[3,12] . . . . . . . . . . . . . . . . D===============================================eeeeE--R . . . . . . . . vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"[3,13] . . . . . . . . . . . . . . . . D==================================================eeeeER . . . . . . . vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"[3,14] . . . . . . . . . . . . . . . . D=====================================================eeeeER. . . . . . . vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"[3,15] . . . . . . . . . . . . . . . . DeE--------------------------------------------------------R. . . . . . . vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"[3,16] . . . . . . . . . . . . . . . . .D===============================eeeeeeeE------------------R. . . . . . . vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"[3,17] . . . . . . . . . . . . . . . . .D=======================================eeeeE-------------R. . . . . . . vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"[3,18] . . . . . . . . . . . . . . . . .D===========================================eeeeE---------R. . . . . . . vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"[3,19] . . . . . . . . . . . . . . . . . D==============================================eeeeE-----R. . . . . . . vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"[3,20] . . . . . . . . . . . . . . . . . D==================================================eeeeE-R. . . . . . . vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"[3,21] . . . . . . . . . . . . . . . . . D=====================================================eeeeeeeeeeeER. . . . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"[3,22] . . . . . . . . . . . . . . . . . D=====================================================eeeeE------R. . . . . vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"[3,23] . . . . . . . . . . . . . . . . . D==============================================================eeeeER . . . . vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"[3,24] . . . . . . . . . . . . . . . . . .D=======================================================eE---------R . . . . knotw\t%k2, %k3\n",
|
||||
"[3,25] . . . . . . . . . . . . . . . . . . D============================================================eeeeER . . . . vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"[3,26] . . . . . . . . . . . . . . . . . . D================================================================eeeeER . . . vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"[3,27] . . . . . . . . . . . . . . . . . . D===================================================================eeeeER . . vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"[3,28] . . . . . . . . . . . . . . . . . . DeE----------------------------------------------------------------------R . . vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"[3,29] . . . . . . . . . . . . . . . . . . D===========================eeeeeeeE------------------------------------R . . vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"[3,30] . . . . . . . . . . . . . . . . . . D==================================eeeeE--------------------------------R . . vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"[3,31] . . . . . . . . . . . . . . . . . . D======================================eeeeE----------------------------R . . vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"[3,32] . . . . . . . . . . . . . . . . . . D=========================================eeeeE------------------------R . . vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"[3,33] . . . . . . . . . . . . . . . . . . D=============================================eeeeE--------------------R . . vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"[3,34] . . . . . . . . . . . . . . . . . . .D================================================eeeeeeeeeeeE---------R . . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"[3,35] . . . . . . . . . . . . . . . . . . .D=================================================eeeeE---------------R . . vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"[3,36] . . . . . . . . . . . . . . . . . . . D==========================================================eeeeE-----R . . vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"[3,37] . . . . . . . . . . . . . . . . . . . D====================================================eE-------------R . . knotw\t%k4, %k5\n",
|
||||
"[3,38] . . . . . . . . . . . . . . . . . . . D========================================================eeeeE-----R . . vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"[3,39] . . . . . . . . . . . . . . . . . . . D============================================================eeeeE-R . . vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"[3,40] . . . . . . . . . . . . . . . . . . . D===============================================================eeeeER. . vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"[3,41] . . . . . . . . . . . . . . . . . . . D======================eeeeeeeE--------------------------------------R. . vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"[3,42] . . . . . . . . . . . . . . . . . . . .DeE-----------------------------------------------------------------R. . vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"[3,43] . . . . . . . . . . . . . . . . . . . .D============================eeeeE----------------------------------R. . vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"[3,44] . . . . . . . . . . . . . . . . . . . . D===============================eeeeE------------------------------R. . vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"[3,45] . . . . . . . . . . . . . . . . . . . . D=====================================eeeeE------------------------R. . vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"[3,46] . . . . . . . . . . . . . . . . . . . . D=========================================eeeeE--------------------R. . vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"[3,47] . . . . . . . . . . . . . . . . . . . . D============================================eeeeeeeeeeeE---------R. . vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"[3,48] . . . . . . . . . . . . . . . . . . . . D===========================================eeeeE----------------R. . vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"[3,49] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE-----R. . vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"[3,50] . . . . . . . . . . . . . . . . . . . . D==============================================eE---------------R. . knotw\t%k6, %k7\n",
|
||||
"[3,51] . . . . . . . . . . . . . . . . . . . . D======================================================eeeeE----R. . vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"[3,52] . . . . . . . . . . . . . . . . . . . . .D=========================================================eeeeER. . vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"[3,53] . . . . . . . . . . . . . . . . . . . . . D============================================================eeeeER vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"[3,54] . . . . . . . . . . . . . . . . . . . . . DeE--------------------------------------------------------------R cmpl\t%edx, %ecx\n",
|
||||
"[3,55] . . . . . . . . . . . . . . . . . . . . . DeE-------------------------------------------------------------R jb\t..B1.4\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Average Wait times (based on the timeline view):\n",
|
||||
"[0]: Executions\n",
|
||||
"[1]: Average time spent waiting in a scheduler's queue\n",
|
||||
"[2]: Average time spent waiting in a scheduler's queue while ready\n",
|
||||
"[3]: Average time elapsed from WB until retire stage\n",
|
||||
"\n",
|
||||
" [0] [1] [2] [3]\n",
|
||||
"0. 4 1.0 1.0 42.5 addl\t$32, %ecx\n",
|
||||
"1. 4 1.3 1.3 42.0 vpaddd\t%ymm5, %ymm9, %ymm14\n",
|
||||
"2. 4 7.8 7.8 30.8 vcvtdq2pd\t%ymm9, %zmm8\n",
|
||||
"3. 4 16.5 1.8 26.0 vaddpd\t%zmm8, %zmm1, %zmm10\n",
|
||||
"4. 4 21.8 1.3 21.8 vmulpd\t%zmm10, %zmm2, %zmm11\n",
|
||||
"5. 4 25.8 0.5 18.3 vfmadd213pd\t%zmm0, %zmm11, %zmm11\n",
|
||||
"6. 4 1.0 1.0 45.8 vmovaps\t%zmm0, %zmm29\n",
|
||||
"7. 4 32.3 3.0 12.3 vrcp14pd\t%zmm11, %zmm13\n",
|
||||
"8. 4 36.0 0.3 3.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm13, %zmm11\n",
|
||||
"9. 4 36.3 0.8 10.3 vfpclasspd\t$30, %zmm13, %k0\n",
|
||||
"10. 4 46.5 0.0 1.3 vmulpd\t%zmm11, %zmm11, %zmm12\n",
|
||||
"11. 4 40.8 1.3 9.5 knotw\t%k0, %k1\n",
|
||||
"12. 4 45.8 0.0 1.3 vfmadd213pd\t%zmm13, %zmm11, %zmm13 {%k1}\n",
|
||||
"13. 4 49.0 0.0 0.0 vfmadd213pd\t%zmm13, %zmm12, %zmm13 {%k1}\n",
|
||||
"14. 4 52.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm13, %zmm6\n",
|
||||
"15. 4 1.0 1.0 54.5 vpaddd\t%ymm5, %ymm14, %ymm20\n",
|
||||
"16. 4 14.0 14.0 34.8 vcvtdq2pd\t%ymm14, %zmm15\n",
|
||||
"17. 4 22.3 1.3 29.5 vaddpd\t%zmm15, %zmm1, %zmm16\n",
|
||||
"18. 4 26.0 0.3 25.3 vmulpd\t%zmm16, %zmm2, %zmm17\n",
|
||||
"19. 4 29.8 0.3 21.0 vfmadd213pd\t%zmm0, %zmm17, %zmm17\n",
|
||||
"20. 4 35.3 2.0 15.0 vrcp14pd\t%zmm17, %zmm19\n",
|
||||
"21. 4 38.8 0.0 6.5 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm19, %zmm17\n",
|
||||
"22. 4 39.3 1.0 12.5 vfpclasspd\t$30, %zmm19, %k2\n",
|
||||
"23. 4 48.8 0.0 3.5 vmulpd\t%zmm17, %zmm17, %zmm18\n",
|
||||
"24. 4 43.0 0.8 11.8 knotw\t%k2, %k3\n",
|
||||
"25. 4 47.8 0.0 3.5 vfmadd213pd\t%zmm19, %zmm17, %zmm19 {%k3}\n",
|
||||
"26. 4 51.0 0.0 1.3 vfmadd213pd\t%zmm19, %zmm18, %zmm19 {%k3}\n",
|
||||
"27. 4 54.8 0.0 0.0 vfmadd231pd\t%zmm4, %zmm19, %zmm3\n",
|
||||
"28. 4 1.0 1.0 56.5 vpaddd\t%ymm5, %ymm20, %ymm26\n",
|
||||
"29. 4 19.0 19.0 31.8 vcvtdq2pd\t%ymm20, %zmm21\n",
|
||||
"30. 4 26.3 0.3 27.5 vaddpd\t%zmm21, %zmm1, %zmm22\n",
|
||||
"31. 4 30.0 0.5 23.0 vmulpd\t%zmm22, %zmm2, %zmm23\n",
|
||||
"32. 4 33.5 0.3 18.8 vfmadd213pd\t%zmm0, %zmm23, %zmm23\n",
|
||||
"33. 4 37.5 0.5 14.3 vrcp14pd\t%zmm23, %zmm25\n",
|
||||
"34. 4 40.5 0.0 5.8 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm25, %zmm23\n",
|
||||
"35. 4 40.8 0.5 12.3 vfpclasspd\t$30, %zmm25, %k4\n",
|
||||
"36. 4 50.5 0.0 2.8 vmulpd\t%zmm23, %zmm23, %zmm24\n",
|
||||
"37. 4 44.0 0.5 11.8 knotw\t%k4, %k5\n",
|
||||
"38. 4 49.5 0.3 2.5 vfmadd213pd\t%zmm25, %zmm23, %zmm25 {%k5}\n",
|
||||
"39. 4 52.8 0.0 0.5 vfmadd213pd\t%zmm25, %zmm24, %zmm25 {%k5}\n",
|
||||
"40. 4 56.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm25, %zmm6\n",
|
||||
"41. 4 15.0 15.0 37.8 vcvtdq2pd\t%ymm26, %zmm27\n",
|
||||
"42. 4 1.0 1.0 57.3 vpaddd\t%ymm5, %ymm26, %ymm9\n",
|
||||
"43. 4 22.0 0.8 33.0 vaddpd\t%zmm27, %zmm1, %zmm28\n",
|
||||
"44. 4 27.3 2.0 27.0 vmulpd\t%zmm28, %zmm2, %zmm8\n",
|
||||
"45. 4 32.3 1.0 22.0 vfmadd231pd\t%zmm8, %zmm8, %zmm29\n",
|
||||
"46. 4 38.0 2.0 16.0 vrcp14pd\t%zmm29, %zmm31\n",
|
||||
"47. 4 41.0 0.0 5.0 vfnmadd213pd\t.L_2il0floatpacket.6(%rip){1to8}, %zmm31, %zmm29\n",
|
||||
"48. 4 40.5 0.3 11.8 vfpclasspd\t$30, %zmm31, %k6\n",
|
||||
"49. 4 51.0 0.0 2.0 vmulpd\t%zmm29, %zmm29, %zmm30\n",
|
||||
"50. 4 44.3 0.8 11.0 knotw\t%k6, %k7\n",
|
||||
"51. 4 50.5 0.5 1.5 vfmadd213pd\t%zmm31, %zmm29, %zmm31 {%k7}\n",
|
||||
"52. 4 53.8 0.0 0.0 vfmadd213pd\t%zmm31, %zmm30, %zmm31 {%k7}\n",
|
||||
"53. 4 57.0 0.0 0.0 vfmadd231pd\t%zmm4, %zmm31, %zmm3\n",
|
||||
"54. 4 1.0 1.0 58.5 cmpl\t%edx, %ecx\n",
|
||||
"55. 4 1.3 0.0 57.5 jb\t..B1.4\n",
|
||||
" 4 32.5 1.6 18.4 <total>\n",
|
||||
"</pre>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<pre style=\"white-space: pre !important;\">Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-30;16:57:45\n",
|
||||
"Analyzed File - build/SKX/icc/O3/pi.marked.o\n",
|
||||
"Binary Format - 64Bit\n",
|
||||
"Architecture - SKX\n",
|
||||
"Analysis Type - Throughput\n",
|
||||
"\n",
|
||||
"Throughput Analysis Report\n",
|
||||
"--------------------------\n",
|
||||
"Block Throughput: 31.50 Cycles Throughput Bottleneck: Backend\n",
|
||||
"Loop Count: 103\n",
|
||||
"Port Binding In Cycles Per Iteration:\n",
|
||||
"--------------------------------------------------------------------------------------------------\n",
|
||||
"| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
|
||||
"--------------------------------------------------------------------------------------------------\n",
|
||||
"| Cycles | 30.0 0.0 | 4.0 | 2.0 2.0 | 2.0 2.0 | 0.0 | 30.0 | 1.0 | 0.0 |\n",
|
||||
"--------------------------------------------------------------------------------------------------\n",
|
||||
"\n",
|
||||
"DV - Divider pipe (on port 0)\n",
|
||||
"D - Data fetch pipe (on ports 2 and 3)\n",
|
||||
"F - Macro Fusion with the previous instruction occurred\n",
|
||||
"* - instruction micro-ops not bound to a port\n",
|
||||
"^ - Micro Fusion occurred\n",
|
||||
"# - ESP Tracking sync uop was issued\n",
|
||||
"@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected\n",
|
||||
"X - instruction not supported, was not accounted in Analysis\n",
|
||||
"\n",
|
||||
"| Num Of | Ports pressure in cycles | |\n",
|
||||
"| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |\n",
|
||||
"-----------------------------------------------------------------------------------------\n",
|
||||
"| 1 | | | | | | | 1.0 | | add ecx, 0x20\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm14, ymm9, ymm5\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm8, ymm9\n",
|
||||
"| 1 | | | | | | 1.0 | | | vaddpd zmm10, zmm1, zmm8\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm11, zmm2, zmm10\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm11, zmm11, zmm0\n",
|
||||
"| 1* | | | | | | | | | vmovaps zmm29, zmm0\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm13, zmm11\n",
|
||||
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm11, zmm13, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k0, zmm13, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm11, zmm11\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k1, k0\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm13{k1}, zmm11, zmm13\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm13{k1}, zmm12, zmm13\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm13, zmm4\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm20, ymm14, ymm5\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm15, ymm14\n",
|
||||
"| 1 | 1.0 | | | | | | | | vaddpd zmm16, zmm1, zmm15\n",
|
||||
"| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm2, zmm16\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm17, zmm17, zmm0\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm19, zmm17\n",
|
||||
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm17, zmm19, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k2, zmm19, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm17, zmm17\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k3, k2\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm17, zmm19\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm19{k3}, zmm18, zmm19\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm19, zmm4\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm26, ymm20, ymm5\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm21, ymm20\n",
|
||||
"| 1 | | | | | | 1.0 | | | vaddpd zmm22, zmm1, zmm21\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm23, zmm2, zmm22\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm23, zmm23, zmm0\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm25, zmm23\n",
|
||||
"| 2^ | | | 1.0 1.0 | | | 1.0 | | | vfnmadd213pd zmm23, zmm25, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k4, zmm25, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm24, zmm23, zmm23\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k5, k4\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm25{k5}, zmm23, zmm25\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd213pd zmm25{k5}, zmm24, zmm25\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6, zmm25, zmm4\n",
|
||||
"| 2 | 1.0 | | | | | 1.0 | | | vcvtdq2pd zmm27, ymm26\n",
|
||||
"| 1 | | 1.0 | | | | | | | vpaddd ymm9, ymm26, ymm5\n",
|
||||
"| 1 | 1.0 | | | | | | | | vaddpd zmm28, zmm1, zmm27\n",
|
||||
"| 1 | | | | | | 1.0 | | | vmulpd zmm8, zmm2, zmm28\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm29, zmm8, zmm8\n",
|
||||
"| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm31, zmm29\n",
|
||||
"| 2^ | | | | 1.0 1.0 | | 1.0 | | | vfnmadd213pd zmm29, zmm31, qword ptr [rip]{1to8}\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfpclasspd k6, zmm31, 0x1e\n",
|
||||
"| 1 | 1.0 | | | | | | | | vmulpd zmm30, zmm29, zmm29\n",
|
||||
"| 1 | 1.0 | | | | | | | | knotw k7, k6\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm29, zmm31\n",
|
||||
"| 1 | | | | | | 1.0 | | | vfmadd213pd zmm31{k7}, zmm30, zmm31\n",
|
||||
"| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm31, zmm4\n",
|
||||
"| 1* | | | | | | | | | cmp ecx, edx\n",
|
||||
"| 0*F | | | | | | | | | jb 0xfffffffffffffeb3\n",
|
||||
"Total Num Of Uops: 71\n",
|
||||
"Analysis Notes:\n",
|
||||
"Backend allocation was stalled due to unavailable allocation resources.\n",
|
||||
"</pre>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -307,7 +973,8 @@
|
||||
" for l in r['analyzed kernel']\n",
|
||||
" if l['instruction']]))\n",
|
||||
"for a in archs:\n",
|
||||
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')"
|
||||
" print(a, 'has', len(df[df.arch == a]), 'tests, compiled to', len(set(list(df[df.arch == a]['kernel_index']))), 'unique assembly representations.')\n",
|
||||
"get_info((\"SKX\", \"icc\", \"O3\", \"pi\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -343,7 +1010,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 25,
|
||||
"metadata": {
|
||||
"hideCode": false,
|
||||
"hidePrompt": false,
|
||||
|
||||
Reference in New Issue
Block a user